CppADCodeGen
2.4.3
A C++ Algorithmic Differentiation Package with Source Code Generation
pthread_pool_c.hpp
1
const
char
CPPADCG_PTHREAD_POOL_C_FILE[] = R
"*=*(/* --------------------------------------------------------------------------
2
* CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
3
* Copyright (C) 2016 Ciengis
4
*
5
* CppADCodeGen is distributed under multiple licenses:
6
*
7
* - Eclipse Public License Version 1.0 (EPL1), and
8
* - GNU General Public License Version 3 (GPL3).
9
*
10
* EPL1 terms and conditions can be found in the file "epl-v10.txt", while
11
* terms and conditions for the GPL3 can be found in the file "gpl3.txt".
12
* ----------------------------------------------------------------------------
13
* Authors: Johan Hanssen Seferidis, Joao Leal
14
*/
15
21
#include <unistd.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <pthread.h>
25
#include <errno.h>
26
#include <time.h>
27
#if defined(__linux__)
28
#include <sys/prctl.h>
29
#include <time.h>
30
#include <sys/time.h>
31
#define __USE_GNU /* required before including resource.h */
32
#include <sys/resource.h>
33
#endif
34
35
/* Strategies used to distribute queued jobs among the worker threads. */
enum ScheduleStrategy {SCHED_STATIC = 1,
                       SCHED_DYNAMIC = 2,
                       SCHED_GUIDED = 3
};

/* How a new time measurement is folded into the stored reference time. */
enum ElapsedTimeReference {ELAPSED_TIME_AVG,
                           ELAPSED_TIME_MIN};

typedef struct ThPool ThPool;
typedef void (* thpool_function_type)(void*);

/* The process-wide thread pool, lazily created by cppadcg_thpool_prepare(). */
static ThPool* volatile cppadcg_pool = NULL;
/* Number of threads used when the pool is (next) created. */
static int cppadcg_pool_n_threads = 2;
static int cppadcg_pool_disabled = 0; // false
static int cppadcg_pool_verbose = 0; // false
/* Policy applied by cppadcg_thpool_update_order() to refresh reference times. */
static enum ElapsedTimeReference cppadcg_pool_time_update = ELAPSED_TIME_MIN;
static unsigned int cppadcg_pool_time_meas = 10; // default number of time measurements
/* Fraction of the remaining total work a thread may take at once (SCHED_GUIDED). */
static float cppadcg_pool_guided_maxgroupwork = 0.75;

static enum ScheduleStrategy schedule_strategy = SCHED_DYNAMIC;
55
56
/* ==================== INTERNAL HIGH LEVEL API ====================== */

/* Creates a pool with num_threads threads (NULL on failure or 0 threads). */
static ThPool* thpool_init(int num_threads);

/* Queues one job; avgElapsed/elapsed may be NULL (no benchmarking). */
static int thpool_add_job(ThPool*,
                          thpool_function_type function,
                          void* arg,
                          const float* avgElapsed,
                          float* elapsed);

/* Queues nJobs jobs at once, optionally reordered and benchmarked. */
static int thpool_add_jobs(ThPool*,
                           thpool_function_type functions[],
                           void* args[],
                           const float avgElapsed[],
                           float elapsed[],
                           const int order[],
                           int job2Thread[],
                           int nJobs,
                           int lastElapsedChanged);

/* Blocks until the queue is empty and no thread is working. */
static void thpool_wait(ThPool*);

/* Stops all threads and releases every pool resource. */
static void thpool_destroy(ThPool*);
/* ========================== STRUCTURES ============================ */
/* Binary semaphore */
typedef struct BSem {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    int v;                             /* semaphore value: 0 or 1 */
} BSem;


/* Job */
typedef struct Job {
    struct Job* prev;                  /* pointer to previous job */
    thpool_function_type function;     /* function pointer */
    void* arg;                         /* function's argument */
    const float* avgElapsed;           /* the last measurement of elapsed time */
    float* elapsed;                    /* the current elapsed time */
    struct timespec startTime;         /* initial time (verbose only) */
    struct timespec endTime;           /* final time (verbose only) */
    int id;                            /* a job identifier used for debugging */
} Job;

/* Work group: a batch of jobs executed consecutively by one thread */
typedef struct WorkGroup {
    struct WorkGroup* prev;            /* pointer to previous WorkGroup */
    struct Job* jobs;                  /* jobs */
    int size;                          /* number of jobs */
    struct timespec startTime;         /* initial time (verbose only) */
    struct timespec endTime;           /* final time (verbose only) */
} WorkGroup;

/* Job queue */
typedef struct JobQueue {
    pthread_mutex_t rwmutex;           /* used for queue r/w access */
    Job *front;                        /* pointer to front of queue */
    Job *rear;                         /* pointer to rear of queue */
    WorkGroup* group_front;            /* previously created work groups (SCHED_STATIC scheduling only)*/
    BSem *has_jobs;                    /* flag as binary semaphore */
    int len;                           /* number of jobs in queue */
    float total_time;                  /* total expected time to complete the work */
    float highest_expected_return;     /* the time when the last running thread is expected to request new work */
} JobQueue;


/* Thread */
typedef struct Thread {
    int id;                            /* friendly id */
    pthread_t pthread;                 /* pointer to actual thread */
    struct ThPool* thpool;             /* access to ThPool */
    WorkGroup* processed_groups;       /* processed work groups (verbose only) */
} Thread;


/* Threadpool */
typedef struct ThPool {
    Thread** threads;                  /* pointer to threads */
    int num_threads;                   /* total number of threads */
    volatile int num_threads_alive;    /* threads currently alive */
    volatile int num_threads_working;  /* threads currently working */
    pthread_mutex_t thcount_lock;      /* used for thread count etc */
    pthread_cond_t threads_all_idle;   /* signal to thpool_wait */
    JobQueue* jobqueue;                /* pointer to the job queue */
    volatile int threads_keepalive;    /* 0 tells workers to exit their loop */
} ThPool;
143
144
/* ========================== PUBLIC API ============================ */
145
146
void cppadcg_thpool_set_threads(int n) {
147
cppadcg_pool_n_threads = n;
148
}
149
150
int cppadcg_thpool_get_threads() {
151
return cppadcg_pool_n_threads;
152
}
153
154
void cppadcg_thpool_set_scheduler_strategy(enum ScheduleStrategy s) {
155
if(cppadcg_pool != NULL) {
156
pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
157
schedule_strategy = s;
158
pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
159
} else {
160
// pool not yet created
161
schedule_strategy = s;
162
}
163
}
164
165
enum ScheduleStrategy cppadcg_thpool_get_scheduler_strategy() {
166
if(cppadcg_pool != NULL) {
167
enum ScheduleStrategy e;
168
pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
169
e = schedule_strategy;
170
pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
171
return e;
172
} else {
173
// pool not yet created
174
return schedule_strategy;
175
}
176
}
177
178
void cppadcg_thpool_set_disabled(int disabled) {
179
cppadcg_pool_disabled = disabled;
180
}
181
182
int cppadcg_thpool_is_disabled() {
183
return cppadcg_pool_disabled;
184
}
185
186
void cppadcg_thpool_set_guided_maxgroupwork(float v) {
187
if(cppadcg_pool != NULL) {
188
pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
189
cppadcg_pool_guided_maxgroupwork = v;
190
pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
191
} else {
192
// pool not yet created
193
cppadcg_pool_guided_maxgroupwork = v;
194
}
195
}
196
197
float cppadcg_thpool_get_guided_maxgroupwork() {
198
if(cppadcg_pool != NULL) {
199
float r;
200
pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
201
r = cppadcg_pool_guided_maxgroupwork;
202
pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
203
return r;
204
} else {
205
// pool not yet created
206
return cppadcg_pool_guided_maxgroupwork;
207
}
208
}
209
210
unsigned int cppadcg_thpool_get_n_time_meas() {
211
return cppadcg_pool_time_meas;
212
}
213
214
void cppadcg_thpool_set_n_time_meas(unsigned int n) {
215
cppadcg_pool_time_meas = n;
216
}
217
218
void cppadcg_thpool_set_verbose(int v) {
219
cppadcg_pool_verbose = v;
220
}
221
222
enum ElapsedTimeReference cppadcg_thpool_get_time_meas_ref() {
223
return cppadcg_pool_time_update;
224
}
225
226
void cppadcg_thpool_set_time_meas_ref(enum ElapsedTimeReference r) {
227
cppadcg_pool_time_update = r;
228
}
229
230
int cppadcg_thpool_is_verbose() {
231
return cppadcg_pool_verbose;
232
}
233
234
void cppadcg_thpool_prepare() {
235
if(cppadcg_pool == NULL) {
236
cppadcg_pool = thpool_init(cppadcg_pool_n_threads);
237
}
238
}
239
240
/**
 * Queues one job for execution by the pool. When the pool is disabled or
 * could not be created, the job runs synchronously in the calling thread.
 *
 * @param function the job to execute
 * @param arg argument passed to the job
 * @param avgElapsed reference elapsed time (may be NULL)
 * @param elapsed output for the measured elapsed time (may be NULL)
 */
void cppadcg_thpool_add_job(thpool_function_type function,
                            void* arg,
                            float* avgElapsed,
                            float* elapsed) {
    if (cppadcg_pool_disabled) {
        // pool disabled: run in the calling thread
        function(arg);
        return;
    }

    cppadcg_thpool_prepare();
    if (cppadcg_pool == NULL) {
        // pool unavailable: run in the calling thread
        function(arg);
        return;
    }

    thpool_add_job(cppadcg_pool, function, arg, avgElapsed, elapsed);
}
255
256
/**
 * Queues a batch of jobs for the pool. When the pool is disabled or could
 * not be created, the jobs run sequentially in the calling thread.
 *
 * @param functions the jobs to execute
 * @param args one argument per job
 * @param avgElapsed reference elapsed times (may be NULL)
 * @param elapsed outputs for the measured elapsed times (may be NULL)
 * @param order optional permutation mapping queue position to job index
 * @param job2Thread job-to-thread assignment (SCHED_STATIC)
 * @param nJobs number of jobs
 * @param lastElapsedChanged nonzero when timings changed since the last call
 */
void cppadcg_thpool_add_jobs(thpool_function_type functions[],
                             void* args[],
                             const float avgElapsed[],
                             float elapsed[],
                             const int order[],
                             int job2Thread[],
                             int nJobs,
                             int lastElapsedChanged) {
    int i;

    if (!cppadcg_pool_disabled) {
        cppadcg_thpool_prepare();
        if (cppadcg_pool != NULL) {
            thpool_add_jobs(cppadcg_pool, functions, args, avgElapsed, elapsed,
                            order, job2Thread, nJobs, lastElapsedChanged);
            return;
        }
    }

    // thread pool not used: execute sequentially in this thread
    for (i = 0; i < nJobs; ++i) {
        functions[i](args[i]);
    }
}
278
279
void cppadcg_thpool_wait() {
280
if(cppadcg_pool != NULL) {
281
thpool_wait(cppadcg_pool);
282
}
283
}
284
285
/* (value, original index) pair used to sort jobs by expected duration */
typedef struct pair_double_int {
    float val;
    int index;
} pair_double_int;

/* qsort comparator ordering pair_double_int by ascending val */
static int comparePair(const void* a, const void* b) {
    float va = ((const pair_double_int*) a)->val;
    float vb = ((const pair_double_int*) b)->val;
    if (va < vb)
        return -1;
    if (va == vb)
        return 0;
    return 1;
}
297
298
void cppadcg_thpool_update_order(float refElapsed[],
299
unsigned int nTimeMeas,
300
const float elapsed[],
301
int order[],
302
int nJobs) {
303
if(nJobs == 0 || refElapsed == NULL || elapsed == NULL || order == NULL)
304
return;
305
306
struct pair_double_int elapsedOrder[nJobs];
307
int i;
308
int nonZero = 0; // false
309
310
for(i = 0; i < nJobs; ++i) {
311
if(elapsed[i] != 0) {
312
nonZero = 1;
313
break;
314
}
315
}
316
317
if (!nonZero) {
318
if (cppadcg_pool_verbose) {
319
fprintf(stdout, "order not updated: all times are zero\n");
320
}
321
return;
322
}
323
324
if(cppadcg_pool_time_update == ELAPSED_TIME_AVG) {
325
for (i = 0; i < nJobs; ++i) {
326
refElapsed[i] = (refElapsed[i] * nTimeMeas + elapsed[i]) / (nTimeMeas + 1);
327
elapsedOrder[i].val = refElapsed[i];
328
elapsedOrder[i].index = i;
329
}
330
} else {
331
// cppadcg_pool_time_update == ELAPSED_TIME_MIN
332
for (i = 0; i < nJobs; ++i) {
333
if(nTimeMeas == 0 || elapsed[i] < refElapsed[i]) {
334
refElapsed[i] = elapsed[i];
335
}
336
elapsedOrder[i].val = refElapsed[i];
337
elapsedOrder[i].index = i;
338
}
339
}
340
341
qsort(elapsedOrder, nJobs, sizeof(struct pair_double_int), comparePair);
342
343
for (i = 0; i < nJobs; ++i) {
344
order[elapsedOrder[i].index] = nJobs - i - 1; // descending order
345
}
346
347
if (cppadcg_pool_verbose) {
348
fprintf(stdout, "new order (%i values):\n", nTimeMeas + 1);
349
for (i = 0; i < nJobs; ++i) {
350
fprintf(stdout, " job id: %i order: %i time: %e s\n", i, order[i], refElapsed[i]);
351
}
352
}
353
354
}
355
356
void cppadcg_thpool_shutdown() {
357
if(cppadcg_pool != NULL) {
358
thpool_destroy(cppadcg_pool);
359
cppadcg_pool = NULL;
360
}
361
}
362
363
/* ========================== PROTOTYPES ============================ */

/* Prints (verbose only) and frees the work groups processed by each thread. */
static void thpool_cleanup(ThPool* thpool);

/* Allocates and starts one worker thread. */
static int thread_init(ThPool* thpool,
                       Thread** thread,
                       int id);
static void* thread_do(Thread* thread);
static void thread_destroy(Thread* thread);

/* Job queue management (callers must respect the queue's rwmutex). */
static int jobqueue_init(ThPool* thpool);
static void jobqueue_clear(ThPool* thpool);
static void jobqueue_push(JobQueue* queue,
                          Job* newjob_p);
static void jobqueue_multipush(JobQueue* queue,
                               Job* newjob[],
                               int nJobs);
static int jobqueue_push_static_jobs(ThPool* thpool,
                                     Job* newjobs[],
                                     const float avgElapsed[],
                                     int jobs2thread[],
                                     int nJobs,
                                     int lastElapsedChanged);
static WorkGroup* jobqueue_pull(ThPool* thpool, int id);
static void jobqueue_destroy(ThPool* thpool);

/* Binary semaphore operations (implementations elsewhere in this file). */
static void bsem_init(BSem *bsem, int value);
static void bsem_reset(BSem *bsem);
static void bsem_post(BSem *bsem);
static void bsem_post_all(BSem *bsem);
static void bsem_wait(BSem *bsem);
396
/* ============================ TIME ============================== */
397
398
static float get_thread_time(struct timespec* cputime,
399
int* info) {
400
*info = clock_gettime(CLOCK_THREAD_CPUTIME_ID, cputime);
401
if(*info == 0) {
402
return cputime->tv_sec + cputime->tv_nsec * 1e-9f;
403
} else {
404
fprintf(stderr, "failed clock_gettime()\n");
405
return 0;
406
}
407
}
408
409
static float get_monotonic_time(struct timespec* time,
410
int* info) {
411
*info = clock_gettime(CLOCK_MONOTONIC, time);
412
if(*info == 0) {
413
return time->tv_sec + time->tv_nsec * 1e-9f;
414
} else {
415
fprintf(stderr, "failed clock_gettime()\n");
416
return 0;
417
}
418
}
419
420
static void get_monotonic_time2(struct timespec* time) {
421
int info;
422
info = clock_gettime(CLOCK_MONOTONIC, time);
423
if(info != 0) {
424
time->tv_sec = 0;
425
time->tv_nsec = 0;
426
fprintf(stderr, "failed clock_gettime()\n");
427
}
428
}
429
430
/* Computes *result = *end - *start, normalizing tv_nsec into [0, 1e9). */
void timespec_diff(struct timespec* end,
                   struct timespec* start,
                   struct timespec* result) {
    long nsec = end->tv_nsec - start->tv_nsec;
    time_t sec = end->tv_sec - start->tv_sec;

    if (nsec < 0) {
        /* borrow one second so the nanosecond part stays non-negative */
        sec -= 1;
        nsec += 1000000000;
    }

    result->tv_sec = sec;
    result->tv_nsec = nsec;
}
441
442
/* ========================== THREADPOOL ============================ */
443
461
/* Creates a thread pool with num_threads worker threads.
 * Returns NULL when num_threads <= 0 (the pool is then globally disabled) or
 * when memory cannot be allocated. Blocks until all threads have started. */
struct ThPool* thpool_init(int num_threads) {
    if (num_threads < 0) {
        num_threads = 0;
    }

    if(cppadcg_pool_verbose) {
        fprintf(stdout, "thpool_init(): Thread pool created with %i threads\n", num_threads);
    }

    if(num_threads == 0) {
        cppadcg_pool_disabled = 1; // true
        return NULL;
    }

    /* Make new thread pool */
    ThPool* thpool;
    thpool = (ThPool*) malloc(sizeof(ThPool));
    if (thpool == NULL) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for thread pool\n");
        return NULL;
    }
    thpool->num_threads = num_threads;
    thpool->num_threads_alive = 0;
    thpool->num_threads_working = 0;
    thpool->threads_keepalive = 1;

    /* Initialize the job queue */
    if (jobqueue_init(thpool) == -1) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for job queue\n");
        free(thpool);
        return NULL;
    }

    /* Make threads in pool */
    thpool->threads = (Thread**) malloc(num_threads * sizeof(Thread*));
    if (thpool->threads == NULL) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for threads\n");
        jobqueue_destroy(thpool);
        free(thpool->jobqueue);
        free(thpool);
        return NULL;
    }

    pthread_mutex_init(&(thpool->thcount_lock), NULL);
    pthread_cond_init(&thpool->threads_all_idle, NULL);

    /* Thread init */
    int n;
    for (n = 0; n < num_threads; n++) {
        /* NOTE(review): thread_init()'s return value is ignored; if a thread
         * fails to start, the busy-wait below never terminates — confirm */
        thread_init(thpool, &thpool->threads[n], n);
    }

    /* Wait for threads to initialize (busy-wait on a counter each worker
     * increments under thcount_lock) */
    while (thpool->num_threads_alive != num_threads) {}

    return thpool;
}
518
546
static int thpool_add_job(ThPool* thpool,
547
thpool_function_type function,
548
void* arg,
549
const float* avgElapsed,
550
float* elapsed) {
551
Job* newjob;
552
553
newjob = (struct Job*) malloc(sizeof(struct Job));
554
if (newjob == NULL) {
555
fprintf(stderr, "thpool_add_job(): Could not allocate memory for new job\n");
556
return -1;
557
}
558
559
/* add function and argument */
560
newjob->function = function;
561
newjob->arg = arg;
562
newjob->avgElapsed = avgElapsed;
563
newjob->elapsed = elapsed;
564
565
/* add job to queue */
566
jobqueue_push(thpool->jobqueue, newjob);
567
568
return 0;
569
}
570
571
static int thpool_add_jobs(ThPool* thpool,
572
thpool_function_type functions[],
573
void* args[],
574
const float avgElapsed[],
575
float elapsed[],
576
const int order[],
577
int job2Thread[],
578
int nJobs,
579
int lastElapsedChanged) {
580
Job* newjobs[nJobs];
581
int i;
582
int j;
583
584
for (i = 0; i < nJobs; ++i) {
585
newjobs[i] = (Job*) malloc(sizeof(Job));
586
if (newjobs[i] == NULL) {
587
fprintf(stderr, "thpool_add_jobs(): Could not allocate memory for new jobs\n");
588
return -1;
589
}
590
591
j = order != NULL ? order[i] : i;
592
/* add function and argument */
593
newjobs[i]->function = functions[j];
594
newjobs[i]->arg = args[j];
595
newjobs[i]->id = i;
596
if (avgElapsed != NULL)
597
newjobs[i]->avgElapsed = &avgElapsed[j];
598
else
599
newjobs[i]->avgElapsed = NULL;
600
601
if (elapsed != NULL)
602
newjobs[i]->elapsed = &elapsed[j];
603
else
604
newjobs[i]->elapsed = NULL;
605
}
606
607
/* add jobs to queue */
608
if (schedule_strategy == SCHED_STATIC && avgElapsed != NULL && order != NULL && nJobs > 0 && avgElapsed[0] > 0) {
609
return jobqueue_push_static_jobs(thpool, newjobs, avgElapsed, job2Thread, nJobs, lastElapsedChanged);
610
} else {
611
jobqueue_multipush(thpool->jobqueue, newjobs, nJobs);
612
return 0;
613
}
614
}
615
619
/* SCHED_STATIC: partitions nJobs jobs into one work group per thread so that
 * each group's expected duration is balanced, then pushes the groups onto the
 * queue in a single locked operation.
 *
 * @param newjobs pre-allocated jobs; copied into groups and freed here
 * @param avgElapsed expected duration of each job (required)
 * @param jobs2thread in/out: job -> thread assignment; recomputed when
 *        lastElapsedChanged or when no previous assignment exists
 * @return 0 on success, -1 when memory cannot be allocated
 */
static int jobqueue_push_static_jobs(ThPool* thpool,
                                     Job* newjobs[],
                                     const float avgElapsed[],
                                     int jobs2thread[],
                                     int nJobs,
                                     int lastElapsedChanged) {
    float total_duration, target_duration, next_duration, best_duration;
    int i, j, iBest;
    int added;
    int num_threads = thpool->num_threads;
    int* n_jobs;
    float* durations = NULL;
    WorkGroup** groups;
    WorkGroup* group;

    /* never create more groups than jobs */
    if(nJobs < num_threads)
        num_threads = nJobs;

    n_jobs = (int*) malloc(num_threads * sizeof(int));
    if (n_jobs == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
        return -1;
    }

    groups = (WorkGroup**) malloc(num_threads * sizeof(WorkGroup*));
    if (groups == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
        free(n_jobs);
        return -1;
    }

    for (i = 0; i < num_threads; ++i) {
        n_jobs[i] = 0;
    }

    total_duration = 0;
    for (i = 0; i < nJobs; ++i) {
        total_duration += avgElapsed[i];
    }


    if (nJobs > 0 && (lastElapsedChanged || jobs2thread[0] < 0)) {
        /* (re)compute the job -> thread assignment */
        durations = (float*) malloc(num_threads * sizeof(float));
        if (durations == NULL) {
            fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
            free(n_jobs);
            free(groups);
            return -1;
        }

        for(i = 0; i < num_threads; ++i) {
            durations[i] = 0;
        }

        // decide in which work group to place each job
        target_duration = total_duration / num_threads;

        for (j = 0; j < nJobs; ++j) {
            /* first-fit: place the job in the first group still under the
             * target duration */
            added = 0;
            for (i = 0; i < num_threads; ++i) {
                next_duration = durations[i] + avgElapsed[j];
                if (next_duration < target_duration) {
                    durations[i] = next_duration;
                    n_jobs[i]++;
                    jobs2thread[j] = i;
                    added = 1;
                    break;
                }
            }

            if (!added) {
                /* all groups would exceed the target: pick the group whose
                 * total stays smallest */
                best_duration = durations[0] + avgElapsed[j];
                iBest = 0;
                for (i = 1; i < num_threads; ++i) {
                    next_duration = durations[i] + avgElapsed[j];
                    if (next_duration < best_duration) {
                        best_duration = next_duration;
                        iBest = i;
                    }
                }
                durations[iBest] = best_duration;
                n_jobs[iBest]++;
                jobs2thread[j] = iBest;
            }
        }

    } else {
        // reuse existing information

        for (j = 0; j < nJobs; ++j) {
            n_jobs[j obs2thread[j]]++;
        }
    }

    /* NOTE(review): the two mallocs below are unchecked — an allocation
     * failure here would crash when the groups are filled */
    for (i = 0; i < num_threads; ++i) {
        group = (WorkGroup*) malloc(sizeof(WorkGroup));
        group->size = 0;
        group->jobs = (Job*) malloc(n_jobs[i] * sizeof(Job));
        groups[i] = group;
    }
    /* chain the groups: groups[0] -> groups[1] -> ... */
    for (i = 0; i < num_threads - 1; ++i) {
        groups[i]->prev = groups[i + 1];
    }
    groups[num_threads - 1]->prev = NULL;

    // place jobs on the work groups
    for (j = 0; j < nJobs; ++j) {
        i = jobs2thread[j];
        group = groups[i];
        group->jobs[group->size] = *newjobs[j]; // copy
        group->size++;
        free(newjobs[j]);
    }

    if (cppadcg_pool_verbose) {
        if (durations != NULL) {
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs for %e s\n", i, groups[i]->size, durations[i]);
            }
        } else {
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs\n", i, groups[i]->size);
            }
        }
    }

    /* publish the chained groups at the front of the queue and wake workers */
    pthread_mutex_lock(&thpool->jobqueue->rwmutex);

    groups[num_threads - 1]->prev = thpool->jobqueue->group_front;
    thpool->jobqueue->group_front = groups[0];

    bsem_post_all(thpool->jobqueue->has_jobs);

    pthread_mutex_unlock(&thpool->jobqueue->rwmutex);

    // clean up
    free(durations);
    free(n_jobs);
    free(groups);

    return 0;
}
767
794
/* Blocks until the job queue is empty (including static work groups) and all
 * worker threads are idle, then resets the queue's timing statistics. */
static void thpool_wait(ThPool* thpool) {
    pthread_mutex_lock(&thpool->thcount_lock);
    /* NOTE(review): jobqueue->len and group_front are read while holding only
     * thcount_lock, not jobqueue->rwmutex — this is a data race on the queue
     * fields; confirm whether the threads_all_idle wake-up protocol makes it
     * benign */
    while (thpool->jobqueue->len || thpool->jobqueue->group_front || thpool->num_threads_working) {
        pthread_cond_wait(&thpool->threads_all_idle, &thpool->thcount_lock);
    }
    thpool->jobqueue->total_time = 0;
    thpool->jobqueue->highest_expected_return = 0;
    pthread_mutex_unlock(&thpool->thcount_lock);

    /* report and free the processed work groups (verbose only) */
    thpool_cleanup(thpool);
}
805
806
813
/* Prints the per-group and per-job timings collected in verbose mode and
 * frees each thread's processed work-group records. No-op unless verbose. */
void thpool_cleanup(ThPool* thpool) {
    // for debugging only

    struct timespec diffTime;
    int gid = 0;                  /* running group counter across all threads */
    Thread* thread;
    WorkGroup* workGroup;
    WorkGroup* workGroupPrev;

    if (!cppadcg_pool_verbose) {
        return;
    }

    for (int j = 0; j < thpool->num_threads; ++j) {
        thread = thpool->threads[j];

        /* walk the singly-linked list of groups this thread processed */
        workGroup = thread->processed_groups;
        while (workGroup != NULL) {
            timespec_diff(&workGroup->endTime, &workGroup->startTime, &diffTime);
            fprintf(stdout, "# Thread %i, Group %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld, executed %i jobs\n",
                    thread->id, gid, workGroup->startTime.tv_sec, workGroup->startTime.tv_nsec, workGroup->endTime.tv_sec, workGroup->endTime.tv_nsec, diffTime.tv_sec,
                    diffTime.tv_nsec, workGroup->size);

            for (int i = 0; i < workGroup->size; ++i) {
                Job* job = &workGroup->jobs[i];

                timespec_diff(&job->endTime, &job->startTime, &diffTime);
                fprintf(stdout, "## Thread %i, Group %i, Job %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\n",
                        thread->id, gid, job->id, job->startTime.tv_sec, job->startTime.tv_nsec, job->endTime.tv_sec, job->endTime.tv_nsec, diffTime.tv_sec,
                        diffTime.tv_nsec);
            }

            gid++;

            workGroupPrev = workGroup->prev;

            // clean-up
            free(workGroup->jobs);
            free(workGroup);

            workGroup = workGroupPrev;
        }

        thread->processed_groups = NULL;
    }
}
859
879
/* Stops all worker threads, waits for them to exit, and frees every resource
 * owned by the pool (job queue, thread records, the pool itself). */
static void thpool_destroy(ThPool* thpool) {
    /* No need to destroy if it's NULL */
    if (thpool == NULL) return;

    volatile int threads_total = thpool->num_threads_alive;

    /* End each thread's infinite loop */
    thpool->threads_keepalive = 0;

    /* Give one second to kill idle threads */
    double TIMEOUT = 1.0;
    time_t start, end;
    double tpassed = 0.0;
    time(&start);
    while (tpassed < TIMEOUT && thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);
        time(&end);
        tpassed = difftime(end, start);
    }

    /* Poll remaining threads */
    while (thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);
        sleep(1);
    }

    /* cleanup current work groups */
    thpool_cleanup(thpool);

    /* Job queue cleanup */
    jobqueue_destroy(thpool);
    free(thpool->jobqueue);

    /* Deallocs */
    int n;
    for (n = 0; n < threads_total; n++) {
        thread_destroy(thpool->threads[n]);
    }
    free(thpool->threads);
    free(thpool);

    if(cppadcg_pool_verbose) {
        fprintf(stdout, "thpool_destroy(): thread pool destroyed\n");
    }
}
924
925
926
/* ============================ THREAD ============================== */
927
928
929
/* Initialize a thread in the thread pool
930
*
931
* @param thread address to the pointer of the thread to be created
932
* @param id id to be given to the thread
933
* @return 0 on success, -1 otherwise.
934
*/
935
static int thread_init(ThPool* thpool,
936
Thread** thread,
937
int id) {
938
939
*thread = (Thread*) malloc(sizeof(Thread));
940
if (*thread == NULL) {
941
fprintf(stderr, "thread_init(): Could not allocate memory for thread\n");
942
return -1;
943
}
944
945
(*thread)->thpool = thpool;
946
(*thread)->id = id;
947
(*thread)->processed_groups = NULL;
948
949
pthread_create(&(*thread)->pthread, NULL, (void*) thread_do, (*thread));
950
pthread_detach((*thread)->pthread);
951
return 0;
952
}
953
954
/* What each thread is doing
 *
 * In principle this is an endless loop. The only time this loop gets interrupted is once
 * thpool_destroy() is invoked or the program exits.
 *
 * @param thread thread that will run this function
 * @return nothing
 */
static void* thread_do(Thread* thread) {
    float elapsed;
    int info;
    struct timespec cputime;
    JobQueue* queue;
    WorkGroup* workGroup;
    Job* job;
    thpool_function_type func_buff;
    void* arg_buff;
    int i;

    /* Set thread name for profiling and debugging */
    char thread_name[128] = {0};
    sprintf(thread_name, "thread-pool-%d", thread->id);

#if defined(__linux__)
    /* Use prctl instead to prevent using _GNU_SOURCE flag and implicit declaration */
    prctl(PR_SET_NAME, thread_name);
#elif defined(__APPLE__) && defined(__MACH__)
    pthread_setname_np(thread_name);
#else
    fprintf(stderr, "thread_do(): pthread_setname_np is not supported on this system");
#endif

    /* Assure all threads have been created before starting serving */
    ThPool* thpool = thread->thpool;

    /* Mark thread as alive (initialized) — thpool_init() busy-waits on this */
    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive += 1;
    pthread_mutex_unlock(&thpool->thcount_lock);

    queue = thpool->jobqueue;

    while (thpool->threads_keepalive) {

        /* sleep until work arrives (or shutdown wakes everyone) */
        bsem_wait(queue->has_jobs);

        if (!thpool->threads_keepalive) {
            break;
        }

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working++;
        pthread_mutex_unlock(&thpool->thcount_lock);

        /* inner loop: drain work groups until the queue is empty */
        while (thpool->threads_keepalive) {
            /* Read job from queue and execute it */
            pthread_mutex_lock(&queue->rwmutex);
            workGroup = jobqueue_pull(thpool, thread->id);
            pthread_mutex_unlock(&queue->rwmutex);

            if (workGroup == NULL)
                break;

            if (cppadcg_pool_verbose) {
                get_monotonic_time2(&workGroup->startTime);
            }

            for (i = 0; i < workGroup->size; ++i) {
                job = &workGroup->jobs[i];

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->startTime);
                }

                /* benchmark only when the caller supplied a destination */
                int do_benchmark = job->elapsed != NULL;
                if (do_benchmark) {
                    elapsed = -get_thread_time(&cputime, &info);
                }

                /* Execute the job */
                func_buff = job->function;
                arg_buff = job->arg;
                func_buff(arg_buff);

                if (do_benchmark && info == 0) {
                    elapsed += get_thread_time(&cputime, &info);
                    if (info == 0) {
                        (*job->elapsed) = elapsed;
                    }
                }

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->endTime);
                }
            }

            if (cppadcg_pool_verbose) {
                get_monotonic_time2(&workGroup->endTime);

                /* keep the group so thpool_cleanup() can report its timings */
                if (thread->processed_groups == NULL) {
                    thread->processed_groups = workGroup;
                } else {
                    workGroup->prev = thread->processed_groups;
                    thread->processed_groups = workGroup;
                }
            } else {
                free(workGroup->jobs);
                free(workGroup);
            }
        }

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working--;
        if (!thpool->num_threads_working) {
            /* last active worker: wake any thpool_wait() callers */
            pthread_cond_signal(&thpool->threads_all_idle);
        }
        pthread_mutex_unlock(&thpool->thcount_lock);
    }

    /* thread is exiting (shutdown) */
    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive--;
    pthread_mutex_unlock(&thpool->thcount_lock);

    return NULL;
}
1079
1080
1081
/* Frees a thread record (the detached pthread itself exits on its own) */
static void thread_destroy(Thread* thread) {
    free(thread);
}
1085
1086
1087
/* ============================ JOB QUEUE =========================== */
1088
1089
1090
/* Initialize queue */
1091
static int jobqueue_init(ThPool* thpool) {
1092
1093
JobQueue* queue = (JobQueue*) malloc(sizeof(JobQueue));
1094
if (queue == NULL) {
1095
return -1;
1096
}
1097
thpool->jobqueue = queue;
1098
queue->len = 0;
1099
queue->front = NULL;
1100
queue->rear = NULL;
1101
queue->group_front = NULL;
1102
queue->total_time = 0;
1103
queue->highest_expected_return = 0;
1104
1105
queue->has_jobs = (BSem*) malloc(sizeof(BSem));
1106
if (queue->has_jobs == NULL) {
1107
return -1;
1108
}
1109
1110
pthread_mutex_init(&(queue->rwmutex), NULL);
1111
bsem_init(queue->has_jobs, 0);
1112
1113
return 0;
1114
}
1115
1116
1117
/* Clear the queue: drains and frees every remaining work group, then resets
 * the queue to its initial empty state. */
static void jobqueue_clear(ThPool* thpool) {
    WorkGroup* group;
    int size;

    /* pull until an empty group is returned (id -1 = no specific thread) */
    do {
        group = jobqueue_pull(thpool, -1);
        if (group == NULL) {
            size = 0;
        } else {
            size = group->size;
            free(group->jobs);
            free(group);
        }
    } while (size > 0);

    thpool->jobqueue->front = NULL;
    thpool->jobqueue->rear = NULL;
    bsem_reset(thpool->jobqueue->has_jobs);
    thpool->jobqueue->len = 0;
    thpool->jobqueue->group_front = NULL;
    thpool->jobqueue->total_time = 0;
    thpool->jobqueue->highest_expected_return = 0;
}
1141
1142
1146
static void jobqueue_push_internal(JobQueue* queue,
1147
Job* newjob) {
1148
newjob->prev = NULL;
1149
1150
switch (queue->len) {
1151
1152
case 0: /* if no jobs in queue */
1153
queue->front = newjob;
1154
queue->rear = newjob;
1155
break;
1156
1157
default: /* if jobs in queue */
1158
queue->rear->prev = newjob;
1159
queue->rear = newjob;
1160
1161
}
1162
if(newjob->avgElapsed != NULL) {
1163
queue->total_time += *newjob->avgElapsed;
1164
}
1165
queue->len++;
1166
}
1167
1171
static void jobqueue_push(JobQueue* queue,
1172
Job* newjob) {
1173
pthread_mutex_lock(&queue->rwmutex);
1174
1175
jobqueue_push_internal(queue, newjob);
1176
1177
bsem_post(queue->has_jobs);
1178
1179
pthread_mutex_unlock(&queue->rwmutex);
1180
}
1181
1182
1186
static void jobqueue_multipush(JobQueue* queue,
1187
Job* newjob[],
1188
int nJobs) {
1189
int i;
1190
1191
pthread_mutex_lock(&queue->rwmutex);
1192
1193
for(i = 0; i < nJobs; ++i) {
1194
jobqueue_push_internal(queue, newjob[i]);
1195
}
1196
1197
bsem_post_all(queue->has_jobs);
1198
1199
pthread_mutex_unlock(&queue->rwmutex);
1200
}
1201
1202
/* Removes and returns the job at the front of the queue (NULL when empty),
 * keeping the expected total time consistent. Caller holds queue->rwmutex. */
static Job* jobqueue_extract_single(JobQueue* queue) {
    Job* job = queue->front;

    if (queue->len == 0) {
        return NULL;
    }

    if (queue->len == 1) {
        /* last job: reset the queue completely */
        queue->front = NULL;
        queue->rear = NULL;
        queue->len = 0;
        queue->total_time = 0;
        queue->highest_expected_return = 0;
        return job;
    }

    /* more than one job: advance the front pointer */
    queue->front = job->prev;
    queue->len--;
    if (job->avgElapsed != NULL) {
        queue->total_time -= *job->avgElapsed;
    }
    return job;
}
1226
1227
/* Removes one job from the queue and wraps it in a single-job work group.
 * When the queue is empty, the group is left with size 0 and no jobs. */
static void jobqueue_extract_single_group(JobQueue* queue,
                                          WorkGroup* group) {
    Job* job = jobqueue_extract_single(queue);
    if(job != NULL) {
        group->size = 1;
        /* NOTE(review): malloc result is unchecked; on failure the extracted
         * job would be lost and the copy below would dereference NULL */
        group->jobs = (Job*) malloc(sizeof(Job));
        group->jobs[0] = *job; // copy
        free(job);
    } else {
        group->size = 0;
        group->jobs = NULL;
    }
}
1240
1246
/**
 * Take the next group of work for a worker thread, according to the active
 * scheduling strategy:
 *  - SCHED_STATIC:  hand out a pre-built group from queue->group_front;
 *  - SCHED_DYNAMIC: hand out a single job (also the fallback when only one
 *    job is queued or no timing information is available);
 *  - SCHED_GUIDED:  bundle several jobs whose summed average durations
 *    approach a per-thread target, to reduce locking overhead.
 *
 * Caller must hold the queue lock (this function only re-posts has_jobs).
 *
 * @param thpool  the owning thread pool
 * @param id      worker thread index (used only for verbose logging)
 * @return a heap-allocated WorkGroup (possibly with size 0), or NULL when
 *         there is nothing to do; caller takes ownership.
 */
static WorkGroup* jobqueue_pull(ThPool* thpool,
                                int id) {

    WorkGroup* group;
    Job* job;
    float current_time;
    float duration, duration_next, min_duration, target_duration;
    struct timespec timeAux;
    int info;
    int i;
    JobQueue* queue = thpool->jobqueue;

    if (schedule_strategy == SCHED_STATIC && queue->group_front != NULL) {
        // STATIC: groups were pre-partitioned; just detach the front one
        group = queue->group_front;

        queue->group_front = group->prev;
        group->prev = NULL;

    } else if (queue->len == 0) {
        // nothing to do
        group = NULL;

    } else if (schedule_strategy == SCHED_DYNAMIC || queue->len == 1 || queue->total_time <= 0) {
        // SCHED_DYNAMIC (also the fallback path for GUIDED/STATIC when a
        // multi-job group cannot or need not be formed)
        group = (WorkGroup*) malloc(sizeof(WorkGroup));
        group->prev = NULL;

        if (cppadcg_pool_verbose) {
            if (schedule_strategy == SCHED_GUIDED) {
                if (queue->len == 1)
                    fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
                else if (queue->total_time <= 0)
                    fprintf(stdout, "jobqueue_pull(): Thread %i using single-job instead of multi-job (no timing information)\n", id);
            } else if (schedule_strategy == SCHED_STATIC && queue->len >= 1) {
                if (queue->total_time >= 0) {
                    // this should not happen but just in case the user messed up
                    fprintf(stderr, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
                } else {
                    fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
                }
            }
        }

        jobqueue_extract_single_group(thpool->jobqueue, group);
    } else { // schedule_strategy == SCHED_GUIDED
        // SCHED_GUIDED: greedily bundle consecutive jobs until the group's
        // estimated duration reaches the per-thread target
        group = (WorkGroup*) malloc(sizeof(WorkGroup));
        group->prev = NULL;

        job = queue->front;

        if (job->avgElapsed == NULL) {
            if (cppadcg_pool_verbose) {
                fprintf(stderr, "jobqueue_pull(): Thread %i using single job instead of multi-job (No timing information for current job)\n", id);
            }
            // cannot use this strategy (something went wrong!)
            jobqueue_extract_single_group(thpool->jobqueue, group);

        } else {
            // there are at least 2 jobs in the queue
            group->size = 1;
            duration = *job->avgElapsed;
            duration_next = duration;
            job = job->prev;
            target_duration = queue->total_time * cppadcg_pool_guided_maxgroupwork / thpool->num_threads; // always positive
            current_time = get_monotonic_time(&timeAux, &info);

            if (queue->highest_expected_return > 0 && info) {
                // do not finish much earlier than the slowest outstanding
                // group: raise the target toward that expected return time
                min_duration = 0.9f * (queue->highest_expected_return - current_time);
                if (target_duration < min_duration) {
                    target_duration = min_duration;
                }
            }

            do {
                if (job->avgElapsed == NULL) {
                    break; // untimed job: stop growing the group here
                }
                duration_next += *job->avgElapsed;
                if (duration_next < target_duration) {
                    group->size++;
                    duration = duration_next;
                } else {
                    break; // adding this job would overshoot the target
                }
                job = job->prev;
            } while (job != queue->front);

            if (cppadcg_pool_verbose) {
                fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with %i jobs for %e s (target: %e s)\n", id, group->size, duration, target_duration);
            }

            // detach the chosen jobs from the queue, copying each by value
            group->jobs = (Job*) malloc(group->size * sizeof(Job));
            for (i = 0; i < group->size; ++i) {
                job = jobqueue_extract_single(thpool->jobqueue);
                group->jobs[i] = *job; // copy
                free(job);
            }

            duration_next = current_time + duration; // the time when the current work is expected to end
            if(duration_next > queue->highest_expected_return)
                queue->highest_expected_return = duration_next;
        }

    }
    /* more than one job in queue -> post it */
    if (queue->len > 0 || queue->group_front != NULL) {
        bsem_post(queue->has_jobs);
    }

    return group;
}
1359
1360
1361
/* Free all queue resources back to the system */
1362
static void jobqueue_destroy(ThPool* thpool) {
1363
jobqueue_clear(thpool);
1364
free(thpool->jobqueue->has_jobs);
1365
}
1366
1367
1368
1369
1370
1371
/* ======================== SYNCHRONISATION ========================= */
1372
1373
1374
/* Init semaphore to 1 or 0 */
1375
static void bsem_init(BSem* bsem, int value) {
1376
if (value < 0 || value > 1) {
1377
fprintf(stderr, "bsem_init(): Binary semaphore can take only values 1 or 0");
1378
exit(1);
1379
}
1380
pthread_mutex_init(&(bsem->mutex), NULL);
1381
pthread_cond_init(&(bsem->cond), NULL);
1382
bsem->v = value;
1383
}
1384
1385
1386
/* Reset semaphore to 0 */
1387
static void bsem_reset(BSem* bsem) {
1388
bsem_init(bsem, 0);
1389
}
1390
1391
1392
/* Post to at least one thread */
1393
static void bsem_post(BSem* bsem) {
1394
pthread_mutex_lock(&bsem->mutex);
1395
bsem->v = 1;
1396
pthread_cond_signal(&bsem->cond);
1397
pthread_mutex_unlock(&bsem->mutex);
1398
}
1399
1400
1401
/* Post to all threads */
1402
static void bsem_post_all(BSem* bsem) {
1403
pthread_mutex_lock(&bsem->mutex);
1404
bsem->v = 1;
1405
pthread_cond_broadcast(&bsem->cond);
1406
pthread_mutex_unlock(&bsem->mutex);
1407
}
1408
1409
1410
/* Wait on semaphore until semaphore has value 0 */
1411
static void bsem_wait(BSem* bsem) {
1412
pthread_mutex_lock(&bsem->mutex);
1413
while (bsem->v != 1) {
1414
pthread_cond_wait(&bsem->cond, &bsem->mutex);
1415
}
1416
bsem->v = 0;
1417
pthread_mutex_unlock(&bsem->mutex);
1418
}
1419
)*=*";
1420
1421
const size_t CPPADCG_PTHREAD_POOL_C_FILE_SIZE = 43345;
1422
include
cppad
cg
model
threadpool
pthread_pool_c.hpp
Generated on Tue Jan 23 2024 13:46:34 for CppADCodeGen by
1.8.17