CppADCodeGen 2.4.3
A C++ Algorithmic Differentiation Package with Source Code Generation
pthread_pool_c.hpp
1const char CPPADCG_PTHREAD_POOL_C_FILE[] = R"*=*(/* --------------------------------------------------------------------------
2 * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
3 * Copyright (C) 2016 Ciengis
4 *
5 * CppADCodeGen is distributed under multiple licenses:
6 *
7 * - Eclipse Public License Version 1.0 (EPL1), and
8 * - GNU General Public License Version 3 (GPL3).
9 *
10 * EPL1 terms and conditions can be found in the file "epl-v10.txt", while
11 * terms and conditions for the GPL3 can be found in the file "gpl3.txt".
12 * ----------------------------------------------------------------------------
13 * Authors: Johan Hanssen Seferidis, Joao Leal
14 */
15
21#include <unistd.h>
22#include <stdio.h>
23#include <stdlib.h>
24#include <pthread.h>
25#include <errno.h>
26#include <time.h>
27#if defined(__linux__)
28#include <sys/prctl.h>
29#include <time.h>
30#include <sys/time.h>
31#define __USE_GNU /* required before including resource.h */
32#include <sys/resource.h>
33#endif
34
35enum ScheduleStrategy {SCHED_STATIC = 1,
36 SCHED_DYNAMIC = 2,
37 SCHED_GUIDED = 3
38 };
39
40enum ElapsedTimeReference {ELAPSED_TIME_AVG,
41 ELAPSED_TIME_MIN};
42
43typedef struct ThPool ThPool;
44typedef void (* thpool_function_type)(void*);
45
46static ThPool* volatile cppadcg_pool = NULL;
47static int cppadcg_pool_n_threads = 2;
48static int cppadcg_pool_disabled = 0; // false
49static int cppadcg_pool_verbose = 0; // false
50static enum ElapsedTimeReference cppadcg_pool_time_update = ELAPSED_TIME_MIN;
51static unsigned int cppadcg_pool_time_meas = 10; // default number of time measurements
52static float cppadcg_pool_guided_maxgroupwork = 0.75;
53
54static enum ScheduleStrategy schedule_strategy = SCHED_DYNAMIC;
55
56/* ==================== INTERNAL HIGH LEVEL API ====================== */
57
58static ThPool* thpool_init(int num_threads);
59
60static int thpool_add_job(ThPool*,
61 thpool_function_type function,
62 void* arg,
63 const float* avgElapsed,
64 float* elapsed);
65
66static int thpool_add_jobs(ThPool*,
67 thpool_function_type functions[],
68 void* args[],
69 const float avgElapsed[],
70 float elapsed[],
71 const int order[],
72 int job2Thread[],
73 int nJobs,
74 int lastElapsedChanged);
75
76static void thpool_wait(ThPool*);
77
78static void thpool_destroy(ThPool*);
79
80/* ========================== STRUCTURES ============================ */
81/* Binary semaphore */
82typedef struct BSem {
83 pthread_mutex_t mutex;
84 pthread_cond_t cond;
85 int v;
86} BSem;
87
88
89/* Job */
90typedef struct Job {
91 struct Job* prev; /* pointer to previous job */
92 thpool_function_type function; /* function pointer */
93 void* arg; /* function's argument */
94 const float* avgElapsed; /* the last measurement of elapsed time */
95 float* elapsed; /* the current elapsed time */
96 struct timespec startTime; /* initial time (verbose only) */
97 struct timespec endTime; /* final time (verbose only) */
98 int id; /* a job identifier used for debugging */
99} Job;
100
101/* Work group */
102typedef struct WorkGroup {
103 struct WorkGroup* prev; /* pointer to previous WorkGroup */
104 struct Job* jobs; /* jobs */
105 int size; /* number of jobs */
106 struct timespec startTime; /* initial time (verbose only) */
107 struct timespec endTime; /* final time (verbose only) */
108} WorkGroup;
109
110/* Job queue */
111typedef struct JobQueue {
112 pthread_mutex_t rwmutex; /* used for queue r/w access */
113 Job *front; /* pointer to front of queue */
114 Job *rear; /* pointer to rear of queue */
115 WorkGroup* group_front; /* previously created work groups (SCHED_STATIC scheduling only)*/
116 BSem *has_jobs; /* flag as binary semaphore */
117 int len; /* number of jobs in queue */
118 float total_time; /* total expected time to complete the work */
119 float highest_expected_return; /* the time when the last running thread is expected to request new work */
120} JobQueue;
121
122
123/* Thread */
124typedef struct Thread {
125 int id; /* friendly id */
126 pthread_t pthread; /* pointer to actual thread */
127 struct ThPool* thpool; /* access to ThPool */
128 WorkGroup* processed_groups; /* processed work groups (verbose only) */
129} Thread;
130
131
132/* Threadpool */
133typedef struct ThPool {
134 Thread** threads; /* pointer to threads */
135 int num_threads; /* total number of threads */
136 volatile int num_threads_alive; /* threads currently alive */
137 volatile int num_threads_working; /* threads currently working */
138 pthread_mutex_t thcount_lock; /* used for thread count etc */
139 pthread_cond_t threads_all_idle; /* signal to thpool_wait */
140 JobQueue* jobqueue; /* pointer to the job queue */
141 volatile int threads_keepalive;
142} ThPool;
143
144/* ========================== PUBLIC API ============================ */
145
146void cppadcg_thpool_set_threads(int n) {
147 cppadcg_pool_n_threads = n;
148}
149
150int cppadcg_thpool_get_threads() {
151 return cppadcg_pool_n_threads;
152}
153
154void cppadcg_thpool_set_scheduler_strategy(enum ScheduleStrategy s) {
155 if(cppadcg_pool != NULL) {
156 pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
157 schedule_strategy = s;
158 pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
159 } else {
160 // pool not yet created
161 schedule_strategy = s;
162 }
163}
164
165enum ScheduleStrategy cppadcg_thpool_get_scheduler_strategy() {
166 if(cppadcg_pool != NULL) {
167 enum ScheduleStrategy e;
168 pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
169 e = schedule_strategy;
170 pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
171 return e;
172 } else {
173 // pool not yet created
174 return schedule_strategy;
175 }
176}
177
178void cppadcg_thpool_set_disabled(int disabled) {
179 cppadcg_pool_disabled = disabled;
180}
181
182int cppadcg_thpool_is_disabled() {
183 return cppadcg_pool_disabled;
184}
185
186void cppadcg_thpool_set_guided_maxgroupwork(float v) {
187 if(cppadcg_pool != NULL) {
188 pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
189 cppadcg_pool_guided_maxgroupwork = v;
190 pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
191 } else {
192 // pool not yet created
193 cppadcg_pool_guided_maxgroupwork = v;
194 }
195}
196
197float cppadcg_thpool_get_guided_maxgroupwork() {
198 if(cppadcg_pool != NULL) {
199 float r;
200 pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
201 r = cppadcg_pool_guided_maxgroupwork;
202 pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
203 return r;
204 } else {
205 // pool not yet created
206 return cppadcg_pool_guided_maxgroupwork;
207 }
208}
209
210unsigned int cppadcg_thpool_get_n_time_meas() {
211 return cppadcg_pool_time_meas;
212}
213
214void cppadcg_thpool_set_n_time_meas(unsigned int n) {
215 cppadcg_pool_time_meas = n;
216}
217
218void cppadcg_thpool_set_verbose(int v) {
219 cppadcg_pool_verbose = v;
220}
221
222enum ElapsedTimeReference cppadcg_thpool_get_time_meas_ref() {
223 return cppadcg_pool_time_update;
224}
225
226void cppadcg_thpool_set_time_meas_ref(enum ElapsedTimeReference r) {
227 cppadcg_pool_time_update = r;
228}
229
230int cppadcg_thpool_is_verbose() {
231 return cppadcg_pool_verbose;
232}
233
234void cppadcg_thpool_prepare() {
235 if(cppadcg_pool == NULL) {
236 cppadcg_pool = thpool_init(cppadcg_pool_n_threads);
237 }
238}
239
240void cppadcg_thpool_add_job(thpool_function_type function,
241 void* arg,
242 float* avgElapsed,
243 float* elapsed) {
244 if (!cppadcg_pool_disabled) {
245 cppadcg_thpool_prepare();
246 if (cppadcg_pool != NULL) {
247 thpool_add_job(cppadcg_pool, function, arg, avgElapsed, elapsed);
248 return;
249 }
250 }
251
252 // thread pool not used
253 (*function)(arg);
254}
255
256void cppadcg_thpool_add_jobs(thpool_function_type functions[],
257 void* args[],
258 const float avgElapsed[],
259 float elapsed[],
260 const int order[],
261 int job2Thread[],
262 int nJobs,
263 int lastElapsedChanged) {
264 int i;
265 if (!cppadcg_pool_disabled) {
266 cppadcg_thpool_prepare();
267 if (cppadcg_pool != NULL) {
268 thpool_add_jobs(cppadcg_pool, functions, args, avgElapsed, elapsed, order, job2Thread, nJobs, lastElapsedChanged);
269 return;
270 }
271 }
272
273 // thread pool not used
274 for (i = 0; i < nJobs; ++i) {
275 (*functions[i])(args[i]);
276 }
277}
278
279void cppadcg_thpool_wait() {
280 if(cppadcg_pool != NULL) {
281 thpool_wait(cppadcg_pool);
282 }
283}
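/* Usage sketch (illustrative only, not part of the generated API contract):
 * how caller code is assumed to drive the public thread pool API above.
 * The job function my_job and its data are hypothetical placeholders.
 *
 *   static void my_job(void* arg) {
 *       int* x = (int*) arg;
 *       *x += 1;                       // some independent work item
 *   }
 *
 *   static void example(void) {
 *       int data0 = 0, data1 = 0;
 *       thpool_function_type funcs[2] = {my_job, my_job};
 *       void* args[2] = {&data0, &data1};
 *       float avgElapsed[2] = {0, 0};  // reference times (none measured yet)
 *       float elapsed[2] = {0, 0};     // filled with the measured times
 *       int order[2] = {0, 1};         // job execution order
 *       int job2Thread[2] = {-1, -1};  // job-to-thread map for SCHED_STATIC
 *
 *       cppadcg_thpool_set_threads(2);
 *       cppadcg_thpool_prepare();
 *       cppadcg_thpool_add_jobs(funcs, args, avgElapsed, elapsed, order, job2Thread, 2, 0);
 *       cppadcg_thpool_wait();
 *       cppadcg_thpool_shutdown();
 *   }
 */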
284
285typedef struct pair_double_int {
286 float val;
287 int index;
288} pair_double_int;
289
290static int comparePair(const void* a, const void* b) {
291 if (((pair_double_int*) a)->val < ((pair_double_int*) b)->val)
292 return -1;
293 if (((pair_double_int*) a)->val == ((pair_double_int*) b)->val)
294 return 0;
295 return 1;
296}
297
298void cppadcg_thpool_update_order(float refElapsed[],
299 unsigned int nTimeMeas,
300 const float elapsed[],
301 int order[],
302 int nJobs) {
303 if(nJobs == 0 || refElapsed == NULL || elapsed == NULL || order == NULL)
304 return;
305
306 struct pair_double_int elapsedOrder[nJobs];
307 int i;
308 int nonZero = 0; // false
309
310 for(i = 0; i < nJobs; ++i) {
311 if(elapsed[i] != 0) {
312 nonZero = 1;
313 break;
314 }
315 }
316
317 if (!nonZero) {
318 if (cppadcg_pool_verbose) {
319 fprintf(stdout, "order not updated: all times are zero\n");
320 }
321 return;
322 }
323
324 if(cppadcg_pool_time_update == ELAPSED_TIME_AVG) {
325 for (i = 0; i < nJobs; ++i) {
326 refElapsed[i] = (refElapsed[i] * nTimeMeas + elapsed[i]) / (nTimeMeas + 1);
327 elapsedOrder[i].val = refElapsed[i];
328 elapsedOrder[i].index = i;
329 }
330 } else {
331 // cppadcg_pool_time_update == ELAPSED_TIME_MIN
332 for (i = 0; i < nJobs; ++i) {
333 if(nTimeMeas == 0 || elapsed[i] < refElapsed[i]) {
334 refElapsed[i] = elapsed[i];
335 }
336 elapsedOrder[i].val = refElapsed[i];
337 elapsedOrder[i].index = i;
338 }
339 }
340
341 qsort(elapsedOrder, nJobs, sizeof(struct pair_double_int), comparePair);
342
343 for (i = 0; i < nJobs; ++i) {
344 order[elapsedOrder[i].index] = nJobs - i - 1; // descending order
345 }
346
347 if (cppadcg_pool_verbose) {
348 fprintf(stdout, "new order (%i values):\n", nTimeMeas + 1);
349 for (i = 0; i < nJobs; ++i) {
350 fprintf(stdout, " job id: %i order: %i time: %e s\n", i, order[i], refElapsed[i]);
351 }
352 }
353
354}
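/* Worked example (illustrative, assuming ELAPSED_TIME_MIN and nTimeMeas == 0):
 * with elapsed = {2e-3, 5e-3, 1e-3} the reference times become those same values.
 * Sorting ascending visits jobs 2, 0, 1 and assigns nJobs - i - 1, giving
 * order = {1, 0, 2}: the slowest job (index 1) gets rank 0 and the fastest
 * job (index 2) gets the last rank.
 */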
355
356void cppadcg_thpool_shutdown() {
357 if(cppadcg_pool != NULL) {
358 thpool_destroy(cppadcg_pool);
359 cppadcg_pool = NULL;
360 }
361}
362
363/* ========================== PROTOTYPES ============================ */
364
365static void thpool_cleanup(ThPool* thpool);
366
367static int thread_init(ThPool* thpool,
368 Thread** thread,
369 int id);
370static void* thread_do(Thread* thread);
371static void thread_destroy(Thread* thread);
372
373static int jobqueue_init(ThPool* thpool);
374static void jobqueue_clear(ThPool* thpool);
375static void jobqueue_push(JobQueue* queue,
376 Job* newjob_p);
377static void jobqueue_multipush(JobQueue* queue,
378 Job* newjob[],
379 int nJobs);
380static int jobqueue_push_static_jobs(ThPool* thpool,
381 Job* newjobs[],
382 const float avgElapsed[],
383 int jobs2thread[],
384 int nJobs,
385 int lastElapsedChanged);
386static WorkGroup* jobqueue_pull(ThPool* thpool, int id);
387static void jobqueue_destroy(ThPool* thpool);
388
389static void bsem_init(BSem *bsem, int value);
390static void bsem_reset(BSem *bsem);
391static void bsem_post(BSem *bsem);
392static void bsem_post_all(BSem *bsem);
393static void bsem_wait(BSem *bsem);
394
395
396/* ============================ TIME ============================== */
397
398static float get_thread_time(struct timespec* cputime,
399 int* info) {
400 *info = clock_gettime(CLOCK_THREAD_CPUTIME_ID, cputime);
401 if(*info == 0) {
402 return cputime->tv_sec + cputime->tv_nsec * 1e-9f;
403 } else {
404 fprintf(stderr, "failed clock_gettime()\n");
405 return 0;
406 }
407}
408
409static float get_monotonic_time(struct timespec* time,
410 int* info) {
411 *info = clock_gettime(CLOCK_MONOTONIC, time);
412 if(*info == 0) {
413 return time->tv_sec + time->tv_nsec * 1e-9f;
414 } else {
415 fprintf(stderr, "failed clock_gettime()\n");
416 return 0;
417 }
418}
419
420static void get_monotonic_time2(struct timespec* time) {
421 int info;
422 info = clock_gettime(CLOCK_MONOTONIC, time);
423 if(info != 0) {
424 time->tv_sec = 0;
425 time->tv_nsec = 0;
426 fprintf(stderr, "failed clock_gettime()\n");
427 }
428}
429
430void timespec_diff(struct timespec* end,
431 struct timespec* start,
432 struct timespec* result) {
433 if ((end->tv_nsec - start->tv_nsec) < 0) {
434 result->tv_sec = end->tv_sec - start->tv_sec - 1;
435 result->tv_nsec = end->tv_nsec - start->tv_nsec + 1000000000;
436 } else {
437 result->tv_sec = end->tv_sec - start->tv_sec;
438 result->tv_nsec = end->tv_nsec - start->tv_nsec;
439 }
440}
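/* Example (illustrative): start = {2, 900000000} and end = {4, 100000000} give a
 * negative nanosecond difference, so one second is borrowed and the result is
 * {1, 200000000}, i.e. 1.2 s.
 */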
441
442/* ========================== THREADPOOL ============================ */
443
461struct ThPool* thpool_init(int num_threads) {
462 if (num_threads < 0) {
463 num_threads = 0;
464 }
465
466 if(cppadcg_pool_verbose) {
467 fprintf(stdout, "thpool_init(): Thread pool created with %i threads\n", num_threads);
468 }
469
470 if(num_threads == 0) {
471 cppadcg_pool_disabled = 1; // true
472 return NULL;
473 }
474
475 /* Make new thread pool */
476 ThPool* thpool;
477 thpool = (ThPool*) malloc(sizeof(ThPool));
478 if (thpool == NULL) {
479 fprintf(stderr, "thpool_init(): Could not allocate memory for thread pool\n");
480 return NULL;
481 }
482 thpool->num_threads = num_threads;
483 thpool->num_threads_alive = 0;
484 thpool->num_threads_working = 0;
485 thpool->threads_keepalive = 1;
486
487 /* Initialize the job queue */
488 if (jobqueue_init(thpool) == -1) {
489 fprintf(stderr, "thpool_init(): Could not allocate memory for job queue\n");
490 free(thpool);
491 return NULL;
492 }
493
494 /* Make threads in pool */
495 thpool->threads = (Thread**) malloc(num_threads * sizeof(Thread*));
496 if (thpool->threads == NULL) {
497 fprintf(stderr, "thpool_init(): Could not allocate memory for threads\n");
498 jobqueue_destroy(thpool);
499 free(thpool->jobqueue);
500 free(thpool);
501 return NULL;
502 }
503
504 pthread_mutex_init(&(thpool->thcount_lock), NULL);
505 pthread_cond_init(&thpool->threads_all_idle, NULL);
506
507 /* Thread init */
508 int n;
509 for (n = 0; n < num_threads; n++) {
510 thread_init(thpool, &thpool->threads[n], n);
511 }
512
513 /* Wait for threads to initialize */
514 while (thpool->num_threads_alive != num_threads) {}
515
516 return thpool;
517}
518
546static int thpool_add_job(ThPool* thpool,
547 thpool_function_type function,
548 void* arg,
549 const float* avgElapsed,
550 float* elapsed) {
551 Job* newjob;
552
553 newjob = (struct Job*) malloc(sizeof(struct Job));
554 if (newjob == NULL) {
555 fprintf(stderr, "thpool_add_job(): Could not allocate memory for new job\n");
556 return -1;
557 }
558
559 /* add function and argument */
560 newjob->function = function;
561 newjob->arg = arg;
562 newjob->avgElapsed = avgElapsed;
563 newjob->elapsed = elapsed;
564
565 /* add job to queue */
566 jobqueue_push(thpool->jobqueue, newjob);
567
568 return 0;
569}
570
571static int thpool_add_jobs(ThPool* thpool,
572 thpool_function_type functions[],
573 void* args[],
574 const float avgElapsed[],
575 float elapsed[],
576 const int order[],
577 int job2Thread[],
578 int nJobs,
579 int lastElapsedChanged) {
580 Job* newjobs[nJobs];
581 int i;
582 int j;
583
584 for (i = 0; i < nJobs; ++i) {
585 newjobs[i] = (Job*) malloc(sizeof(Job));
586 if (newjobs[i] == NULL) {
587 fprintf(stderr, "thpool_add_jobs(): Could not allocate memory for new jobs\n");
588 return -1;
589 }
590
591 j = order != NULL ? order[i] : i;
592 /* add function and argument */
593 newjobs[i]->function = functions[j];
594 newjobs[i]->arg = args[j];
595 newjobs[i]->id = i;
596 if (avgElapsed != NULL)
597 newjobs[i]->avgElapsed = &avgElapsed[j];
598 else
599 newjobs[i]->avgElapsed = NULL;
600
601 if (elapsed != NULL)
602 newjobs[i]->elapsed = &elapsed[j];
603 else
604 newjobs[i]->elapsed = NULL;
605 }
606
607 /* add jobs to queue */
608 if (schedule_strategy == SCHED_STATIC && avgElapsed != NULL && order != NULL && nJobs > 0 && avgElapsed[0] > 0) {
609 return jobqueue_push_static_jobs(thpool, newjobs, avgElapsed, job2Thread, nJobs, lastElapsedChanged);
610 } else {
611 jobqueue_multipush(thpool->jobqueue, newjobs, nJobs);
612 return 0;
613 }
614}
615
619static int jobqueue_push_static_jobs(ThPool* thpool,
620 Job* newjobs[],
621 const float avgElapsed[],
622 int jobs2thread[],
623 int nJobs,
624 int lastElapsedChanged) {
625 float total_duration, target_duration, next_duration, best_duration;
626 int i, j, iBest;
627 int added;
628 int num_threads = thpool->num_threads;
629 int* n_jobs;
630 float* durations = NULL;
631 WorkGroup** groups;
632 WorkGroup* group;
633
634 if(nJobs < num_threads)
635 num_threads = nJobs;
636
637 n_jobs = (int*) malloc(num_threads * sizeof(int));
638 if (n_jobs == NULL) {
639 fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
640 return -1;
641 }
642
643 groups = (WorkGroup**) malloc(num_threads * sizeof(WorkGroup*));
644 if (groups == NULL) {
645 fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
646 free(n_jobs);
647 return -1;
648 }
649
650 for (i = 0; i < num_threads; ++i) {
651 n_jobs[i] = 0;
652 }
653
654 total_duration = 0;
655 for (i = 0; i < nJobs; ++i) {
656 total_duration += avgElapsed[i];
657 }
658
659
660 if (nJobs > 0 && (lastElapsedChanged || jobs2thread[0] < 0)) {
661 durations = (float*) malloc(num_threads * sizeof(float));
662 if (durations == NULL) {
663 fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
664 free(n_jobs);
665 free(groups);
666 return -1;
667 }
668
669 for(i = 0; i < num_threads; ++i) {
670 durations[i] = 0;
671 }
672
673 // decide in which work group to place each job
674 target_duration = total_duration / num_threads;
675
676 for (j = 0; j < nJobs; ++j) {
677 added = 0;
678 for (i = 0; i < num_threads; ++i) {
679 next_duration = durations[i] + avgElapsed[j];
680 if (next_duration < target_duration) {
681 durations[i] = next_duration;
682 n_jobs[i]++;
683 jobs2thread[j] = i;
684 added = 1;
685 break;
686 }
687 }
688
689 if (!added) {
690 best_duration = durations[0] + avgElapsed[j];
691 iBest = 0;
692 for (i = 1; i < num_threads; ++i) {
693 next_duration = durations[i] + avgElapsed[j];
694 if (next_duration < best_duration) {
695 best_duration = next_duration;
696 iBest = i;
697 }
698 }
699 durations[iBest] = best_duration;
700 n_jobs[iBest]++;
701 jobs2thread[j] = iBest;
702 }
703 }
704
705 } else {
706 // reuse existing information
707
708 for (j = 0; j < nJobs; ++j) {
709 n_jobs[jobs2thread[j]]++;
710 }
711 }
712
716 for (i = 0; i < num_threads; ++i) {
717 group = (WorkGroup*) malloc(sizeof(WorkGroup));
718 group->size = 0;
719 group->jobs = (Job*) malloc(n_jobs[i] * sizeof(Job));
720 groups[i] = group;
721 }
722 for (i = 0; i < num_threads - 1; ++i) {
723 groups[i]->prev = groups[i + 1];
724 }
725 groups[num_threads - 1]->prev = NULL;
726
727 // place jobs on the work groups
728 for (j = 0; j < nJobs; ++j) {
729 i = jobs2thread[j];
730 group = groups[i];
731 group->jobs[group->size] = *newjobs[j]; // copy
732 group->size++;
733 free(newjobs[j]);
734 }
735
736 if (cppadcg_pool_verbose) {
737 if (durations != NULL) {
738 for (i = 0; i < num_threads; ++i) {
739 fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs for %e s\n", i, groups[i]->size, durations[i]);
740 }
741 } else {
742 for (i = 0; i < num_threads; ++i) {
743 fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs\n", i, groups[i]->size);
744 }
745 }
746 }
747
751 pthread_mutex_lock(&thpool->jobqueue->rwmutex);
752
753 groups[num_threads - 1]->prev = thpool->jobqueue->group_front;
754 thpool->jobqueue->group_front = groups[0];
755
756 bsem_post_all(thpool->jobqueue->has_jobs);
757
758 pthread_mutex_unlock(&thpool->jobqueue->rwmutex);
759
760 // clean up
761 free(durations);
762 free(n_jobs);
763 free(groups);
764
765 return 0;
766}
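/* Worked example of the greedy partition above (illustrative): 4 jobs with
 * avgElapsed = {4, 3, 2, 1} seconds on 2 threads give total_duration = 10 s and
 * target_duration = 5 s. Job 0 (4 s) fits in group 0; job 1 (3 s) does not fit
 * in group 0 (4 + 3) and goes to group 1; job 2 (2 s) fits in neither
 * (6 s vs 5 s) and falls back to the smaller result, group 1 (5 s); job 3 (1 s)
 * falls back to group 0 (5 s). Final groups: {0, 3} and {1, 2}, 5 s each.
 */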
767
794static void thpool_wait(ThPool* thpool) {
795 pthread_mutex_lock(&thpool->thcount_lock);
796    while (thpool->jobqueue->len || thpool->jobqueue->group_front || thpool->num_threads_working) { /* note: len and group_front are read without holding the queue mutex */
797 pthread_cond_wait(&thpool->threads_all_idle, &thpool->thcount_lock);
798 }
799 thpool->jobqueue->total_time = 0;
800 thpool->jobqueue->highest_expected_return = 0;
801 pthread_mutex_unlock(&thpool->thcount_lock);
802
803 thpool_cleanup(thpool);
804}
805
806
813void thpool_cleanup(ThPool* thpool) {
814 // for debugging only
815
816 struct timespec diffTime;
817 int gid = 0;
818 Thread* thread;
819 WorkGroup* workGroup;
820 WorkGroup* workGroupPrev;
821
822 if (!cppadcg_pool_verbose) {
823 return;
824 }
825
826 for (int j = 0; j < thpool->num_threads; ++j) {
827 thread = thpool->threads[j];
828
829 workGroup = thread->processed_groups;
830 while (workGroup != NULL) {
831 timespec_diff(&workGroup->endTime, &workGroup->startTime, &diffTime);
832 fprintf(stdout, "# Thread %i, Group %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld, executed %i jobs\n",
833 thread->id, gid, workGroup->startTime.tv_sec, workGroup->startTime.tv_nsec, workGroup->endTime.tv_sec, workGroup->endTime.tv_nsec, diffTime.tv_sec,
834 diffTime.tv_nsec, workGroup->size);
835
836 for (int i = 0; i < workGroup->size; ++i) {
837 Job* job = &workGroup->jobs[i];
838
839 timespec_diff(&job->endTime, &job->startTime, &diffTime);
840 fprintf(stdout, "## Thread %i, Group %i, Job %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\n",
841 thread->id, gid, job->id, job->startTime.tv_sec, job->startTime.tv_nsec, job->endTime.tv_sec, job->endTime.tv_nsec, diffTime.tv_sec,
842 diffTime.tv_nsec);
843 }
844
845 gid++;
846
847 workGroupPrev = workGroup->prev;
848
849 // clean-up
850 free(workGroup->jobs);
851 free(workGroup);
852
853 workGroup = workGroupPrev;
854 }
855
856 thread->processed_groups = NULL;
857 }
858}
859
879static void thpool_destroy(ThPool* thpool) {
880    /* No need to destroy if it's NULL */
881 if (thpool == NULL) return;
882
883 volatile int threads_total = thpool->num_threads_alive;
884
885    /* End each thread's infinite loop */
886 thpool->threads_keepalive = 0;
887
888 /* Give one second to kill idle threads */
889 double TIMEOUT = 1.0;
890 time_t start, end;
891 double tpassed = 0.0;
892 time(&start);
893 while (tpassed < TIMEOUT && thpool->num_threads_alive) {
894 bsem_post_all(thpool->jobqueue->has_jobs);
895 time(&end);
896 tpassed = difftime(end, start);
897 }
898
899 /* Poll remaining threads */
900 while (thpool->num_threads_alive) {
901 bsem_post_all(thpool->jobqueue->has_jobs);
902 sleep(1);
903 }
904
905 /* cleanup current work groups */
906 thpool_cleanup(thpool);
907
908 /* Job queue cleanup */
909 jobqueue_destroy(thpool);
910 free(thpool->jobqueue);
911
912 /* Deallocs */
913 int n;
914 for (n = 0; n < threads_total; n++) {
915 thread_destroy(thpool->threads[n]);
916 }
917 free(thpool->threads);
918 free(thpool);
919
920 if(cppadcg_pool_verbose) {
921 fprintf(stdout, "thpool_destroy(): thread pool destroyed\n");
922 }
923}
924
925
926/* ============================ THREAD ============================== */
927
928
929/* Initialize a thread in the thread pool
930 *
931 * @param thread address to the pointer of the thread to be created
932 * @param id id to be given to the thread
933 * @return 0 on success, -1 otherwise.
934 */
935static int thread_init(ThPool* thpool,
936 Thread** thread,
937 int id) {
938
939 *thread = (Thread*) malloc(sizeof(Thread));
940 if (*thread == NULL) {
941 fprintf(stderr, "thread_init(): Could not allocate memory for thread\n");
942 return -1;
943 }
944
945 (*thread)->thpool = thpool;
946 (*thread)->id = id;
947 (*thread)->processed_groups = NULL;
948
949    pthread_create(&(*thread)->pthread, NULL, (void* (*)(void*)) thread_do, (*thread));
950 pthread_detach((*thread)->pthread);
951 return 0;
952}
953
954/* What each thread is doing
955*
956* In principle this is an endless loop. The only time this loop gets interrupted is once
957* thpool_destroy() is invoked or the program exits.
958*
959* @param thread thread that will run this function
960* @return nothing
961*/
962static void* thread_do(Thread* thread) {
963 float elapsed;
964 int info;
965 struct timespec cputime;
966 JobQueue* queue;
967 WorkGroup* workGroup;
968 Job* job;
969 thpool_function_type func_buff;
970 void* arg_buff;
971 int i;
972
973 /* Set thread name for profiling and debugging */
974 char thread_name[128] = {0};
975 sprintf(thread_name, "thread-pool-%d", thread->id);
976
977#if defined(__linux__)
978    /* Use prctl to avoid requiring the _GNU_SOURCE flag and an implicit declaration of pthread_setname_np */
979 prctl(PR_SET_NAME, thread_name);
980#elif defined(__APPLE__) && defined(__MACH__)
981 pthread_setname_np(thread_name);
982#else
983    fprintf(stderr, "thread_do(): pthread_setname_np is not supported on this system\n");
984#endif
985
986    /* Ensure all threads have been created before they start serving */
987 ThPool* thpool = thread->thpool;
988
989 /* Mark thread as alive (initialized) */
990 pthread_mutex_lock(&thpool->thcount_lock);
991 thpool->num_threads_alive += 1;
992 pthread_mutex_unlock(&thpool->thcount_lock);
993
994 queue = thpool->jobqueue;
995
996 while (thpool->threads_keepalive) {
997
998 bsem_wait(queue->has_jobs);
999
1000 if (!thpool->threads_keepalive) {
1001 break;
1002 }
1003
1004 pthread_mutex_lock(&thpool->thcount_lock);
1005 thpool->num_threads_working++;
1006 pthread_mutex_unlock(&thpool->thcount_lock);
1007
1008 while (thpool->threads_keepalive) {
1009 /* Read job from queue and execute it */
1010 pthread_mutex_lock(&queue->rwmutex);
1011 workGroup = jobqueue_pull(thpool, thread->id);
1012 pthread_mutex_unlock(&queue->rwmutex);
1013
1014 if (workGroup == NULL)
1015 break;
1016
1017 if (cppadcg_pool_verbose) {
1018 get_monotonic_time2(&workGroup->startTime);
1019 }
1020
1021 for (i = 0; i < workGroup->size; ++i) {
1022 job = &workGroup->jobs[i];
1023
1024 if (cppadcg_pool_verbose) {
1025 get_monotonic_time2(&job->startTime);
1026 }
1027
1028 int do_benchmark = job->elapsed != NULL;
1029 if (do_benchmark) {
1030 elapsed = -get_thread_time(&cputime, &info);
1031 }
1032
1033 /* Execute the job */
1034 func_buff = job->function;
1035 arg_buff = job->arg;
1036 func_buff(arg_buff);
1037
1038 if (do_benchmark && info == 0) {
1039 elapsed += get_thread_time(&cputime, &info);
1040 if (info == 0) {
1041 (*job->elapsed) = elapsed;
1042 }
1043 }
1044
1045 if (cppadcg_pool_verbose) {
1046 get_monotonic_time2(&job->endTime);
1047 }
1048 }
1049
1050 if (cppadcg_pool_verbose) {
1051 get_monotonic_time2(&workGroup->endTime);
1052
1053 if (thread->processed_groups == NULL) {
1054 thread->processed_groups = workGroup;
1055 } else {
1056 workGroup->prev = thread->processed_groups;
1057 thread->processed_groups = workGroup;
1058 }
1059 } else {
1060 free(workGroup->jobs);
1061 free(workGroup);
1062 }
1063 }
1064
1065 pthread_mutex_lock(&thpool->thcount_lock);
1066 thpool->num_threads_working--;
1067 if (!thpool->num_threads_working) {
1068 pthread_cond_signal(&thpool->threads_all_idle);
1069 }
1070 pthread_mutex_unlock(&thpool->thcount_lock);
1071 }
1072
1073 pthread_mutex_lock(&thpool->thcount_lock);
1074 thpool->num_threads_alive--;
1075 pthread_mutex_unlock(&thpool->thcount_lock);
1076
1077 return NULL;
1078}
1079
1080
1081/* Frees a thread */
1082static void thread_destroy(Thread* thread) {
1083 free(thread);
1084}
1085
1086
1087/* ============================ JOB QUEUE =========================== */
1088
1089
1090/* Initialize queue */
1091static int jobqueue_init(ThPool* thpool) {
1092
1093 JobQueue* queue = (JobQueue*) malloc(sizeof(JobQueue));
1094 if (queue == NULL) {
1095 return -1;
1096 }
1097 thpool->jobqueue = queue;
1098 queue->len = 0;
1099 queue->front = NULL;
1100 queue->rear = NULL;
1101 queue->group_front = NULL;
1102 queue->total_time = 0;
1103 queue->highest_expected_return = 0;
1104
1105 queue->has_jobs = (BSem*) malloc(sizeof(BSem));
1106 if (queue->has_jobs == NULL) {
1107 return -1;
1108 }
1109
1110 pthread_mutex_init(&(queue->rwmutex), NULL);
1111 bsem_init(queue->has_jobs, 0);
1112
1113 return 0;
1114}
1115
1116
1117/* Clear the queue */
1118static void jobqueue_clear(ThPool* thpool) {
1119 WorkGroup* group;
1120 int size;
1121
1122 do {
1123 group = jobqueue_pull(thpool, -1);
1124 if (group == NULL) {
1125 size = 0;
1126 } else {
1127 size = group->size;
1128 free(group->jobs);
1129 free(group);
1130 }
1131 } while (size > 0);
1132
1133 thpool->jobqueue->front = NULL;
1134 thpool->jobqueue->rear = NULL;
1135 bsem_reset(thpool->jobqueue->has_jobs);
1136 thpool->jobqueue->len = 0;
1137 thpool->jobqueue->group_front = NULL;
1138 thpool->jobqueue->total_time = 0;
1139 thpool->jobqueue->highest_expected_return = 0;
1140}
1141
1142
1146static void jobqueue_push_internal(JobQueue* queue,
1147 Job* newjob) {
1148 newjob->prev = NULL;
1149
1150 switch (queue->len) {
1151
1152 case 0: /* if no jobs in queue */
1153 queue->front = newjob;
1154 queue->rear = newjob;
1155 break;
1156
1157 default: /* if jobs in queue */
1158 queue->rear->prev = newjob;
1159 queue->rear = newjob;
1160
1161 }
1162 if(newjob->avgElapsed != NULL) {
1163 queue->total_time += *newjob->avgElapsed;
1164 }
1165 queue->len++;
1166}
1167
1171static void jobqueue_push(JobQueue* queue,
1172 Job* newjob) {
1173 pthread_mutex_lock(&queue->rwmutex);
1174
1175 jobqueue_push_internal(queue, newjob);
1176
1177 bsem_post(queue->has_jobs);
1178
1179 pthread_mutex_unlock(&queue->rwmutex);
1180}
1181
1182
1186static void jobqueue_multipush(JobQueue* queue,
1187 Job* newjob[],
1188 int nJobs) {
1189 int i;
1190
1191 pthread_mutex_lock(&queue->rwmutex);
1192
1193 for(i = 0; i < nJobs; ++i) {
1194 jobqueue_push_internal(queue, newjob[i]);
1195 }
1196
1197 bsem_post_all(queue->has_jobs);
1198
1199 pthread_mutex_unlock(&queue->rwmutex);
1200}
1201
1202static Job* jobqueue_extract_single(JobQueue* queue) {
1203 Job* job = queue->front;
1204
1205 switch (queue->len) {
1206 case 0: /* if no jobs in queue */
1207 return NULL;
1208
1209 case 1: /* if one job in queue */
1210 queue->front = NULL;
1211 queue->rear = NULL;
1212 queue->len = 0;
1213 queue->total_time = 0;
1214 queue->highest_expected_return = 0;
1215 return job;
1216
1217 default: /* if >1 jobs in queue */
1218 queue->front = job->prev;
1219 queue->len--;
1220 if(job->avgElapsed != NULL) {
1221 queue->total_time -= *job->avgElapsed;
1222 }
1223 return job;
1224 }
1225}
1226
1227static void jobqueue_extract_single_group(JobQueue* queue,
1228 WorkGroup* group) {
1229 Job* job = jobqueue_extract_single(queue);
1230 if(job != NULL) {
1231 group->size = 1;
1232 group->jobs = (Job*) malloc(sizeof(Job));
1233 group->jobs[0] = *job; // copy
1234 free(job);
1235 } else {
1236 group->size = 0;
1237 group->jobs = NULL;
1238 }
1239}
1240
1246static WorkGroup* jobqueue_pull(ThPool* thpool,
1247 int id) {
1248
1249 WorkGroup* group;
1250 Job* job;
1251 float current_time;
1252 float duration, duration_next, min_duration, target_duration;
1253 struct timespec timeAux;
1254 int info;
1255 int i;
1256 JobQueue* queue = thpool->jobqueue;
1257
1258 if (schedule_strategy == SCHED_STATIC && queue->group_front != NULL) {
1259 // STATIC
1260 group = queue->group_front;
1261
1262 queue->group_front = group->prev;
1263 group->prev = NULL;
1264
1265 } else if (queue->len == 0) {
1266 // nothing to do
1267 group = NULL;
1268
1269 } else if (schedule_strategy == SCHED_DYNAMIC || queue->len == 1 || queue->total_time <= 0) {
1270 // SCHED_DYNAMIC
1271 group = (WorkGroup*) malloc(sizeof(WorkGroup));
1272 group->prev = NULL;
1273
1274 if (cppadcg_pool_verbose) {
1275 if (schedule_strategy == SCHED_GUIDED) {
1276 if (queue->len == 1)
1277 fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1278 else if (queue->total_time <= 0)
1279 fprintf(stdout, "jobqueue_pull(): Thread %i using single-job instead of multi-job (no timing information)\n", id);
1280 } else if (schedule_strategy == SCHED_STATIC && queue->len >= 1) {
1281 if (queue->total_time >= 0) {
1282 // this should not happen but just in case the user messed up
1283 fprintf(stderr, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1284 } else {
1285 fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1286 }
1287 }
1288 }
1289
1290 jobqueue_extract_single_group(thpool->jobqueue, group);
1291 } else { // schedule_strategy == SCHED_GUIDED
1292 // SCHED_GUIDED
1293 group = (WorkGroup*) malloc(sizeof(WorkGroup));
1294 group->prev = NULL;
1295
1296 job = queue->front;
1297
1298 if (job->avgElapsed == NULL) {
1299 if (cppadcg_pool_verbose) {
1300 fprintf(stderr, "jobqueue_pull(): Thread %i using single job instead of multi-job (No timing information for current job)\n", id);
1301 }
1302 // cannot use this strategy (something went wrong!)
1303 jobqueue_extract_single_group(thpool->jobqueue, group);
1304
1305 } else {
1306 // there are at least 2 jobs in the queue
1307 group->size = 1;
1308 duration = *job->avgElapsed;
1309 duration_next = duration;
1310 job = job->prev;
1311 target_duration = queue->total_time * cppadcg_pool_guided_maxgroupwork / thpool->num_threads; // always positive
1312 current_time = get_monotonic_time(&timeAux, &info);
1313
1314            if (queue->highest_expected_return > 0 && info == 0) { /* only if the current time was obtained successfully */
1315 min_duration = 0.9f * (queue->highest_expected_return - current_time);
1316 if (target_duration < min_duration) {
1317 target_duration = min_duration;
1318 }
1319 }
1320
1321 do {
1322 if (job->avgElapsed == NULL) {
1323 break;
1324 }
1325 duration_next += *job->avgElapsed;
1326 if (duration_next < target_duration) {
1327 group->size++;
1328 duration = duration_next;
1329 } else {
1330 break;
1331 }
1332 job = job->prev;
1333            } while (job != NULL); /* stop at the rear of the queue */
1334
1335 if (cppadcg_pool_verbose) {
1336 fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with %i jobs for %e s (target: %e s)\n", id, group->size, duration, target_duration);
1337 }
1338
1339 group->jobs = (Job*) malloc(group->size * sizeof(Job));
1340 for (i = 0; i < group->size; ++i) {
1341 job = jobqueue_extract_single(thpool->jobqueue);
1342 group->jobs[i] = *job; // copy
1343 free(job);
1344 }
1345
1346 duration_next = current_time + duration; // the time when the current work is expected to end
1347 if(duration_next > queue->highest_expected_return)
1348 queue->highest_expected_return = duration_next;
1349 }
1350
1351 }
1352    /* if there is still work in the queue, wake up another thread */
1353 if (queue->len > 0 || queue->group_front != NULL) {
1354 bsem_post(queue->has_jobs);
1355 }
1356
1357 return group;
1358}
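/* Worked example of the SCHED_GUIDED branch above (illustrative): with
 * queue->total_time = 8 s, 4 threads and cppadcg_pool_guided_maxgroupwork = 0.75,
 * target_duration = 8 * 0.75 / 4 = 1.5 s. If the jobs at the front take
 * 0.5 s, 0.6 s and 0.7 s, the group keeps growing while the accumulated time
 * stays below the target: 0.5 s, then 1.1 s, but 1.8 s exceeds it, so the
 * thread receives a group of 2 jobs (~1.1 s of expected work).
 */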
1359
1360
1361/* Free all queue resources back to the system */
1362static void jobqueue_destroy(ThPool* thpool) {
1363 jobqueue_clear(thpool);
1364 free(thpool->jobqueue->has_jobs);
1365}
1366
1367
1368
1369
1370
1371/* ======================== SYNCHRONISATION ========================= */
1372
1373
1374/* Init semaphore to 1 or 0 */
1375static void bsem_init(BSem* bsem, int value) {
1376 if (value < 0 || value > 1) {
1377 fprintf(stderr, "bsem_init(): Binary semaphore can take only values 1 or 0");
1378 exit(1);
1379 }
1380 pthread_mutex_init(&(bsem->mutex), NULL);
1381 pthread_cond_init(&(bsem->cond), NULL);
1382 bsem->v = value;
1383}
1384
1385
1386/* Reset semaphore to 0 */
1387static void bsem_reset(BSem* bsem) {
1388 bsem_init(bsem, 0);
1389}
1390
1391
1392/* Post to at least one thread */
1393static void bsem_post(BSem* bsem) {
1394 pthread_mutex_lock(&bsem->mutex);
1395 bsem->v = 1;
1396 pthread_cond_signal(&bsem->cond);
1397 pthread_mutex_unlock(&bsem->mutex);
1398}
1399
1400
1401/* Post to all threads */
1402static void bsem_post_all(BSem* bsem) {
1403 pthread_mutex_lock(&bsem->mutex);
1404 bsem->v = 1;
1405 pthread_cond_broadcast(&bsem->cond);
1406 pthread_mutex_unlock(&bsem->mutex);
1407}
1408
1409
1410/* Wait on semaphore until it has value 1, then reset it to 0 */
1411static void bsem_wait(BSem* bsem) {
1412 pthread_mutex_lock(&bsem->mutex);
1413 while (bsem->v != 1) {
1414 pthread_cond_wait(&bsem->cond, &bsem->mutex);
1415 }
1416 bsem->v = 0;
1417 pthread_mutex_unlock(&bsem->mutex);
1418}
1419)*=*";
1420
1421const size_t CPPADCG_PTHREAD_POOL_C_FILE_SIZE = 43345;
1422