const char CPPADCG_PTHREAD_POOL_C_FILE[] = R"*=*(/* --------------------------------------------------------------------------
 * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
 *    Copyright (C) 2016 Ciengis
 *
 *  CppADCodeGen is distributed under multiple licenses:
 *
 *   - Eclipse Public License Version 1.0 (EPL1), and
 *   - GNU General Public License Version 3 (GPL3).
 *
 *  EPL1 terms and conditions can be found in the file "epl-v10.txt", while
 *  terms and conditions for the GPL3 can be found in the file "gpl3.txt".
 * ----------------------------------------------------------------------------
 * Authors: Johan Hanssen Seferidis, Joao Leal
 */

#define __USE_GNU /* required before including resource.h */
#include <sys/resource.h>

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#if defined(__linux__)
#include <sys/prctl.h>
#endif
enum ScheduleStrategy {SCHED_STATIC = 1,
                       SCHED_DYNAMIC,
                       SCHED_GUIDED};

enum ElapsedTimeReference {ELAPSED_TIME_AVG,
                           ELAPSED_TIME_MIN};
typedef struct ThPool ThPool;
typedef void (*thpool_function_type)(void*);

static ThPool* volatile cppadcg_pool = NULL;
static int cppadcg_pool_n_threads = 2;
static int cppadcg_pool_disabled = 0; // false
static int cppadcg_pool_verbose = 0; // false
static enum ElapsedTimeReference cppadcg_pool_time_update = ELAPSED_TIME_MIN;
static unsigned int cppadcg_pool_time_meas = 10; // default number of time measurements
static float cppadcg_pool_guided_maxgroupwork = 0.75;

static enum ScheduleStrategy schedule_strategy = SCHED_DYNAMIC;
/* ==================== INTERNAL HIGH LEVEL API ====================== */

static ThPool* thpool_init(int num_threads);

static int thpool_add_job(ThPool*,
                          thpool_function_type function,
                          const float* avgElapsed,

static int thpool_add_jobs(ThPool*,
                           thpool_function_type functions[],
                           const float avgElapsed[],
                           int lastElapsedChanged);

static void thpool_wait(ThPool*);

static void thpool_destroy(ThPool*);

/* ========================== STRUCTURES ============================ */
typedef struct BSem {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    int v;
} BSem;

typedef struct Job {
    struct Job* prev;              /* pointer to previous job */
    thpool_function_type function; /* function pointer */
    void* arg;                     /* function's argument */
    const float* avgElapsed;       /* the last measurement of elapsed time */
    float* elapsed;                /* the current elapsed time */
    struct timespec startTime;     /* initial time (verbose only) */
    struct timespec endTime;       /* final time (verbose only) */
    int id;                        /* a job identifier used for debugging */
} Job;
typedef struct WorkGroup {
    struct WorkGroup* prev;    /* pointer to previous WorkGroup */
    struct Job* jobs;          /* jobs */
    int size;                  /* number of jobs */
    struct timespec startTime; /* initial time (verbose only) */
    struct timespec endTime;   /* final time (verbose only) */
} WorkGroup;
typedef struct JobQueue {
    pthread_mutex_t rwmutex;       /* used for queue r/w access */
    Job* front;                    /* pointer to front of queue */
    Job* rear;                     /* pointer to rear of queue */
    WorkGroup* group_front;        /* previously created work groups (SCHED_STATIC scheduling only) */
    BSem* has_jobs;                /* flag as binary semaphore */
    int len;                       /* number of jobs in queue */
    float total_time;              /* total expected time to complete the work */
    float highest_expected_return; /* the time when the last running thread is expected to request new work */
} JobQueue;
typedef struct Thread {
    int id;                      /* friendly id */
    pthread_t pthread;           /* pointer to actual thread */
    struct ThPool* thpool;       /* access to ThPool */
    WorkGroup* processed_groups; /* processed work groups (verbose only) */
} Thread;
typedef struct ThPool {
    Thread** threads;                 /* pointer to threads */
    int num_threads;                  /* total number of threads */
    volatile int num_threads_alive;   /* threads currently alive */
    volatile int num_threads_working; /* threads currently working */
    pthread_mutex_t thcount_lock;     /* used for thread count etc */
    pthread_cond_t threads_all_idle;  /* signal to thpool_wait */
    JobQueue* jobqueue;               /* pointer to the job queue */
    volatile int threads_keepalive;
} ThPool;
/* ========================== PUBLIC API ============================ */

void cppadcg_thpool_set_threads(int n) {
    cppadcg_pool_n_threads = n;

int cppadcg_thpool_get_threads() {
    return cppadcg_pool_n_threads;

void cppadcg_thpool_set_scheduler_strategy(enum ScheduleStrategy s) {
    if(cppadcg_pool != NULL) {
        pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
        schedule_strategy = s;
        pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
        // pool not yet created
        schedule_strategy = s;

enum ScheduleStrategy cppadcg_thpool_get_scheduler_strategy() {
    if(cppadcg_pool != NULL) {
        enum ScheduleStrategy e;
        pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
        e = schedule_strategy;
        pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
        // pool not yet created
        return schedule_strategy;

void cppadcg_thpool_set_disabled(int disabled) {
    cppadcg_pool_disabled = disabled;

int cppadcg_thpool_is_disabled() {
    return cppadcg_pool_disabled;

void cppadcg_thpool_set_guided_maxgroupwork(float v) {
    if(cppadcg_pool != NULL) {
        pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
        cppadcg_pool_guided_maxgroupwork = v;
        pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
        // pool not yet created
        cppadcg_pool_guided_maxgroupwork = v;

float cppadcg_thpool_get_guided_maxgroupwork() {
    if(cppadcg_pool != NULL) {
        pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
        r = cppadcg_pool_guided_maxgroupwork;
        pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
        // pool not yet created
        return cppadcg_pool_guided_maxgroupwork;

unsigned int cppadcg_thpool_get_n_time_meas() {
    return cppadcg_pool_time_meas;

void cppadcg_thpool_set_n_time_meas(unsigned int n) {
    cppadcg_pool_time_meas = n;

void cppadcg_thpool_set_verbose(int v) {
    cppadcg_pool_verbose = v;

enum ElapsedTimeReference cppadcg_thpool_get_time_meas_ref() {
    return cppadcg_pool_time_update;

void cppadcg_thpool_set_time_meas_ref(enum ElapsedTimeReference r) {
    cppadcg_pool_time_update = r;

int cppadcg_thpool_is_verbose() {
    return cppadcg_pool_verbose;

void cppadcg_thpool_prepare() {
    if(cppadcg_pool == NULL) {
        cppadcg_pool = thpool_init(cppadcg_pool_n_threads);

void cppadcg_thpool_add_job(thpool_function_type function,
    if (!cppadcg_pool_disabled) {
        cppadcg_thpool_prepare();
        if (cppadcg_pool != NULL) {
            thpool_add_job(cppadcg_pool, function, arg, avgElapsed, elapsed);
        // thread pool not used

void cppadcg_thpool_add_jobs(thpool_function_type functions[],
                             const float avgElapsed[],
                             int lastElapsedChanged) {
    if (!cppadcg_pool_disabled) {
        cppadcg_thpool_prepare();
        if (cppadcg_pool != NULL) {
            thpool_add_jobs(cppadcg_pool, functions, args, avgElapsed, elapsed, order, job2Thread, nJobs, lastElapsedChanged);
        // thread pool not used
        for (i = 0; i < nJobs; ++i) {
            (*functions[i])(args[i]);

void cppadcg_thpool_wait() {
    if(cppadcg_pool != NULL) {
        thpool_wait(cppadcg_pool);
typedef struct pair_double_int {

static int comparePair(const void* a, const void* b) {
    if (((pair_double_int*) a)->val < ((pair_double_int*) b)->val)
    if (((pair_double_int*) a)->val == ((pair_double_int*) b)->val)

void cppadcg_thpool_update_order(float refElapsed[],
                                 unsigned int nTimeMeas,
                                 const float elapsed[],
    if(nJobs == 0 || refElapsed == NULL || elapsed == NULL || order == NULL)

    struct pair_double_int elapsedOrder[nJobs];
    int nonZero = 0; // false

    for(i = 0; i < nJobs; ++i) {
        if(elapsed[i] != 0) {

        if (cppadcg_pool_verbose) {
            fprintf(stdout, "order not updated: all times are zero\n");

    if(cppadcg_pool_time_update == ELAPSED_TIME_AVG) {
        for (i = 0; i < nJobs; ++i) {
            refElapsed[i] = (refElapsed[i] * nTimeMeas + elapsed[i]) / (nTimeMeas + 1);
            elapsedOrder[i].val = refElapsed[i];
            elapsedOrder[i].index = i;
        // cppadcg_pool_time_update == ELAPSED_TIME_MIN
        for (i = 0; i < nJobs; ++i) {
            if(nTimeMeas == 0 || elapsed[i] < refElapsed[i]) {
                refElapsed[i] = elapsed[i];
            elapsedOrder[i].val = refElapsed[i];
            elapsedOrder[i].index = i;

    qsort(elapsedOrder, nJobs, sizeof(struct pair_double_int), comparePair);
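    /*
     * Illustrative note (added; not part of the original sources): with
     * refElapsed = {0.2, 0.5, 0.1} the ascending sort above leaves the
     * elements in index order {2, 0, 1}, so the loop below assigns
     *   order[2] = 2, order[0] = 1, order[1] = 0,
     * i.e. the job with the largest reference time receives rank 0.
     */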
    for (i = 0; i < nJobs; ++i) {
        order[elapsedOrder[i].index] = nJobs - i - 1; // descending order

    if (cppadcg_pool_verbose) {
        fprintf(stdout, "new order (%i values):\n", nTimeMeas + 1);
        for (i = 0; i < nJobs; ++i) {
            fprintf(stdout, " job id: %i order: %i time: %e s\n", i, order[i], refElapsed[i]);

void cppadcg_thpool_shutdown() {
    if(cppadcg_pool != NULL) {
        thpool_destroy(cppadcg_pool);
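/*
 * Usage sketch (added example; not part of the original sources): how a
 * caller would typically drive the public API above. The job function
 * `example_job` and its arguments are hypothetical placeholders; the
 * argument list of cppadcg_thpool_add_jobs follows the forwarding call to
 * thpool_add_jobs shown above, and the timing, order and thread-assignment
 * arrays are passed as NULL here, which the forwarding code checks for.
 *
 *   static void example_job(void* arg) {
 *       double* x = (double*) arg;
 *       *x *= 2.0;
 *   }
 *
 *   static void example(void) {
 *       double data[2] = {1.0, 2.0};
 *       void* args[2] = {&data[0], &data[1]};
 *       thpool_function_type funcs[2] = {example_job, example_job};
 *
 *       cppadcg_thpool_set_threads(4);
 *       cppadcg_thpool_set_scheduler_strategy(SCHED_DYNAMIC);
 *       cppadcg_thpool_prepare();
 *       cppadcg_thpool_add_jobs(funcs, args, NULL, NULL, NULL, NULL, 2, 0);
 *       cppadcg_thpool_wait();
 *       cppadcg_thpool_shutdown();
 *   }
 */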
/* ========================== PROTOTYPES ============================ */

static void thpool_cleanup(ThPool* thpool);

static int thread_init(ThPool* thpool,
static void* thread_do(Thread* thread);
static void thread_destroy(Thread* thread);

static int jobqueue_init(ThPool* thpool);
static void jobqueue_clear(ThPool* thpool);
static void jobqueue_push(JobQueue* queue,
static void jobqueue_multipush(JobQueue* queue,
static int jobqueue_push_static_jobs(ThPool* thpool,
                                     const float avgElapsed[],
                                     int lastElapsedChanged);
static WorkGroup* jobqueue_pull(ThPool* thpool, int id);
static void jobqueue_destroy(ThPool* thpool);

static void bsem_init(BSem *bsem, int value);
static void bsem_reset(BSem *bsem);
static void bsem_post(BSem *bsem);
static void bsem_post_all(BSem *bsem);
static void bsem_wait(BSem *bsem);

/* ============================ TIME ============================== */
static float get_thread_time(struct timespec* cputime,
    *info = clock_gettime(CLOCK_THREAD_CPUTIME_ID, cputime);
        return cputime->tv_sec + cputime->tv_nsec * 1e-9f;
        fprintf(stderr, "failed clock_gettime()\n");

static float get_monotonic_time(struct timespec* time,
    *info = clock_gettime(CLOCK_MONOTONIC, time);
        return time->tv_sec + time->tv_nsec * 1e-9f;
        fprintf(stderr, "failed clock_gettime()\n");

static void get_monotonic_time2(struct timespec* time) {
    info = clock_gettime(CLOCK_MONOTONIC, time);
        fprintf(stderr, "failed clock_gettime()\n");

void timespec_diff(struct timespec* end,
                   struct timespec* start,
                   struct timespec* result) {
    if ((end->tv_nsec - start->tv_nsec) < 0) {
        result->tv_sec = end->tv_sec - start->tv_sec - 1;
        result->tv_nsec = end->tv_nsec - start->tv_nsec + 1000000000;
        result->tv_sec = end->tv_sec - start->tv_sec;
        result->tv_nsec = end->tv_nsec - start->tv_nsec;
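/*
 * Worked example (added; not part of the original sources): for
 * end = {5 s, 100000000 ns} and start = {3 s, 900000000 ns} the nanosecond
 * difference is negative, so the first branch above borrows one second and
 * yields result = {1 s, 200000000 ns}, i.e. 1.2 s.
 */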
/* ========================== THREADPOOL ============================ */

struct ThPool* thpool_init(int num_threads) {
    if (num_threads < 0) {

    if(cppadcg_pool_verbose) {
        fprintf(stdout, "thpool_init(): Thread pool created with %i threads\n", num_threads);

    if(num_threads == 0) {
        cppadcg_pool_disabled = 1; // true

    /* Make new thread pool */
    thpool = (ThPool*) malloc(sizeof(ThPool));
    if (thpool == NULL) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for thread pool\n");
    thpool->num_threads = num_threads;
    thpool->num_threads_alive = 0;
    thpool->num_threads_working = 0;
    thpool->threads_keepalive = 1;

    /* Initialize the job queue */
    if (jobqueue_init(thpool) == -1) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for job queue\n");

    /* Make threads in pool */
    thpool->threads = (Thread**) malloc(num_threads * sizeof(Thread*));
    if (thpool->threads == NULL) {
        fprintf(stderr, "thpool_init(): Could not allocate memory for threads\n");
        jobqueue_destroy(thpool);
        free(thpool->jobqueue);

    pthread_mutex_init(&(thpool->thcount_lock), NULL);
    pthread_cond_init(&thpool->threads_all_idle, NULL);

    for (n = 0; n < num_threads; n++) {
        thread_init(thpool, &thpool->threads[n], n);

    /* Wait for threads to initialize */
    while (thpool->num_threads_alive != num_threads) {}
static int thpool_add_job(ThPool* thpool,
                          thpool_function_type function,
                          const float* avgElapsed,
    newjob = (struct Job*) malloc(sizeof(struct Job));
    if (newjob == NULL) {
        fprintf(stderr, "thpool_add_job(): Could not allocate memory for new job\n");

    /* add function and argument */
    newjob->function = function;
    newjob->avgElapsed = avgElapsed;
    newjob->elapsed = elapsed;

    /* add job to queue */
    jobqueue_push(thpool->jobqueue, newjob);

static int thpool_add_jobs(ThPool* thpool,
                           thpool_function_type functions[],
                           const float avgElapsed[],
                           int lastElapsedChanged) {
    for (i = 0; i < nJobs; ++i) {
        newjobs[i] = (Job*) malloc(sizeof(Job));
        if (newjobs[i] == NULL) {
            fprintf(stderr, "thpool_add_jobs(): Could not allocate memory for new jobs\n");

        j = order != NULL ? order[i] : i;
        /* add function and argument */
        newjobs[i]->function = functions[j];
        newjobs[i]->arg = args[j];

        if (avgElapsed != NULL)
            newjobs[i]->avgElapsed = &avgElapsed[j];
            newjobs[i]->avgElapsed = NULL;

            newjobs[i]->elapsed = &elapsed[j];
            newjobs[i]->elapsed = NULL;

    /* add jobs to queue */
    if (schedule_strategy == SCHED_STATIC && avgElapsed != NULL && order != NULL && nJobs > 0 && avgElapsed[0] > 0) {
        return jobqueue_push_static_jobs(thpool, newjobs, avgElapsed, job2Thread, nJobs, lastElapsedChanged);
    jobqueue_multipush(thpool->jobqueue, newjobs, nJobs);

static int jobqueue_push_static_jobs(ThPool* thpool,
                                     const float avgElapsed[],
                                     int lastElapsedChanged) {
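    /*
     * Illustrative note (added; not part of the original sources): the code
     * below splits the jobs among the threads so that each thread receives
     * roughly total_duration / num_threads seconds of expected work. For
     * example, with expected durations of {4, 3, 2, 1} s (in the order
     * processed) and 2 threads, the target is 5 s per thread and a greedy
     * assignment of this kind yields the groups {4, 1} s and {3, 2} s.
     */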
    float total_duration, target_duration, next_duration, best_duration;
    int num_threads = thpool->num_threads;
    float* durations = NULL;

    if(nJobs < num_threads)

    n_jobs = (int*) malloc(num_threads * sizeof(int));
    if (n_jobs == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");

    groups = (WorkGroup**) malloc(num_threads * sizeof(WorkGroup*));
    if (groups == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");

    for (i = 0; i < num_threads; ++i) {

    for (i = 0; i < nJobs; ++i) {
        total_duration += avgElapsed[i];

    if (nJobs > 0 && (lastElapsedChanged || jobs2thread[0] < 0)) {
        durations = (float*) malloc(num_threads * sizeof(float));
        if (durations == NULL) {
            fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");

        for(i = 0; i < num_threads; ++i) {

        // decide in which work group to place each job
        target_duration = total_duration / num_threads;

        for (j = 0; j < nJobs; ++j) {
            for (i = 0; i < num_threads; ++i) {
                next_duration = durations[i] + avgElapsed[j];
                if (next_duration < target_duration) {
                    durations[i] = next_duration;

                best_duration = durations[0] + avgElapsed[j];
                for (i = 1; i < num_threads; ++i) {
                    next_duration = durations[i] + avgElapsed[j];
                    if (next_duration < best_duration) {
                        best_duration = next_duration;
                durations[iBest] = best_duration;
            jobs2thread[j] = iBest;
        // reuse existing information
        for (j = 0; j < nJobs; ++j) {
            n_jobs[jobs2thread[j]]++;

    for (i = 0; i < num_threads; ++i) {
        group = (WorkGroup*) malloc(sizeof(WorkGroup));
        group->jobs = (Job*) malloc(n_jobs[i] * sizeof(Job));

    for (i = 0; i < num_threads - 1; ++i) {
        groups[i]->prev = groups[i + 1];
    groups[num_threads - 1]->prev = NULL;

    // place jobs on the work groups
    for (j = 0; j < nJobs; ++j) {
        group->jobs[group->size] = *newjobs[j]; // copy

    if (cppadcg_pool_verbose) {
        if (durations != NULL) {
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs for %e s\n", i, groups[i]->size, durations[i]);
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs\n", i, groups[i]->size);

    pthread_mutex_lock(&thpool->jobqueue->rwmutex);
    groups[num_threads - 1]->prev = thpool->jobqueue->group_front;
    thpool->jobqueue->group_front = groups[0];
    bsem_post_all(thpool->jobqueue->has_jobs);
    pthread_mutex_unlock(&thpool->jobqueue->rwmutex);
static void thpool_wait(ThPool* thpool) {
    pthread_mutex_lock(&thpool->thcount_lock);
    /* note: jobqueue->len and group_front are read here without holding the queue rwmutex */
    while (thpool->jobqueue->len || thpool->jobqueue->group_front || thpool->num_threads_working) {
        pthread_cond_wait(&thpool->threads_all_idle, &thpool->thcount_lock);
    thpool->jobqueue->total_time = 0;
    thpool->jobqueue->highest_expected_return = 0;
    pthread_mutex_unlock(&thpool->thcount_lock);

    thpool_cleanup(thpool);
void thpool_cleanup(ThPool* thpool) {
    // for debugging only
    struct timespec diffTime;
    WorkGroup* workGroup;
    WorkGroup* workGroupPrev;

    if (!cppadcg_pool_verbose) {

    for (int j = 0; j < thpool->num_threads; ++j) {
        thread = thpool->threads[j];

        workGroup = thread->processed_groups;
        while (workGroup != NULL) {
            timespec_diff(&workGroup->endTime, &workGroup->startTime, &diffTime);
            fprintf(stdout, "# Thread %i, Group %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld, executed %i jobs\n",
                    thread->id, gid, workGroup->startTime.tv_sec, workGroup->startTime.tv_nsec, workGroup->endTime.tv_sec, workGroup->endTime.tv_nsec, diffTime.tv_sec,
                    diffTime.tv_nsec, workGroup->size);

            for (int i = 0; i < workGroup->size; ++i) {
                Job* job = &workGroup->jobs[i];

                timespec_diff(&job->endTime, &job->startTime, &diffTime);
                fprintf(stdout, "## Thread %i, Group %i, Job %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\n",
                        thread->id, gid, job->id, job->startTime.tv_sec, job->startTime.tv_nsec, job->endTime.tv_sec, job->endTime.tv_nsec, diffTime.tv_sec,

            workGroupPrev = workGroup->prev;
            free(workGroup->jobs);
            workGroup = workGroupPrev;

        thread->processed_groups = NULL;
static void thpool_destroy(ThPool* thpool) {
    /* No need to destroy it if it's NULL */
    if (thpool == NULL) return;

    volatile int threads_total = thpool->num_threads_alive;

    /* End each thread's infinite loop */
    thpool->threads_keepalive = 0;

    /* Give one second to kill idle threads */
    double TIMEOUT = 1.0;
    double tpassed = 0.0;
    while (tpassed < TIMEOUT && thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);
        tpassed = difftime(end, start);

    /* Poll remaining threads */
    while (thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);

    /* cleanup current work groups */
    thpool_cleanup(thpool);

    /* Job queue cleanup */
    jobqueue_destroy(thpool);
    free(thpool->jobqueue);

    for (n = 0; n < threads_total; n++) {
        thread_destroy(thpool->threads[n]);
    free(thpool->threads);

    if(cppadcg_pool_verbose) {
        fprintf(stdout, "thpool_destroy(): thread pool destroyed\n");
/* ============================ THREAD ============================== */

/* Initialize a thread in the thread pool
 *
 * @param thread address to the pointer of the thread to be created
 * @param id id to be given to the thread
 * @return 0 on success, -1 otherwise.
 */
static int thread_init(ThPool* thpool,
    *thread = (Thread*) malloc(sizeof(Thread));
    if (*thread == NULL) {
        fprintf(stderr, "thread_init(): Could not allocate memory for thread\n");

    (*thread)->thpool = thpool;
    (*thread)->processed_groups = NULL;

    pthread_create(&(*thread)->pthread, NULL, (void*) thread_do, (*thread));
    pthread_detach((*thread)->pthread);
/* What each thread is doing
 *
 * In principle this is an endless loop. The only time this loop gets interrupted is once
 * thpool_destroy() is invoked or the program exits.
 *
 * @param thread thread that will run this function
 */
static void* thread_do(Thread* thread) {
    struct timespec cputime;
    WorkGroup* workGroup;
    thpool_function_type func_buff;

    /* Set thread name for profiling and debugging */
    char thread_name[128] = {0};
    sprintf(thread_name, "thread-pool-%d", thread->id);

#if defined(__linux__)
    /* Use prctl to avoid requiring the _GNU_SOURCE flag and an implicit declaration */
    prctl(PR_SET_NAME, thread_name);
#elif defined(__APPLE__) && defined(__MACH__)
    pthread_setname_np(thread_name);
#else
    fprintf(stderr, "thread_do(): pthread_setname_np is not supported on this system");
#endif

    /* Assure all threads have been created before starting serving */
    ThPool* thpool = thread->thpool;

    /* Mark thread as alive (initialized) */
    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive += 1;
    pthread_mutex_unlock(&thpool->thcount_lock);

    queue = thpool->jobqueue;
    while (thpool->threads_keepalive) {
        bsem_wait(queue->has_jobs);

        if (!thpool->threads_keepalive) {

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working++;
        pthread_mutex_unlock(&thpool->thcount_lock);

        while (thpool->threads_keepalive) {
            /* Read job from queue and execute it */
            pthread_mutex_lock(&queue->rwmutex);
            workGroup = jobqueue_pull(thpool, thread->id);
            pthread_mutex_unlock(&queue->rwmutex);

            if (workGroup == NULL)

            if (cppadcg_pool_verbose) {
                get_monotonic_time2(&workGroup->startTime);

            for (i = 0; i < workGroup->size; ++i) {
                job = &workGroup->jobs[i];

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->startTime);

                int do_benchmark = job->elapsed != NULL;
                    elapsed = -get_thread_time(&cputime, &info);

                /* Execute the job */
                func_buff = job->function;
                arg_buff = job->arg;
                func_buff(arg_buff);

                if (do_benchmark && info == 0) {
                    elapsed += get_thread_time(&cputime, &info);
                        (*job->elapsed) = elapsed;

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->endTime);

            if (cppadcg_pool_verbose) {
                get_monotonic_time2(&workGroup->endTime);

                if (thread->processed_groups == NULL) {
                    thread->processed_groups = workGroup;
                    workGroup->prev = thread->processed_groups;
                    thread->processed_groups = workGroup;
                free(workGroup->jobs);

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working--;
        if (!thpool->num_threads_working) {
            pthread_cond_signal(&thpool->threads_all_idle);
        pthread_mutex_unlock(&thpool->thcount_lock);

    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive--;
    pthread_mutex_unlock(&thpool->thcount_lock);
static void thread_destroy(Thread* thread) {

/* ============================ JOB QUEUE =========================== */

/* Initialize queue */
static int jobqueue_init(ThPool* thpool) {
    JobQueue* queue = (JobQueue*) malloc(sizeof(JobQueue));
    if (queue == NULL) {
    thpool->jobqueue = queue;

    queue->front = NULL;
    queue->group_front = NULL;
    queue->total_time = 0;
    queue->highest_expected_return = 0;

    queue->has_jobs = (BSem*) malloc(sizeof(BSem));
    if (queue->has_jobs == NULL) {

    pthread_mutex_init(&(queue->rwmutex), NULL);
    bsem_init(queue->has_jobs, 0);

/* Clear the queue */
static void jobqueue_clear(ThPool* thpool) {
        group = jobqueue_pull(thpool, -1);
        if (group == NULL) {

    thpool->jobqueue->front = NULL;
    thpool->jobqueue->rear = NULL;
    bsem_reset(thpool->jobqueue->has_jobs);
    thpool->jobqueue->len = 0;
    thpool->jobqueue->group_front = NULL;
    thpool->jobqueue->total_time = 0;
    thpool->jobqueue->highest_expected_return = 0;

static void jobqueue_push_internal(JobQueue* queue,
    newjob->prev = NULL;

    switch (queue->len) {
        case 0: /* if no jobs in queue */
            queue->front = newjob;
            queue->rear = newjob;
        default: /* if jobs in queue */
            queue->rear->prev = newjob;
            queue->rear = newjob;

    if(newjob->avgElapsed != NULL) {
        queue->total_time += *newjob->avgElapsed;

static void jobqueue_push(JobQueue* queue,
    pthread_mutex_lock(&queue->rwmutex);
    jobqueue_push_internal(queue, newjob);
    bsem_post(queue->has_jobs);
    pthread_mutex_unlock(&queue->rwmutex);

static void jobqueue_multipush(JobQueue* queue,
    pthread_mutex_lock(&queue->rwmutex);
    for(i = 0; i < nJobs; ++i) {
        jobqueue_push_internal(queue, newjob[i]);
    bsem_post_all(queue->has_jobs);
    pthread_mutex_unlock(&queue->rwmutex);

static Job* jobqueue_extract_single(JobQueue* queue) {
    Job* job = queue->front;

    switch (queue->len) {
        case 0: /* if no jobs in queue */
        case 1: /* if one job in queue */
            queue->front = NULL;
            queue->total_time = 0;
            queue->highest_expected_return = 0;
        default: /* if >1 jobs in queue */
            queue->front = job->prev;

    if(job->avgElapsed != NULL) {
        queue->total_time -= *job->avgElapsed;

static void jobqueue_extract_single_group(JobQueue* queue,
    Job* job = jobqueue_extract_single(queue);
    group->jobs = (Job*) malloc(sizeof(Job));
    group->jobs[0] = *job; // copy
static WorkGroup* jobqueue_pull(ThPool* thpool,
    float duration, duration_next, min_duration, target_duration;
    struct timespec timeAux;
    JobQueue* queue = thpool->jobqueue;

    if (schedule_strategy == SCHED_STATIC && queue->group_front != NULL) {
        group = queue->group_front;
        queue->group_front = group->prev;

    } else if (queue->len == 0) {

    } else if (schedule_strategy == SCHED_DYNAMIC || queue->len == 1 || queue->total_time <= 0) {
        group = (WorkGroup*) malloc(sizeof(WorkGroup));

        if (cppadcg_pool_verbose) {
            if (schedule_strategy == SCHED_GUIDED) {
                if (queue->len == 1)
                    fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
                else if (queue->total_time <= 0)
                    fprintf(stdout, "jobqueue_pull(): Thread %i using single-job instead of multi-job (no timing information)\n", id);
            } else if (schedule_strategy == SCHED_STATIC && queue->len >= 1) {
                if (queue->total_time >= 0) {
                    // this should not happen but just in case the user messed up
                    fprintf(stderr, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
                    fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);

        jobqueue_extract_single_group(thpool->jobqueue, group);

    } else { // schedule_strategy == SCHED_GUIDED
        group = (WorkGroup*) malloc(sizeof(WorkGroup));

        if (job->avgElapsed == NULL) {
            if (cppadcg_pool_verbose) {
                fprintf(stderr, "jobqueue_pull(): Thread %i using single job instead of multi-job (No timing information for current job)\n", id);
            // cannot use this strategy (something went wrong!)
            jobqueue_extract_single_group(thpool->jobqueue, group);

            // there are at least 2 jobs in the queue
            duration = *job->avgElapsed;
            duration_next = duration;
            target_duration = queue->total_time * cppadcg_pool_guided_maxgroupwork / thpool->num_threads; // always positive
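            /*
             * Illustrative note (added; not part of the original sources): with
             * 8 s of expected work left in the queue, 4 threads and the default
             * cppadcg_pool_guided_maxgroupwork of 0.75, the target above is
             * 8 * 0.75 / 4 = 1.5 s, so jobs are grouped below until their
             * combined expected time would reach about 1.5 s.
             */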
            current_time = get_monotonic_time(&timeAux, &info);
            if (queue->highest_expected_return > 0 && info == 0) { /* info == 0: clock_gettime() succeeded */
                min_duration = 0.9f * (queue->highest_expected_return - current_time);
                if (target_duration < min_duration) {
                    target_duration = min_duration;

                if (job->avgElapsed == NULL) {
                duration_next += *job->avgElapsed;
                if (duration_next < target_duration) {
                    duration = duration_next;
            } while (job != queue->front);

            if (cppadcg_pool_verbose) {
                fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with %i jobs for %e s (target: %e s)\n", id, group->size, duration, target_duration);

            group->jobs = (Job*) malloc(group->size * sizeof(Job));
            for (i = 0; i < group->size; ++i) {
                job = jobqueue_extract_single(thpool->jobqueue);
                group->jobs[i] = *job; // copy

            duration_next = current_time + duration; // the time when the current work is expected to end
            if(duration_next > queue->highest_expected_return)
                queue->highest_expected_return = duration_next;

    /* still work left in the queue -> wake another thread */
    if (queue->len > 0 || queue->group_front != NULL) {
        bsem_post(queue->has_jobs);
/* Free all queue resources back to the system */
static void jobqueue_destroy(ThPool* thpool) {
    jobqueue_clear(thpool);
    free(thpool->jobqueue->has_jobs);

/* ======================== SYNCHRONISATION ========================= */

/* Init semaphore to 1 or 0 */
static void bsem_init(BSem* bsem, int value) {
    if (value < 0 || value > 1) {
        fprintf(stderr, "bsem_init(): Binary semaphore can take only values 1 or 0");
    pthread_mutex_init(&(bsem->mutex), NULL);
    pthread_cond_init(&(bsem->cond), NULL);

/* Reset semaphore to 0 */
static void bsem_reset(BSem* bsem) {

/* Post to at least one thread */
static void bsem_post(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    pthread_cond_signal(&bsem->cond);
    pthread_mutex_unlock(&bsem->mutex);

/* Post to all threads */
static void bsem_post_all(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    pthread_cond_broadcast(&bsem->cond);
    pthread_mutex_unlock(&bsem->mutex);
/* Wait on the semaphore until it has value 1 */
static void bsem_wait(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    while (bsem->v != 1) {
        pthread_cond_wait(&bsem->cond, &bsem->mutex);
    pthread_mutex_unlock(&bsem->mutex);
)*=*";

const size_t CPPADCG_PTHREAD_POOL_C_FILE_SIZE = 43345;