CppADCodeGen  2.4.3
A C++ Algorithmic Differentiation Package with Source Code Generation
pthread_pool_c.hpp
1 const char CPPADCG_PTHREAD_POOL_C_FILE[] = R"*=*(/* --------------------------------------------------------------------------
2  * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
3  * Copyright (C) 2016 Ciengis
4  *
5  * CppADCodeGen is distributed under multiple licenses:
6  *
7  * - Eclipse Public License Version 1.0 (EPL1), and
8  * - GNU General Public License Version 3 (GPL3).
9  *
10  * EPL1 terms and conditions can be found in the file "epl-v10.txt", while
11  * terms and conditions for the GPL3 can be found in the file "gpl3.txt".
12  * ----------------------------------------------------------------------------
13  * Authors: Johan Hanssen Seferidis, Joao Leal
14  */
15 
21 #include <unistd.h>
22 #include <stdio.h>
23 #include <stdlib.h>
24 #include <pthread.h>
25 #include <errno.h>
26 #include <time.h>
27 #if defined(__linux__)
28 #include <sys/prctl.h>
29 #include <time.h>
30 #include <sys/time.h>
31 #define __USE_GNU /* required before including resource.h */
32 #include <sys/resource.h>
33 #endif
34 
/* Strategies used to distribute jobs among the worker threads. */
enum ScheduleStrategy {SCHED_STATIC = 1,  /* jobs pre-assigned to threads using previous timings (see jobqueue_push_static_jobs) */
                       SCHED_DYNAMIC = 2, /* threads pull a single job at a time */
                       SCHED_GUIDED = 3   /* threads pull groups of jobs sized from timing information */
};

/* How a new time measurement is folded into the per-job reference time. */
enum ElapsedTimeReference {ELAPSED_TIME_AVG, /* running average of all measurements */
                           ELAPSED_TIME_MIN};/* minimum measurement observed so far */

typedef struct ThPool ThPool;
/* Signature of the functions executed by the thread pool. */
typedef void (* thpool_function_type)(void*);
/* The global thread pool, lazily created by cppadcg_thpool_prepare(). */
static ThPool* volatile cppadcg_pool = NULL;
/* Number of threads the pool is created with. */
static int cppadcg_pool_n_threads = 2;
/* When non-zero, jobs are executed synchronously in the calling thread. */
static int cppadcg_pool_disabled = 0; // false
/* When non-zero, scheduling/timing diagnostics are printed. */
static int cppadcg_pool_verbose = 0; // false
/* Statistic kept as the per-job reference time (average or minimum). */
static enum ElapsedTimeReference cppadcg_pool_time_update = ELAPSED_TIME_MIN;
static unsigned int cppadcg_pool_time_meas = 10; // default number of time measurements
/* Maximum fraction of the remaining work handed to one thread at a time
 * (used by SCHED_GUIDED — TODO confirm against jobqueue_pull, which is
 * truncated in this view). */
static float cppadcg_pool_guided_maxgroupwork = 0.75;

/* Currently selected scheduling strategy (guarded by jobqueue->rwmutex
 * once the pool exists). */
static enum ScheduleStrategy schedule_strategy = SCHED_DYNAMIC;
55 
56 /* ==================== INTERNAL HIGH LEVEL API ====================== */
57 
58 static ThPool* thpool_init(int num_threads);
59 
60 static int thpool_add_job(ThPool*,
61  thpool_function_type function,
62  void* arg,
63  const float* avgElapsed,
64  float* elapsed);
65 
66 static int thpool_add_jobs(ThPool*,
67  thpool_function_type functions[],
68  void* args[],
69  const float avgElapsed[],
70  float elapsed[],
71  const int order[],
72  int job2Thread[],
73  int nJobs,
74  int lastElapsedChanged);
75 
76 static void thpool_wait(ThPool*);
77 
78 static void thpool_destroy(ThPool*);
79 
80 /* ========================== STRUCTURES ============================ */
/* Binary semaphore: 'v' is 0 or 1; waiters block on 'cond' until v == 1. */
typedef struct BSem {
    pthread_mutex_t mutex;
    pthread_cond_t cond;
    int v;                               /* 0 = no jobs, 1 = jobs available */
} BSem;


/* Job: one unit of work plus its timing bookkeeping. */
typedef struct Job {
    struct Job* prev;                    /* pointer to previous job */
    thpool_function_type function;       /* function pointer */
    void* arg;                           /* function's argument */
    const float* avgElapsed;             /* the last measurement of elapsed time (may be NULL) */
    float* elapsed;                      /* the current elapsed time output (may be NULL: no benchmarking) */
    struct timespec startTime;           /* initial time (verbose only) */
    struct timespec endTime;             /* final time (verbose only) */
    int id;                              /* a job identifier used for debugging */
} Job;

/* Work group: a batch of jobs executed by a single thread. */
typedef struct WorkGroup {
    struct WorkGroup* prev;              /* pointer to previous WorkGroup */
    struct Job* jobs;                    /* jobs (owned array, copied in) */
    int size;                            /* number of jobs */
    struct timespec startTime;           /* initial time (verbose only) */
    struct timespec endTime;             /* final time (verbose only) */
} WorkGroup;

/* Job queue shared by all worker threads; protected by 'rwmutex'. */
typedef struct JobQueue {
    pthread_mutex_t rwmutex;             /* used for queue r/w access */
    Job *front;                          /* pointer to front of queue */
    Job *rear;                           /* pointer to rear of queue */
    WorkGroup* group_front;              /* previously created work groups (SCHED_STATIC scheduling only) */
    BSem *has_jobs;                      /* flag as binary semaphore */
    int len;                             /* number of jobs in queue */
    float total_time;                    /* total expected time to complete the work */
    float highest_expected_return;       /* the time when the last running thread is expected to request new work */
} JobQueue;


/* Worker thread. */
typedef struct Thread {
    int id;                              /* friendly id */
    pthread_t pthread;                   /* pointer to actual thread */
    struct ThPool* thpool;               /* access to ThPool */
    WorkGroup* processed_groups;         /* processed work groups (verbose only) */
} Thread;


/* Threadpool: counters are guarded by 'thcount_lock'. */
typedef struct ThPool {
    Thread** threads;                    /* pointer to threads */
    int num_threads;                     /* total number of threads */
    volatile int num_threads_alive;      /* threads currently alive */
    volatile int num_threads_working;    /* threads currently working */
    pthread_mutex_t thcount_lock;        /* used for thread count etc */
    pthread_cond_t threads_all_idle;     /* signal to thpool_wait */
    JobQueue* jobqueue;                  /* pointer to the job queue */
    volatile int threads_keepalive;      /* 0 requests all workers to exit */
} ThPool;
143 
144 /* ========================== PUBLIC API ============================ */
145 
/* Defines the number of threads the pool will be created with.
 * Only effective before the pool is created (cppadcg_thpool_prepare). */
void cppadcg_thpool_set_threads(int n) {
    cppadcg_pool_n_threads = n;
}

/* Returns the number of threads the pool is/will be created with. */
int cppadcg_thpool_get_threads() {
    return cppadcg_pool_n_threads;
}
153 
154 void cppadcg_thpool_set_scheduler_strategy(enum ScheduleStrategy s) {
155  if(cppadcg_pool != NULL) {
156  pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
157  schedule_strategy = s;
158  pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
159  } else {
160  // pool not yet created
161  schedule_strategy = s;
162  }
163 }
164 
165 enum ScheduleStrategy cppadcg_thpool_get_scheduler_strategy() {
166  if(cppadcg_pool != NULL) {
167  enum ScheduleStrategy e;
168  pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
169  e = schedule_strategy;
170  pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
171  return e;
172  } else {
173  // pool not yet created
174  return schedule_strategy;
175  }
176 }
177 
/* Enables/disables the thread pool; when disabled, jobs run synchronously
 * in the calling thread. */
void cppadcg_thpool_set_disabled(int disabled) {
    cppadcg_pool_disabled = disabled;
}

/* Returns whether the thread pool is disabled (non-zero = disabled). */
int cppadcg_thpool_is_disabled() {
    return cppadcg_pool_disabled;
}
185 
186 void cppadcg_thpool_set_guided_maxgroupwork(float v) {
187  if(cppadcg_pool != NULL) {
188  pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
189  cppadcg_pool_guided_maxgroupwork = v;
190  pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
191  } else {
192  // pool not yet created
193  cppadcg_pool_guided_maxgroupwork = v;
194  }
195 }
196 
197 float cppadcg_thpool_get_guided_maxgroupwork() {
198  if(cppadcg_pool != NULL) {
199  float r;
200  pthread_mutex_lock(&cppadcg_pool->jobqueue->rwmutex);
201  r = cppadcg_pool_guided_maxgroupwork;
202  pthread_mutex_unlock(&cppadcg_pool->jobqueue->rwmutex);
203  return r;
204  } else {
205  // pool not yet created
206  return cppadcg_pool_guided_maxgroupwork;
207  }
208 }
209 
/* Returns the default number of time measurements. */
unsigned int cppadcg_thpool_get_n_time_meas() {
    return cppadcg_pool_time_meas;
}

/* Defines the default number of time measurements. */
void cppadcg_thpool_set_n_time_meas(unsigned int n) {
    cppadcg_pool_time_meas = n;
}

/* Enables/disables verbose diagnostics (timing reports, scheduling info). */
void cppadcg_thpool_set_verbose(int v) {
    cppadcg_pool_verbose = v;
}

/* Returns how measured times are folded into the reference times. */
enum ElapsedTimeReference cppadcg_thpool_get_time_meas_ref() {
    return cppadcg_pool_time_update;
}

/* Defines how measured times are folded into the reference times
 * (average or minimum). */
void cppadcg_thpool_set_time_meas_ref(enum ElapsedTimeReference r) {
    cppadcg_pool_time_update = r;
}

/* Returns whether verbose mode is enabled. */
int cppadcg_thpool_is_verbose() {
    return cppadcg_pool_verbose;
}

/* Creates the global thread pool if it does not exist yet. */
void cppadcg_thpool_prepare() {
    if(cppadcg_pool == NULL) {
        cppadcg_pool = thpool_init(cppadcg_pool_n_threads);
    }
}
239 
240 void cppadcg_thpool_add_job(thpool_function_type function,
241  void* arg,
242  float* avgElapsed,
243  float* elapsed) {
244  if (!cppadcg_pool_disabled) {
245  cppadcg_thpool_prepare();
246  if (cppadcg_pool != NULL) {
247  thpool_add_job(cppadcg_pool, function, arg, avgElapsed, elapsed);
248  return;
249  }
250  }
251 
252  // thread pool not used
253  (*function)(arg);
254 }
255 
256 void cppadcg_thpool_add_jobs(thpool_function_type functions[],
257  void* args[],
258  const float avgElapsed[],
259  float elapsed[],
260  const int order[],
261  int job2Thread[],
262  int nJobs,
263  int lastElapsedChanged) {
264  int i;
265  if (!cppadcg_pool_disabled) {
266  cppadcg_thpool_prepare();
267  if (cppadcg_pool != NULL) {
268  thpool_add_jobs(cppadcg_pool, functions, args, avgElapsed, elapsed, order, job2Thread, nJobs, lastElapsedChanged);
269  return;
270  }
271  }
272 
273  // thread pool not used
274  for (i = 0; i < nJobs; ++i) {
275  (*functions[i])(args[i]);
276  }
277 }
278 
/* Blocks until all jobs handed to the pool have completed. */
void cppadcg_thpool_wait() {
    if(cppadcg_pool != NULL) {
        thpool_wait(cppadcg_pool);
    }
}
284 
/* Pair used to sort job indices by their reference elapsed time. */
typedef struct pair_double_int {
    float val;   /* sort key: reference elapsed time */
    int index;   /* original job index */
} pair_double_int;

/* qsort() comparator: orders pair_double_int ascending by 'val'. */
static int comparePair(const void* a, const void* b) {
    float va = ((const pair_double_int*) a)->val;
    float vb = ((const pair_double_int*) b)->val;

    if (va < vb)
        return -1;
    if (va == vb)
        return 0;
    return 1;
}
297 
298 void cppadcg_thpool_update_order(float refElapsed[],
299  unsigned int nTimeMeas,
300  const float elapsed[],
301  int order[],
302  int nJobs) {
303  if(nJobs == 0 || refElapsed == NULL || elapsed == NULL || order == NULL)
304  return;
305 
306  struct pair_double_int elapsedOrder[nJobs];
307  int i;
308  int nonZero = 0; // false
309 
310  for(i = 0; i < nJobs; ++i) {
311  if(elapsed[i] != 0) {
312  nonZero = 1;
313  break;
314  }
315  }
316 
317  if (!nonZero) {
318  if (cppadcg_pool_verbose) {
319  fprintf(stdout, "order not updated: all times are zero\n");
320  }
321  return;
322  }
323 
324  if(cppadcg_pool_time_update == ELAPSED_TIME_AVG) {
325  for (i = 0; i < nJobs; ++i) {
326  refElapsed[i] = (refElapsed[i] * nTimeMeas + elapsed[i]) / (nTimeMeas + 1);
327  elapsedOrder[i].val = refElapsed[i];
328  elapsedOrder[i].index = i;
329  }
330  } else {
331  // cppadcg_pool_time_update == ELAPSED_TIME_MIN
332  for (i = 0; i < nJobs; ++i) {
333  if(nTimeMeas == 0 || elapsed[i] < refElapsed[i]) {
334  refElapsed[i] = elapsed[i];
335  }
336  elapsedOrder[i].val = refElapsed[i];
337  elapsedOrder[i].index = i;
338  }
339  }
340 
341  qsort(elapsedOrder, nJobs, sizeof(struct pair_double_int), comparePair);
342 
343  for (i = 0; i < nJobs; ++i) {
344  order[elapsedOrder[i].index] = nJobs - i - 1; // descending order
345  }
346 
347  if (cppadcg_pool_verbose) {
348  fprintf(stdout, "new order (%i values):\n", nTimeMeas + 1);
349  for (i = 0; i < nJobs; ++i) {
350  fprintf(stdout, " job id: %i order: %i time: %e s\n", i, order[i], refElapsed[i]);
351  }
352  }
353 
354 }
355 
/* Destroys the global thread pool (if it exists); a later call to
 * cppadcg_thpool_prepare() will create a new one. */
void cppadcg_thpool_shutdown() {
    if(cppadcg_pool != NULL) {
        thpool_destroy(cppadcg_pool);
        cppadcg_pool = NULL;
    }
}
362 
363 /* ========================== PROTOTYPES ============================ */
364 
365 static void thpool_cleanup(ThPool* thpool);
366 
367 static int thread_init(ThPool* thpool,
368  Thread** thread,
369  int id);
370 static void* thread_do(Thread* thread);
371 static void thread_destroy(Thread* thread);
372 
373 static int jobqueue_init(ThPool* thpool);
374 static void jobqueue_clear(ThPool* thpool);
375 static void jobqueue_push(JobQueue* queue,
376  Job* newjob_p);
377 static void jobqueue_multipush(JobQueue* queue,
378  Job* newjob[],
379  int nJobs);
380 static int jobqueue_push_static_jobs(ThPool* thpool,
381  Job* newjobs[],
382  const float avgElapsed[],
383  int jobs2thread[],
384  int nJobs,
385  int lastElapsedChanged);
386 static WorkGroup* jobqueue_pull(ThPool* thpool, int id);
387 static void jobqueue_destroy(ThPool* thpool);
388 
389 static void bsem_init(BSem *bsem, int value);
390 static void bsem_reset(BSem *bsem);
391 static void bsem_post(BSem *bsem);
392 static void bsem_post_all(BSem *bsem);
393 static void bsem_wait(BSem *bsem);
394 
395 
396 /* ============================ TIME ============================== */
397 
398 static float get_thread_time(struct timespec* cputime,
399  int* info) {
400  *info = clock_gettime(CLOCK_THREAD_CPUTIME_ID, cputime);
401  if(*info == 0) {
402  return cputime->tv_sec + cputime->tv_nsec * 1e-9f;
403  } else {
404  fprintf(stderr, "failed clock_gettime()\n");
405  return 0;
406  }
407 }
408 
409 static float get_monotonic_time(struct timespec* time,
410  int* info) {
411  *info = clock_gettime(CLOCK_MONOTONIC, time);
412  if(*info == 0) {
413  return time->tv_sec + time->tv_nsec * 1e-9f;
414  } else {
415  fprintf(stderr, "failed clock_gettime()\n");
416  return 0;
417  }
418 }
419 
420 static void get_monotonic_time2(struct timespec* time) {
421  int info;
422  info = clock_gettime(CLOCK_MONOTONIC, time);
423  if(info != 0) {
424  time->tv_sec = 0;
425  time->tv_nsec = 0;
426  fprintf(stderr, "failed clock_gettime()\n");
427  }
428 }
429 
/* Computes *result = *end - *start, normalizing tv_nsec into [0, 1e9).
 * Assumes end >= start. */
void timespec_diff(struct timespec* end,
                   struct timespec* start,
                   struct timespec* result) {
    long nsec = end->tv_nsec - start->tv_nsec;

    result->tv_sec = end->tv_sec - start->tv_sec;
    if (nsec < 0) {
        /* borrow one second */
        result->tv_sec -= 1;
        nsec += 1000000000;
    }
    result->tv_nsec = nsec;
}
441 
442 /* ========================== THREADPOOL ============================ */
443 
461 struct ThPool* thpool_init(int num_threads) {
462  if (num_threads < 0) {
463  num_threads = 0;
464  }
465 
466  if(cppadcg_pool_verbose) {
467  fprintf(stdout, "thpool_init(): Thread pool created with %i threads\n", num_threads);
468  }
469 
470  if(num_threads == 0) {
471  cppadcg_pool_disabled = 1; // true
472  return NULL;
473  }
474 
475  /* Make new thread pool */
476  ThPool* thpool;
477  thpool = (ThPool*) malloc(sizeof(ThPool));
478  if (thpool == NULL) {
479  fprintf(stderr, "thpool_init(): Could not allocate memory for thread pool\n");
480  return NULL;
481  }
482  thpool->num_threads = num_threads;
483  thpool->num_threads_alive = 0;
484  thpool->num_threads_working = 0;
485  thpool->threads_keepalive = 1;
486 
487  /* Initialize the job queue */
488  if (jobqueue_init(thpool) == -1) {
489  fprintf(stderr, "thpool_init(): Could not allocate memory for job queue\n");
490  free(thpool);
491  return NULL;
492  }
493 
494  /* Make threads in pool */
495  thpool->threads = (Thread**) malloc(num_threads * sizeof(Thread*));
496  if (thpool->threads == NULL) {
497  fprintf(stderr, "thpool_init(): Could not allocate memory for threads\n");
498  jobqueue_destroy(thpool);
499  free(thpool->jobqueue);
500  free(thpool);
501  return NULL;
502  }
503 
504  pthread_mutex_init(&(thpool->thcount_lock), NULL);
505  pthread_cond_init(&thpool->threads_all_idle, NULL);
506 
507  /* Thread init */
508  int n;
509  for (n = 0; n < num_threads; n++) {
510  thread_init(thpool, &thpool->threads[n], n);
511  }
512 
513  /* Wait for threads to initialize */
514  while (thpool->num_threads_alive != num_threads) {}
515 
516  return thpool;
517 }
518 
546 static int thpool_add_job(ThPool* thpool,
547  thpool_function_type function,
548  void* arg,
549  const float* avgElapsed,
550  float* elapsed) {
551  Job* newjob;
552 
553  newjob = (struct Job*) malloc(sizeof(struct Job));
554  if (newjob == NULL) {
555  fprintf(stderr, "thpool_add_job(): Could not allocate memory for new job\n");
556  return -1;
557  }
558 
559  /* add function and argument */
560  newjob->function = function;
561  newjob->arg = arg;
562  newjob->avgElapsed = avgElapsed;
563  newjob->elapsed = elapsed;
564 
565  /* add job to queue */
566  jobqueue_push(thpool->jobqueue, newjob);
567 
568  return 0;
569 }
570 
571 static int thpool_add_jobs(ThPool* thpool,
572  thpool_function_type functions[],
573  void* args[],
574  const float avgElapsed[],
575  float elapsed[],
576  const int order[],
577  int job2Thread[],
578  int nJobs,
579  int lastElapsedChanged) {
580  Job* newjobs[nJobs];
581  int i;
582  int j;
583 
584  for (i = 0; i < nJobs; ++i) {
585  newjobs[i] = (Job*) malloc(sizeof(Job));
586  if (newjobs[i] == NULL) {
587  fprintf(stderr, "thpool_add_jobs(): Could not allocate memory for new jobs\n");
588  return -1;
589  }
590 
591  j = order != NULL ? order[i] : i;
592  /* add function and argument */
593  newjobs[i]->function = functions[j];
594  newjobs[i]->arg = args[j];
595  newjobs[i]->id = i;
596  if (avgElapsed != NULL)
597  newjobs[i]->avgElapsed = &avgElapsed[j];
598  else
599  newjobs[i]->avgElapsed = NULL;
600 
601  if (elapsed != NULL)
602  newjobs[i]->elapsed = &elapsed[j];
603  else
604  newjobs[i]->elapsed = NULL;
605  }
606 
607  /* add jobs to queue */
608  if (schedule_strategy == SCHED_STATIC && avgElapsed != NULL && order != NULL && nJobs > 0 && avgElapsed[0] > 0) {
609  return jobqueue_push_static_jobs(thpool, newjobs, avgElapsed, job2Thread, nJobs, lastElapsedChanged);
610  } else {
611  jobqueue_multipush(thpool->jobqueue, newjobs, nJobs);
612  return 0;
613  }
614 }
615 
/* SCHED_STATIC scheduling: partitions nJobs jobs into one work group per
 * thread so that the expected duration of each group (from avgElapsed) is
 * balanced, then publishes all groups into the queue at once.
 *
 * Ownership of 'newjobs' is taken: each Job is copied into a group and the
 * original allocation freed.
 *
 * @param jobs2thread in/out per-job thread assignment; reused as-is unless
 *                    lastElapsedChanged is non-zero or jobs2thread[0] < 0
 *                    (no previous assignment)
 * @return 0 on success, -1 on memory allocation failure
 */
static int jobqueue_push_static_jobs(ThPool* thpool,
                                     Job* newjobs[],
                                     const float avgElapsed[],
                                     int jobs2thread[],
                                     int nJobs,
                                     int lastElapsedChanged) {
    float total_duration, target_duration, next_duration, best_duration;
    int i, j, iBest;
    int added;
    int num_threads = thpool->num_threads;
    int* n_jobs;
    float* durations = NULL;
    WorkGroup** groups;
    WorkGroup* group;

    /* never create more groups than there are jobs */
    if(nJobs < num_threads)
        num_threads = nJobs;

    n_jobs = (int*) malloc(num_threads * sizeof(int));
    if (n_jobs == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
        return -1;
    }

    groups = (WorkGroup**) malloc(num_threads * sizeof(WorkGroup*));
    if (groups == NULL) {
        fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
        free(n_jobs);
        return -1;
    }

    for (i = 0; i < num_threads; ++i) {
        n_jobs[i] = 0;
    }

    total_duration = 0;
    for (i = 0; i < nJobs; ++i) {
        total_duration += avgElapsed[i];
    }


    if (nJobs > 0 && (lastElapsedChanged || jobs2thread[0] < 0)) {
        /* timings changed (or first call): compute a fresh assignment */
        durations = (float*) malloc(num_threads * sizeof(float));
        if (durations == NULL) {
            fprintf(stderr, "jobqueue_push_static_jobs(): Could not allocate memory\n");
            free(n_jobs);
            free(groups);
            return -1;
        }

        for(i = 0; i < num_threads; ++i) {
            durations[i] = 0;
        }

        // decide in which work group to place each job
        target_duration = total_duration / num_threads;

        for (j = 0; j < nJobs; ++j) {
            /* first-fit: place the job in the first group that stays below
             * the target duration */
            added = 0;
            for (i = 0; i < num_threads; ++i) {
                next_duration = durations[i] + avgElapsed[j];
                if (next_duration < target_duration) {
                    durations[i] = next_duration;
                    n_jobs[i]++;
                    jobs2thread[j] = i;
                    added = 1;
                    break;
                }
            }

            if (!added) {
                /* no group fits: pick the group that ends up smallest */
                best_duration = durations[0] + avgElapsed[j];
                iBest = 0;
                for (i = 1; i < num_threads; ++i) {
                    next_duration = durations[i] + avgElapsed[j];
                    if (next_duration < best_duration) {
                        best_duration = next_duration;
                        iBest = i;
                    }
                }
                durations[iBest] = best_duration;
                n_jobs[iBest]++;
                jobs2thread[j] = iBest;
            }
        }

    } else {
        // reuse existing information

        for (j = 0; j < nJobs; ++j) {
            n_jobs[jobs2thread[j]]++;
        }
    }

    /* create one work group per thread */
    for (i = 0; i < num_threads; ++i) {
        /* NOTE(review): these mallocs are unchecked — a failure here would
         * be dereferenced below; TODO add error handling */
        group = (WorkGroup*) malloc(sizeof(WorkGroup));
        group->size = 0;
        group->jobs = (Job*) malloc(n_jobs[i] * sizeof(Job));
        groups[i] = group;
    }
    /* chain the groups (group i is followed by group i + 1) */
    for (i = 0; i < num_threads - 1; ++i) {
        groups[i]->prev = groups[i + 1];
    }
    groups[num_threads - 1]->prev = NULL;

    // place jobs on the work groups
    for (j = 0; j < nJobs; ++j) {
        i = jobs2thread[j];
        group = groups[i];
        group->jobs[group->size] = *newjobs[j]; // copy
        group->size++;
        free(newjobs[j]);
    }

    if (cppadcg_pool_verbose) {
        if (durations != NULL) {
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs for %e s\n", i, groups[i]->size, durations[i]);
            }
        } else {
            for (i = 0; i < num_threads; ++i) {
                fprintf(stdout, "jobqueue_push_static_jobs(): work group %i with %i jobs\n", i, groups[i]->size);
            }
        }
    }

    /* publish the groups and wake all workers */
    pthread_mutex_lock(&thpool->jobqueue->rwmutex);

    groups[num_threads - 1]->prev = thpool->jobqueue->group_front;
    thpool->jobqueue->group_front = groups[0];

    bsem_post_all(thpool->jobqueue->has_jobs);

    pthread_mutex_unlock(&thpool->jobqueue->rwmutex);

    // clean up
    free(durations);
    free(n_jobs);
    free(groups);

    return 0;
}
767 
794 static void thpool_wait(ThPool* thpool) {
795  pthread_mutex_lock(&thpool->thcount_lock);
796  while (thpool->jobqueue->len || thpool->jobqueue->group_front || thpool->num_threads_working) { //// PROBLEM HERE!!!! len is not locked!!!!
797  pthread_cond_wait(&thpool->threads_all_idle, &thpool->thcount_lock);
798  }
799  thpool->jobqueue->total_time = 0;
800  thpool->jobqueue->highest_expected_return = 0;
801  pthread_mutex_unlock(&thpool->thcount_lock);
802 
803  thpool_cleanup(thpool);
804 }
805 
806 
813 void thpool_cleanup(ThPool* thpool) {
814  // for debugging only
815 
816  struct timespec diffTime;
817  int gid = 0;
818  Thread* thread;
819  WorkGroup* workGroup;
820  WorkGroup* workGroupPrev;
821 
822  if (!cppadcg_pool_verbose) {
823  return;
824  }
825 
826  for (int j = 0; j < thpool->num_threads; ++j) {
827  thread = thpool->threads[j];
828 
829  workGroup = thread->processed_groups;
830  while (workGroup != NULL) {
831  timespec_diff(&workGroup->endTime, &workGroup->startTime, &diffTime);
832  fprintf(stdout, "# Thread %i, Group %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld, executed %i jobs\n",
833  thread->id, gid, workGroup->startTime.tv_sec, workGroup->startTime.tv_nsec, workGroup->endTime.tv_sec, workGroup->endTime.tv_nsec, diffTime.tv_sec,
834  diffTime.tv_nsec, workGroup->size);
835 
836  for (int i = 0; i < workGroup->size; ++i) {
837  Job* job = &workGroup->jobs[i];
838 
839  timespec_diff(&job->endTime, &job->startTime, &diffTime);
840  fprintf(stdout, "## Thread %i, Group %i, Job %i, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\n",
841  thread->id, gid, job->id, job->startTime.tv_sec, job->startTime.tv_nsec, job->endTime.tv_sec, job->endTime.tv_nsec, diffTime.tv_sec,
842  diffTime.tv_nsec);
843  }
844 
845  gid++;
846 
847  workGroupPrev = workGroup->prev;
848 
849  // clean-up
850  free(workGroup->jobs);
851  free(workGroup);
852 
853  workGroup = workGroupPrev;
854  }
855 
856  thread->processed_groups = NULL;
857  }
858 }
859 
/* Stops all worker threads, drains the queue bookkeeping and frees the pool.
 * Safe to call with NULL. */
static void thpool_destroy(ThPool* thpool) {
    /* No need to destroy if it's NULL */
    if (thpool == NULL) return;

    /* snapshot: threads created so far (used to free the Thread structs) */
    volatile int threads_total = thpool->num_threads_alive;

    /* End each thread's infinite loop */
    thpool->threads_keepalive = 0;

    /* Give one second to kill idle threads: keep waking them so they can
     * observe threads_keepalive == 0 and exit */
    double TIMEOUT = 1.0;
    time_t start, end;
    double tpassed = 0.0;
    time(&start);
    while (tpassed < TIMEOUT && thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);
        time(&end);
        tpassed = difftime(end, start);
    }

    /* Poll remaining threads (still busy after the timeout) */
    while (thpool->num_threads_alive) {
        bsem_post_all(thpool->jobqueue->has_jobs);
        sleep(1);
    }

    /* cleanup current work groups */
    thpool_cleanup(thpool);

    /* Job queue cleanup */
    jobqueue_destroy(thpool);
    free(thpool->jobqueue);

    /* Deallocs */
    int n;
    for (n = 0; n < threads_total; n++) {
        thread_destroy(thpool->threads[n]);
    }
    free(thpool->threads);
    free(thpool);

    if(cppadcg_pool_verbose) {
        fprintf(stdout, "thpool_destroy(): thread pool destroyed\n");
    }
}
924 
925 
926 /* ============================ THREAD ============================== */
927 
928 
929 /* Initialize a thread in the thread pool
930  *
931  * @param thread address to the pointer of the thread to be created
932  * @param id id to be given to the thread
933  * @return 0 on success, -1 otherwise.
934  */
935 static int thread_init(ThPool* thpool,
936  Thread** thread,
937  int id) {
938 
939  *thread = (Thread*) malloc(sizeof(Thread));
940  if (*thread == NULL) {
941  fprintf(stderr, "thread_init(): Could not allocate memory for thread\n");
942  return -1;
943  }
944 
945  (*thread)->thpool = thpool;
946  (*thread)->id = id;
947  (*thread)->processed_groups = NULL;
948 
949  pthread_create(&(*thread)->pthread, NULL, (void*) thread_do, (*thread));
950  pthread_detach((*thread)->pthread);
951  return 0;
952 }
953 
/* What each thread is doing
*
* In principle this is an endless loop. The only time this loop gets interrupted is once
* thpool_destroy() is invoked or the program exits.
*
* Each iteration: block on the has_jobs semaphore, mark itself working, then
* repeatedly pull work groups and execute their jobs until the queue is
* drained; finally decrement the working count and signal thpool_wait()
* when it was the last busy thread.
*
* @param thread thread that will run this function
* @return nothing
*/
static void* thread_do(Thread* thread) {
    float elapsed;
    int info;
    struct timespec cputime;
    JobQueue* queue;
    WorkGroup* workGroup;
    Job* job;
    thpool_function_type func_buff;
    void* arg_buff;
    int i;

    /* Set thread name for profiling and debugging */
    char thread_name[128] = {0};
    sprintf(thread_name, "thread-pool-%d", thread->id);

#if defined(__linux__)
    /* Use prctl instead to prevent using _GNU_SOURCE flag and implicit declaration */
    prctl(PR_SET_NAME, thread_name);
#elif defined(__APPLE__) && defined(__MACH__)
    pthread_setname_np(thread_name);
#else
    fprintf(stderr, "thread_do(): pthread_setname_np is not supported on this system");
#endif

    /* Assure all threads have been created before starting serving */
    ThPool* thpool = thread->thpool;

    /* Mark thread as alive (initialized) — thpool_init() busy-waits on this
     * counter before returning */
    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive += 1;
    pthread_mutex_unlock(&thpool->thcount_lock);

    queue = thpool->jobqueue;

    while (thpool->threads_keepalive) {

        /* sleep until work is published (or shutdown wakes everyone) */
        bsem_wait(queue->has_jobs);

        if (!thpool->threads_keepalive) {
            break;
        }

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working++;
        pthread_mutex_unlock(&thpool->thcount_lock);

        while (thpool->threads_keepalive) {
            /* Read job from queue and execute it */
            pthread_mutex_lock(&queue->rwmutex);
            workGroup = jobqueue_pull(thpool, thread->id);
            pthread_mutex_unlock(&queue->rwmutex);

            if (workGroup == NULL)
                break;

            if (cppadcg_pool_verbose) {
                get_monotonic_time2(&workGroup->startTime);
            }

            for (i = 0; i < workGroup->size; ++i) {
                job = &workGroup->jobs[i];

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->startTime);
                }

                /* benchmark only when the caller supplied an output slot */
                int do_benchmark = job->elapsed != NULL;
                if (do_benchmark) {
                    /* negative start time: adding the end time below yields
                     * the elapsed CPU time */
                    elapsed = -get_thread_time(&cputime, &info);
                }

                /* Execute the job */
                func_buff = job->function;
                arg_buff = job->arg;
                func_buff(arg_buff);

                if (do_benchmark && info == 0) {
                    elapsed += get_thread_time(&cputime, &info);
                    if (info == 0) {
                        (*job->elapsed) = elapsed;
                    }
                }

                if (cppadcg_pool_verbose) {
                    get_monotonic_time2(&job->endTime);
                }
            }

            if (cppadcg_pool_verbose) {
                /* keep the group so thpool_cleanup() can report it */
                get_monotonic_time2(&workGroup->endTime);

                if (thread->processed_groups == NULL) {
                    thread->processed_groups = workGroup;
                } else {
                    workGroup->prev = thread->processed_groups;
                    thread->processed_groups = workGroup;
                }
            } else {
                free(workGroup->jobs);
                free(workGroup);
            }
        }

        pthread_mutex_lock(&thpool->thcount_lock);
        thpool->num_threads_working--;
        if (!thpool->num_threads_working) {
            /* last busy thread: wake thpool_wait() */
            pthread_cond_signal(&thpool->threads_all_idle);
        }
        pthread_mutex_unlock(&thpool->thcount_lock);
    }

    pthread_mutex_lock(&thpool->thcount_lock);
    thpool->num_threads_alive--;
    pthread_mutex_unlock(&thpool->thcount_lock);

    return NULL;
}
1079 
1080 
/* Frees a thread's bookkeeping structure (the pthread itself has already
 * exited by the time this is called from thpool_destroy()). */
static void thread_destroy(Thread* thread) {
    free(thread);
}
1085 
1086 
1087 /* ============================ JOB QUEUE =========================== */
1088 
1089 
1090 /* Initialize queue */
1091 static int jobqueue_init(ThPool* thpool) {
1092 
1093  JobQueue* queue = (JobQueue*) malloc(sizeof(JobQueue));
1094  if (queue == NULL) {
1095  return -1;
1096  }
1097  thpool->jobqueue = queue;
1098  queue->len = 0;
1099  queue->front = NULL;
1100  queue->rear = NULL;
1101  queue->group_front = NULL;
1102  queue->total_time = 0;
1103  queue->highest_expected_return = 0;
1104 
1105  queue->has_jobs = (BSem*) malloc(sizeof(BSem));
1106  if (queue->has_jobs == NULL) {
1107  return -1;
1108  }
1109 
1110  pthread_mutex_init(&(queue->rwmutex), NULL);
1111  bsem_init(queue->has_jobs, 0);
1112 
1113  return 0;
1114 }
1115 
1116 
/* Clear the queue: discards all pending jobs/work groups and resets the
 * queue bookkeeping.
 * NOTE(review): jobqueue_pull() is invoked here without holding rwmutex —
 * presumably this is only called when no worker is running; confirm. */
static void jobqueue_clear(ThPool* thpool) {
    WorkGroup* group;
    int size;

    do {
        group = jobqueue_pull(thpool, -1); /* -1: not a worker thread id */
        if (group == NULL) {
            size = 0;
        } else {
            size = group->size;
            free(group->jobs);
            free(group);
        }
    } while (size > 0);

    thpool->jobqueue->front = NULL;
    thpool->jobqueue->rear = NULL;
    bsem_reset(thpool->jobqueue->has_jobs);
    thpool->jobqueue->len = 0;
    thpool->jobqueue->group_front = NULL;
    thpool->jobqueue->total_time = 0;
    thpool->jobqueue->highest_expected_return = 0;
}
1141 
1142 
1146 static void jobqueue_push_internal(JobQueue* queue,
1147  Job* newjob) {
1148  newjob->prev = NULL;
1149 
1150  switch (queue->len) {
1151 
1152  case 0: /* if no jobs in queue */
1153  queue->front = newjob;
1154  queue->rear = newjob;
1155  break;
1156 
1157  default: /* if jobs in queue */
1158  queue->rear->prev = newjob;
1159  queue->rear = newjob;
1160 
1161  }
1162  if(newjob->avgElapsed != NULL) {
1163  queue->total_time += *newjob->avgElapsed;
1164  }
1165  queue->len++;
1166 }
1167 
/* Thread-safe: appends a single job to the queue and wakes one worker. */
static void jobqueue_push(JobQueue* queue,
                          Job* newjob) {
    pthread_mutex_lock(&queue->rwmutex);

    jobqueue_push_internal(queue, newjob);

    bsem_post(queue->has_jobs);

    pthread_mutex_unlock(&queue->rwmutex);
}
1181 
1182 
1186 static void jobqueue_multipush(JobQueue* queue,
1187  Job* newjob[],
1188  int nJobs) {
1189  int i;
1190 
1191  pthread_mutex_lock(&queue->rwmutex);
1192 
1193  for(i = 0; i < nJobs; ++i) {
1194  jobqueue_push_internal(queue, newjob[i]);
1195  }
1196 
1197  bsem_post_all(queue->has_jobs);
1198 
1199  pthread_mutex_unlock(&queue->rwmutex);
1200 }
1201 
1202 static Job* jobqueue_extract_single(JobQueue* queue) {
1203  Job* job = queue->front;
1204 
1205  switch (queue->len) {
1206  case 0: /* if no jobs in queue */
1207  return NULL;
1208 
1209  case 1: /* if one job in queue */
1210  queue->front = NULL;
1211  queue->rear = NULL;
1212  queue->len = 0;
1213  queue->total_time = 0;
1214  queue->highest_expected_return = 0;
1215  return job;
1216 
1217  default: /* if >1 jobs in queue */
1218  queue->front = job->prev;
1219  queue->len--;
1220  if(job->avgElapsed != NULL) {
1221  queue->total_time -= *job->avgElapsed;
1222  }
1223  return job;
1224  }
1225 }
1226 
1227 static void jobqueue_extract_single_group(JobQueue* queue,
1228  WorkGroup* group) {
1229  Job* job = jobqueue_extract_single(queue);
1230  if(job != NULL) {
1231  group->size = 1;
1232  group->jobs = (Job*) malloc(sizeof(Job));
1233  group->jobs[0] = *job; // copy
1234  free(job);
1235  } else {
1236  group->size = 0;
1237  group->jobs = NULL;
1238  }
1239 }
1240 
1246 static WorkGroup* jobqueue_pull(ThPool* thpool,
1247  int id) {
1248 
1249  WorkGroup* group;
1250  Job* job;
1251  float current_time;
1252  float duration, duration_next, min_duration, target_duration;
1253  struct timespec timeAux;
1254  int info;
1255  int i;
1256  JobQueue* queue = thpool->jobqueue;
1257 
1258  if (schedule_strategy == SCHED_STATIC && queue->group_front != NULL) {
1259  // STATIC
1260  group = queue->group_front;
1261 
1262  queue->group_front = group->prev;
1263  group->prev = NULL;
1264 
1265  } else if (queue->len == 0) {
1266  // nothing to do
1267  group = NULL;
1268 
1269  } else if (schedule_strategy == SCHED_DYNAMIC || queue->len == 1 || queue->total_time <= 0) {
1270  // SCHED_DYNAMIC
1271  group = (WorkGroup*) malloc(sizeof(WorkGroup));
1272  group->prev = NULL;
1273 
1274  if (cppadcg_pool_verbose) {
1275  if (schedule_strategy == SCHED_GUIDED) {
1276  if (queue->len == 1)
1277  fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1278  else if (queue->total_time <= 0)
1279  fprintf(stdout, "jobqueue_pull(): Thread %i using single-job instead of multi-job (no timing information)\n", id);
1280  } else if (schedule_strategy == SCHED_STATIC && queue->len >= 1) {
1281  if (queue->total_time >= 0) {
1282  // this should not happen but just in case the user messed up
1283  fprintf(stderr, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1284  } else {
1285  fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with 1 job\n", id);
1286  }
1287  }
1288  }
1289 
1290  jobqueue_extract_single_group(thpool->jobqueue, group);
1291  } else { // schedule_strategy == SCHED_GUIDED
1292  // SCHED_GUIDED
1293  group = (WorkGroup*) malloc(sizeof(WorkGroup));
1294  group->prev = NULL;
1295 
1296  job = queue->front;
1297 
1298  if (job->avgElapsed == NULL) {
1299  if (cppadcg_pool_verbose) {
1300  fprintf(stderr, "jobqueue_pull(): Thread %i using single job instead of multi-job (No timing information for current job)\n", id);
1301  }
1302  // cannot use this strategy (something went wrong!)
1303  jobqueue_extract_single_group(thpool->jobqueue, group);
1304 
1305  } else {
1306  // there are at least 2 jobs in the queue
1307  group->size = 1;
1308  duration = *job->avgElapsed;
1309  duration_next = duration;
1310  job = job->prev;
1311  target_duration = queue->total_time * cppadcg_pool_guided_maxgroupwork / thpool->num_threads; // always positive
1312  current_time = get_monotonic_time(&timeAux, &info);
1313 
1314  if (queue->highest_expected_return > 0 && info) {
1315  min_duration = 0.9f * (queue->highest_expected_return - current_time);
1316  if (target_duration < min_duration) {
1317  target_duration = min_duration;
1318  }
1319  }
1320 
1321  do {
1322  if (job->avgElapsed == NULL) {
1323  break;
1324  }
1325  duration_next += *job->avgElapsed;
1326  if (duration_next < target_duration) {
1327  group->size++;
1328  duration = duration_next;
1329  } else {
1330  break;
1331  }
1332  job = job->prev;
1333  } while (job != queue->front);
1334 
1335  if (cppadcg_pool_verbose) {
1336  fprintf(stdout, "jobqueue_pull(): Thread %i given a work group with %i jobs for %e s (target: %e s)\n", id, group->size, duration, target_duration);
1337  }
1338 
1339  group->jobs = (Job*) malloc(group->size * sizeof(Job));
1340  for (i = 0; i < group->size; ++i) {
1341  job = jobqueue_extract_single(thpool->jobqueue);
1342  group->jobs[i] = *job; // copy
1343  free(job);
1344  }
1345 
1346  duration_next = current_time + duration; // the time when the current work is expected to end
1347  if(duration_next > queue->highest_expected_return)
1348  queue->highest_expected_return = duration_next;
1349  }
1350 
1351  }
1352  /* more than one job in queue -> post it */
1353  if (queue->len > 0 || queue->group_front != NULL) {
1354  bsem_post(queue->has_jobs);
1355  }
1356 
1357  return group;
1358 }
1359 
1360 
/* Free all queue resources back to the system.
 * NOTE(review): only the has_jobs semaphore is freed here; the JobQueue
 * struct itself is presumably released by the pool teardown code — confirm
 * against the allocation site. */
static void jobqueue_destroy(ThPool* thpool) {
    jobqueue_clear(thpool);           /* discard any jobs/groups still queued */
    free(thpool->jobqueue->has_jobs); /* counterpart of the semaphore's heap allocation (assumed) */
}
1366 
1367 
1368 
1369 
1370 
1371 /* ======================== SYNCHRONISATION ========================= */
1372 
1373 
1374 /* Init semaphore to 1 or 0 */
1375 static void bsem_init(BSem* bsem, int value) {
1376  if (value < 0 || value > 1) {
1377  fprintf(stderr, "bsem_init(): Binary semaphore can take only values 1 or 0");
1378  exit(1);
1379  }
1380  pthread_mutex_init(&(bsem->mutex), NULL);
1381  pthread_cond_init(&(bsem->cond), NULL);
1382  bsem->v = value;
1383 }
1384 
1385 
/* Reset semaphore to 0.
 * NOTE(review): this re-runs bsem_init, re-initialising the mutex and
 * condition variable without destroying them first — relies on the default
 * (non-allocating) pthread objects; confirm this is intentional. */
static void bsem_reset(BSem* bsem) {
    bsem_init(bsem, 0);
}
1390 
1391 
/* Post to at least one thread: set the flag and wake a single waiter */
static void bsem_post(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    bsem->v = 1;
    /* flag is set while holding the lock, so a waiter cannot miss it */
    pthread_cond_signal(&bsem->cond);
    pthread_mutex_unlock(&bsem->mutex);
}
1399 
1400 
/* Post to all threads: set the flag and wake every waiter */
static void bsem_post_all(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    bsem->v = 1;
    /* broadcast so every blocked thread re-checks the flag */
    pthread_cond_broadcast(&bsem->cond);
    pthread_mutex_unlock(&bsem->mutex);
}
1408 
1409 
/* Wait until the semaphore holds value 1, then consume it (reset to 0).
 * (Previous comment said "until semaphore has value 0", which contradicted
 * the loop condition below.) */
static void bsem_wait(BSem* bsem) {
    pthread_mutex_lock(&bsem->mutex);
    while (bsem->v != 1) {
        /* loop guards against spurious wake-ups */
        pthread_cond_wait(&bsem->cond, &bsem->mutex);
    }
    bsem->v = 0; /* take the post */
    pthread_mutex_unlock(&bsem->mutex);
}
1419 )*=*";
1420 
1421 const size_t CPPADCG_PTHREAD_POOL_C_FILE_SIZE = 43345;
1422