1#ifndef CPPAD_CG_MODEL_C_SOURCE_GEN_IMPL_INCLUDED
2#define CPPAD_CG_MODEL_C_SOURCE_GEN_IMPL_INCLUDED
// Identifier strings for the generated model functions.
// generateInfoSource() and generateAtomicFuncNames() build a function name as
// _name + "_" + <constant>; the remaining constants are presumably combined
// with the model name the same way - TODO confirm at their use sites.
// NOTE(review): "FORWAD" in FUNCTION_FORWAD_ZERO looks like a misspelling of
// "FORWARD"; it is part of the public identifier, so it is left unchanged.
24const std::string ModelCSourceGen<Base>::FUNCTION_FORWAD_ZERO =
"forward_zero";
27const std::string ModelCSourceGen<Base>::FUNCTION_JACOBIAN =
"jacobian";
30const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN =
"hessian";
33const std::string ModelCSourceGen<Base>::FUNCTION_FORWARD_ONE =
"forward_one";
36const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_ONE =
"reverse_one";
39const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_TWO =
"reverse_two";
42const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_JACOBIAN =
"sparse_jacobian";
45const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_HESSIAN =
"sparse_hessian";
48const std::string ModelCSourceGen<Base>::FUNCTION_JACOBIAN_SPARSITY =
"jacobian_sparsity";
51const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN_SPARSITY =
"hessian_sparsity";
54const std::string ModelCSourceGen<Base>::FUNCTION_HESSIAN_SPARSITY2 =
"hessian_sparsity2";
57const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_FORWARD_ONE =
"sparse_forward_one";
60const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_REVERSE_ONE =
"sparse_reverse_one";
63const std::string ModelCSourceGen<Base>::FUNCTION_SPARSE_REVERSE_TWO =
"sparse_reverse_two";
66const std::string ModelCSourceGen<Base>::FUNCTION_FORWARD_ONE_SPARSITY =
"forward_one_sparsity";
69const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_ONE_SPARSITY =
"reverse_one_sparsity";
// NOTE(review): this value starts with "sparse_", unlike "forward_one_sparsity"
// and "reverse_one_sparsity" above - confirm the prefix is intentional before
// assuming a common naming pattern.
72const std::string ModelCSourceGen<Base>::FUNCTION_REVERSE_TWO_SPARSITY =
"sparse_reverse_two_sparsity";
75const std::string ModelCSourceGen<Base>::FUNCTION_INFO =
"info";
78const std::string ModelCSourceGen<Base>::FUNCTION_ATOMIC_FUNC_NAMES =
"atomic_functions";
81const std::string ModelCSourceGen<Base>::CONST =
"const";
// Produces the generated sources on demand: the generation steps below run
// only while _sources is still empty. Returns a const reference to a
// string -> string map (presumably _sources itself - TODO confirm; the other
// generators in this file store entries under "<function name>.c").
92const std::map<std::string, std::string>& ModelCSourceGen<Base>::getSources(MultiThreadingType
multiThreadingType,
94 if (_sources.empty()) {
// The job is labelled with the quoted model name.
107 startingJob(
"'" + _name +
"'", JobTimer::SOURCE_FOR_MODEL);
// _zeroEvaluated is set right after the zero-order source is generated;
// isAtomicsUsed() consults this flag.
110 generateZeroSource();
111 _zeroEvaluated =
true;
115 generateJacobianSource();
119 generateHessianSource();
123 generateSparseForwardOneSources();
124 generateForwardOneSources();
128 generateSparseReverseOneSources();
129 generateReverseOneSources();
133 generateSparseReverseTwoSources();
134 generateReverseTwoSources();
137 if (_sparseJacobian) {
141 if (_sparseHessian) {
// The Jacobian sparsity source is generated when any of the sparse Jacobian,
// forward-one or reverse-one options is enabled.
145 if (_sparseJacobian || _forwardOne || _reverseOne) {
146 generateJacobianSparsitySource();
// The Hessian sparsity source is generated when the sparse Hessian or the
// reverse-two option is enabled.
149 if (_sparseHessian || _reverseTwo) {
150 generateHessianSparsitySource();
153 generateInfoSource();
155 generateAtomicFuncNames();
// Loop detection step: evaluates the model at order zero with CGBase
// arguments and asks the matcher to generate the tapes (_funNoLoops,
// _loopTapes). The first statement tests whether _relatedDepCandidates is
// empty.
161void ModelCSourceGen<Base>::generateLoops() {
162 if (_relatedDepCandidates.empty()) {
166 startingJob(
"", JobTimer::LOOP_DETECTION);
169 handler.setJobTimer(_jobTimer);
// One independent per domain entry of _fun.
171 std::vector<CGBase>
xx(_fun.Domain());
// Copy the values stored in _x into the independents.
174 for (
size_t i = 0;
i <
xx.size();
i++) {
175 xx[
i].setValue(_x[
i]);
// Zero-order forward evaluation of _fun on the CGBase independents.
179 std::vector<CGBase>
yy = _fun.Forward(0,
xx);
182 matcher.generateTapes(_funNoLoops, _loopTapes);
// When a verbose job timer is attached, report how many equation patterns
// and loops the matcher holds.
185 if (_jobTimer !=
nullptr && _jobTimer->isVerbose()) {
186 std::cout <<
" equation patterns: " <<
matcher.getEquationPatterns().size() <<
187 " loops: " <<
matcher.getLoops().size() << std::endl;
// Generates the C source of the "<_name>_info" function and stores it in
// _sources under "<function name>.c". The emitted body assigns *baseName, *m,
// *n, *depCount and *indCount.
192void ModelCSourceGen<Base>::generateInfoSource() {
195 std::string
funcName = _name +
"_" + FUNCTION_INFO;
// The name generator supplies the dependent/independent array variable lists
// whose sizes are written below.
197 std::unique_ptr<VariableNameGenerator<Base> > nameGen(createVariableNameGenerator());
203 "unsigned int* indCount",
204 "unsigned int* depCount"});
206 " *baseName = \"" << _baseTypeName <<
" " <<
localBaseName <<
"\";\n"
// m and n are taken from the range and domain sizes of _fun.
207 " *m = " << _fun.Range() <<
";\n"
208 " *n = " << _fun.Domain() <<
";\n"
209 " *depCount = " << nameGen->getDependent().size() <<
"; // number of dependent array variables\n"
210 " *indCount = " << nameGen->getIndependent().size() <<
"; // number of independent array variables\n"
// Store the text accumulated in _cache as the source of this function.
213 _sources[
funcName +
".c"] = _cache.str();
// Generates the C source of the "<_name>_atomic_functions" function and stores
// it in _sources under "<function name>.c". The emitted code holds a static
// array with the entries of _atomicFunctions as string literals and assigns
// it, together with its length, to *names and *n.
217void ModelCSourceGen<Base>::generateAtomicFuncNames() {
218 std::string
funcName = _name +
"_" + FUNCTION_ATOMIC_FUNC_NAMES;
219 size_t n = _atomicFunctions.size();
222 "unsigned long* n"});
224 " static const char* atomic[" <<
n <<
"] = {";
// Comma-separated, double-quoted entries of _atomicFunctions.
225 for (
size_t i = 0;
i <
n;
i++) {
226 if (
i > 0) _cache <<
", ";
227 _cache <<
"\"" << _atomicFunctions[
i] <<
"\"";
230 " *names = atomic;\n"
231 " *n = " <<
n <<
";\n"
// Store the text accumulated in _cache as the source of this function.
234 _sources[
funcName +
".c"] = _cache.str();
// Reports whether atomic functions are used. Once _zeroEvaluated is set
// (getSources() sets it after generating the zero-order source) the answer
// comes from _atomicFunctions; the other visible return asks
// getAtomicsInfo() instead.
238bool ModelCSourceGen<Base>::isAtomicsUsed() {
239 if (_zeroEvaluated) {
240 return _atomicFunctions.size() > 0;
242 return !getAtomicsInfo().empty();
// Returns the map of atomic-use information, creating it on first call from
// adl.findAtomicsUsage() and reusing the stored map afterwards.
// NOTE(review): _atomicsInfo is allocated with a raw new; the matching delete
// is not in this section - confirm the owner releases it.
247const std::map<size_t, AtomicUseInfo<Base> >& ModelCSourceGen<Base>::getAtomicsInfo() {
248 if (_atomicsInfo ==
nullptr) {
250 _atomicsInfo =
new std::map<size_t, AtomicUseInfo<Base> >(
adl.findAtomicsUsage());
252 return *_atomicsInfo;
257 const SparsitySetType& sparsity) {
258 std::vector<Color>
colors(sparsity.size());
265 for (
size_t i = 0;
i < sparsity.size();
i++) {
266 const std::set<size_t>& row = sparsity[
i];
267 if (row.size() == 0) {
273 if (_custom_hess.defined) {
274 for (
size_t j : row) {
315 const std::string&
suffix,
317 const std::map<
size_t, std::vector<size_t> >& elements) {
322 std::string
argsDcl =
langC.generateDefaultFunctionArgumentsDcl();
323 std::vector<std::string>
argsDcl2 =
langC.generateDefaultFunctionArgumentsDcl2();
324 std::string
args =
langC.generateDefaultFunctionArguments();
337 for (
const auto&
it : elements) {
339 _cache <<
" case " <<
it.first <<
":\n"
341 " return 0; // done\n";
343 _cache <<
" default:\n"
344 " return 1; // error\n"
363 const std::string&
suffix,
364 const std::map<size_t, T>& elements,
366 for (
const auto&
it : elements) {
367 size_t pos =
it.first;
// Emits into _cache a C function exposing a one-dimensional sparsity pattern:
// the indices in `sparsity` are printed as a static array named "nonzeros",
// and the emitted code assigns that array to *sparsity and its length to *nnz.
// `function` is presumably the name of the emitted function - TODO confirm.
373void ModelCSourceGen<Base>::generateSparsity1DSource(
const std::string&
function,
374 const std::vector<size_t>& sparsity) {
376 "unsigned long* nnz"});
381 LanguageC<Base>::printStaticIndexArray(_cache,
"nonzeros", sparsity);
383 _cache <<
" *sparsity = nonzeros;\n"
384 " *nnz = " << sparsity.size() <<
";\n"
// Emits into _cache a C function exposing a row/column sparsity pattern.
// sparsity.rows and sparsity.cols are printed as static arrays named "rows"
// and "cols"; the emitted code assigns *row and sets *nnz to the number of
// entries. `function` is presumably the emitted function's name - TODO confirm.
389void ModelCSourceGen<Base>::generateSparsity2DSource(
const std::string&
function,
390 const LocalSparsityInfo& sparsity) {
391 const std::vector<size_t>& rows = sparsity.rows;
392 const std::vector<size_t>& cols = sparsity.cols;
// Row and column index vectors must have the same length.
394 CPPADCG_ASSERT_UNKNOWN(rows.size() == cols.size());
397 "unsigned long const** col",
398 "unsigned long* nnz"});
403 LanguageC<Base>::printStaticIndexArray(_cache,
"rows", rows);
406 LanguageC<Base>::printStaticIndexArray(_cache,
"cols", cols);
408 _cache <<
" *row = rows;\n"
410 " *nnz = " << rows.size() <<
";\n"
// Emits into _cache a C function exposing several row/column sparsity
// patterns, one per entry of `sparsities`. For each entry the row and column
// indices are printed as static arrays (named through `os`) and the entry
// count is recorded in nnzs, which is itself printed as a static array "nnzs".
// `function` is presumably the emitted function's name - TODO confirm.
415void ModelCSourceGen<Base>::generateSparsity2DSource2(
const std::string&
function,
416 const std::vector<LocalSparsityInfo>&
sparsities) {
418 "unsigned long const** row",
419 "unsigned long const** col",
420 "unsigned long* nnz"});
// Scratch stream used to build the names of the emitted arrays.
423 std::ostringstream os;
430 const std::vector<size_t>& rows =
sparsities[
i].rows;
431 const std::vector<size_t>& cols =
sparsities[
i].cols;
// Row and column index vectors must have the same length.
432 CPPADCG_ASSERT_UNKNOWN(rows.size() == cols.size());
434 nnzs[
i] = rows.size();
440 LanguageC<Base>::printStaticIndexArray(_cache, os.str(), rows);
445 LanguageC<Base>::printStaticIndexArray(_cache, os.str(), cols);
// Static table of pointers with maxNnzIndex slots.
455 _cache <<
" static " << LanguageC<Base>::U_INDEX_TYPE <<
" const * const " << name <<
"[" <<
maxNnzIndex
475 LanguageC<Base>::printStaticIndexArray(_cache,
"nnzs",
nnzs);
// The emitted code can also set *row to 0 (the condition is not shown here).
489 _cache <<
" *row = 0;\n"
// Emits into _cache a C function exposing one index list per key of
// `elements`. Each list is printed as a static array named "els<key>"; a
// static pointer table "els" and a static array "nnzs" are printed as well,
// and the emitted code reads els[pos] and nnzs[pos] into *elements and *nnz.
// `function` is presumably the emitted function's name - TODO confirm.
498void ModelCSourceGen<Base>::generateSparsity1DSource2(
const std::string&
function,
499 const std::map<
size_t, std::vector<size_t> >& elements) {
501 "unsigned long const** elements",
502 "unsigned long* nnz"});
// Sized to (largest key + 1), or 0 when `elements` is empty; std::map is
// ordered, so rbegin() holds the largest key.
505 std::vector<size_t>
nnzs(elements.empty()? 0: elements.rbegin()->
first + 1);
509 for (
const auto&
it : elements) {
511 const std::vector<size_t>&
els =
it.second;
// Array name: "els" followed by the map key.
514 std::ostringstream os;
515 os <<
"els" <<
it.first;
516 LanguageC<Base>::printStaticIndexArray(_cache, os.str(),
els);
527 _cache <<
" static " << LanguageC<Base>::U_INDEX_TYPE <<
" const * const els[" <<
maxNnzIndex <<
"] = {";
528 auto it = elements.begin();
// Positions without a matching key in `elements` are handled separately from
// positions that refer to an "els<i>" array.
533 if (
it == elements.end() ||
i !=
it->first) {
536 _cache <<
"els" <<
i;
543 LanguageC<Base>::printStaticIndexArray(_cache,
"nnzs",
nnzs);
548 " *elements = els[pos];\n"
549 " *nnz = nnzs[pos];\n"
// The emitted code can also set *elements to 0 (the condition is not shown
// here).
555 _cache <<
" *elements = 0;\n"
// Convenience overload: forwards to the (elements, rows, cols) overload using
// the rows and cols members of `sparsity`.
563inline std::map<size_t, std::vector<std::set<size_t> > > ModelCSourceGen<Base>::determineOrderByCol(
const std::map<
size_t, std::vector<size_t> >& elements,
564 const LocalSparsityInfo& sparsity) {
565 return determineOrderByCol(elements, sparsity.rows, sparsity.cols);
// Builds a map (userLocation) from the entries of `elements`, whose keys are
// treated as column indices. userRows/userCols are presumably the
// user-requested sparsity pattern - TODO confirm against callers.
569inline std::map<size_t, std::vector<std::set<size_t> > > ModelCSourceGen<Base>::determineOrderByCol(
const std::map<
size_t, std::vector<size_t> >& elements,
570 const std::vector<size_t>&
userRows,
571 const std::vector<size_t>&
userCols) {
572 std::map<size_t, std::vector<std::set<size_t> > >
userLocation;
574 for (
const auto&
it : elements) {
575 size_t col =
it.first;
// Single-column overload: takes one column index plus the userRows/userCols
// vectors and returns a vector of index sets.
585inline std::vector<std::set<size_t> > ModelCSourceGen<Base>::determineOrderByCol(
size_t col,
587 const std::vector<size_t>&
userRows,
588 const std::vector<size_t>&
userCols) {
// Convenience overload: forwards to the (elements, rows, cols) overload using
// the rows and cols members of `sparsity`.
605inline std::map<size_t, std::vector<std::set<size_t> > > ModelCSourceGen<Base>::determineOrderByRow(
const std::map<
size_t, std::vector<size_t> >& elements,
606 const LocalSparsityInfo& sparsity) {
607 return determineOrderByRow(elements, sparsity.rows, sparsity.cols);
// Builds a map (userLocation) from the entries of `elements`, whose keys are
// treated as row indices. userRows/userCols are presumably the user-requested
// sparsity pattern - TODO confirm against callers.
611inline std::map<size_t, std::vector<std::set<size_t> > > ModelCSourceGen<Base>::determineOrderByRow(
const std::map<
size_t, std::vector<size_t> >& elements,
612 const std::vector<size_t>&
userRows,
613 const std::vector<size_t>&
userCols) {
614 std::map<size_t, std::vector<std::set<size_t> > >
userLocation;
616 for (
const auto&
it : elements) {
617 size_t row =
it.first;
// Single-row overload: takes one row index plus the userRows/userCols vectors
// and returns a vector of index sets.
626inline std::vector<std::set<size_t> > ModelCSourceGen<Base>::determineOrderByRow(
size_t row,
628 const std::vector<size_t>&
userRows,
629 const std::vector<size_t>&
userCols) {
// Writes the file-level preamble for the thread-pool variant into `cache`:
// the contents of CPPADCG_PTHREAD_POOL_H_FILE, the ExecArgStruct typedef
// (function pointer, input/output arrays of baseTypeName, atomic function
// struct) and the static exec_func() wrapper.
646void ModelCSourceGen<Base>::printFileStartPThreads(std::ostringstream&
cache,
647 const std::string& baseTypeName) {
649 cache << CPPADCG_PTHREAD_POOL_H_FILE <<
"\n";
651 cache <<
"typedef struct ExecArgStruct {\n"
652 " cppadcg_function_type func;\n"
653 " " << baseTypeName +
" const *const * in;\n"
654 " " << baseTypeName +
"* out[1];\n"
655 " struct LangCAtomicFun atomicFun;\n"
// exec_func casts its void* argument back to ExecArgStruct* and calls the
// stored function with the stored in/out/atomicFun values.
658 "static void exec_func(void* arg) {\n"
659 " ExecArgStruct* eArg = (ExecArgStruct*) arg;\n"
660 " (*eArg->func)(eArg->in, eArg->out, eArg->atomicFun);\n"
// Writes into `cache` the local declarations used at the start of a generated
// thread-pool function: the args array, the static execute_functions,
// ref_elapsed, elapsed, order and job2Thread arrays (each with `size`
// entries), and the benchmarking state variables.
665void ModelCSourceGen<Base>::printFunctionStartPThreads(std::ostringstream&
cache,
// Comma-separated list with one item per job.
669 for (
size_t i = 0;
i < size; ++
i) {
670 if (
i != 0)
cache <<
", ";
676 cache <<
" ExecArgStruct* args[" << size <<
"];\n";
677 cache <<
" static cppadcg_thpool_function_type execute_functions[" << size <<
"] = ";
680 cache <<
" static float ref_elapsed[" << size <<
"] = ";
683 cache <<
" static float elapsed[" << size <<
"] = ";
686 " static int order[" << size <<
"] = {";
// Comma-separated initializer entries for order[].
687 for (
size_t i = 0;
i < size; ++
i) {
688 if (
i != 0)
cache <<
", ";
692 " static int job2Thread[" << size <<
"] = ";
695 " static int last_elapsed_changed = 1;\n"
696 " unsigned int nBench = cppadcg_thpool_get_n_time_meas();\n"
697 " static unsigned int n_meas = 0;\n"
// With no jobs (size == 0) the emitted do_benchmark is the constant 0;
// otherwise it is the emitted run-time test on n_meas and the pool state.
698 " int do_benchmark = " << (size > 0 ?
"(n_meas < nBench && !cppadcg_thpool_is_disabled())" :
"0") <<
";\n"
699 " float* elapsed_p = do_benchmark ? elapsed : NULL;\n";
// Writes into `cache` the closing part of a generated thread-pool function:
// the call to cppadcg_thpool_add_jobs() for `size` jobs, the wait on the
// pool, a loop over the jobs, and the benchmark bookkeeping that runs when
// do_benchmark is set.
703void ModelCSourceGen<Base>::printFunctionEndPThreads(std::ostringstream&
cache,
705 cache <<
" cppadcg_thpool_add_jobs(execute_functions, (void**) args, ref_elapsed, elapsed_p, order, job2Thread, " << size <<
", last_elapsed_changed" <<
");\n"
707 " cppadcg_thpool_wait();\n"
709 " for(i = 0; i < " << size <<
"; ++i) {\n"
713 " if(do_benchmark) {\n"
714 " cppadcg_thpool_update_order(ref_elapsed, n_meas, elapsed, order, " << size <<
");\n"
717 " last_elapsed_changed = 0;\n"
// Writes the file-level preamble for the OpenMP variant into `cache`: the
// contents of CPPADCG_OPENMP_H_FILE followed by includes of <stdio.h> and
// <time.h>.
722void ModelCSourceGen<Base>::printFileStartOpenMP(std::ostringstream&
cache) {
723 cache << CPPADCG_OPENMP_H_FILE <<
"\n"
725 "#include <stdio.h>\n"
726 "#include <time.h>\n";
// Writes into `cache` the local declarations used at the start of a generated
// OpenMP function: the saved schedule, the enabled/verbose flags, per-job
// timing and thread-id arrays with `size` entries, and the thread count.
730void ModelCSourceGen<Base>::printFunctionStartOpenMP(std::ostringstream&
cache,
733 " enum omp_sched_t old_kind;\n"
734 " int old_modifier;\n"
735 " int enabled = !cppadcg_openmp_is_disabled();\n"
736 " int verbose = cppadcg_openmp_is_verbose();\n"
737 " struct timespec start[" << size <<
"];\n"
738 " struct timespec end[" << size <<
"];\n"
739 " int thread_id[" << size <<
"];\n"
// The emitted code never uses more threads than there are jobs.
740 " unsigned int n_threads = cppadcg_openmp_get_threads();\n"
741 " if(n_threads > " << size <<
")\n"
742 " n_threads = " << size <<
";\n"
// The current schedule is read into old_kind/old_modifier before the
// scheduler strategy is applied; printLoopEndOpenMP() emits the matching
// omp_set_schedule(old_kind, old_modifier).
745 " omp_get_schedule(&old_kind, &old_modifier);\n"
746 " cppadcg_openmp_apply_scheduler_strategy();\n"
// Writes into `cache` the head of the generated parallel loop over `size`
// jobs: the "omp parallel for" pragma, the loop header, and the code that
// records the thread id and start time of each job, or zeroes the start/end
// timestamps.
751void ModelCSourceGen<Base>::printLoopStartOpenMP(std::ostringstream&
cache,
753 cache <<
"#pragma omp parallel for private(outLocal) schedule(runtime) if(enabled) num_threads(n_threads)\n"
754 " for(i = 0; i < " << size <<
"; ++i) {\n"
757 " thread_id[i] = omp_get_thread_num();\n"
758 " info = clock_gettime(CLOCK_MONOTONIC, &start[i]);\n"
760 " start[i].tv_sec = 0;\n"
761 " start[i].tv_nsec = 0;\n"
762 " end[i].tv_sec = 0;\n"
763 " end[i].tv_nsec = 0;\n"
// Writes into `cache` the tail of the generated parallel loop: the end-time
// measurement for each job, the restoration of the previous OpenMP schedule,
// and a report loop that prints start, end and elapsed time for each of the
// `size` jobs.
770void ModelCSourceGen<Base>::printLoopEndOpenMP(std::ostringstream&
cache,
775 " info = clock_gettime(CLOCK_MONOTONIC, &end[i]);\n"
777 " end[i].tv_sec = 0;\n"
778 " end[i].tv_nsec = 0;\n"
785 " omp_set_schedule(old_kind, old_modifier);\n"
789 " struct timespec diff;\n"
790 " for (i = 0; i < " << size <<
"; ++i) {\n"
// Emitted timespec subtraction: when the nanosecond difference is negative,
// one second is borrowed and 1000000000 ns are added.
791 " if ((end[i].tv_nsec - start[i].tv_nsec) < 0) {\n"
792 " diff.tv_sec = end[i].tv_sec - start[i].tv_sec - 1;\n"
793 " diff.tv_nsec = end[i].tv_nsec - start[i].tv_nsec + 1000000000;\n"
795 " diff.tv_sec = end[i].tv_sec - start[i].tv_sec;\n"
796 " diff.tv_nsec = end[i].tv_nsec - start[i].tv_nsec;\n"
// NOTE(review): the format prints the job index with %li; the declaration of
// `i` in the generated code is not part of this section - confirm it is a long.
798 " fprintf(stdout, \"## Thread %i, Job %li, started at %ld.%.9ld, ended at %ld.%.9ld, elapsed %ld.%.9ld\\n\",\n"
799 " thread_id[i], i, start[i].tv_sec, start[i].tv_nsec, end[i].tv_sec, end[i].tv_nsec, diff.tv_sec, diff.tv_nsec);\n"
// Job-start hook taking a job name and a job type; the work is guarded by a
// null check on _jobTimer (presumably it forwards to the timer, mirroring
// finishedJob() - TODO confirm).
806void ModelCSourceGen<Base>::startingJob(
const std::string&
jobName,
807 const JobType&
type) {
808 if (_jobTimer !=
nullptr)
// Job-end hook: forwards to _jobTimer->finishedJob() when a job timer is
// attached, and does nothing otherwise.
813inline void ModelCSourceGen<Base>::finishedJob() {
814 if (_jobTimer !=
nullptr)
815 _jobTimer->finishedJob();
static void printFunctionDeclaration(std::ostringstream &out, const std::string &returnType, const std::string &functionName, const std::vector< std::string > &arguments, const std::vector< std::string > &arguments2={})
virtual void generateGlobalDirectionalFunctionSource(const std::string &function, const std::string &function2_suffix, const std::string &function_sparsity, const std::map< size_t, std::vector< size_t > > &elements)
std::vector< ModelCSourceGen< Base >::Color > colorByRow(const std::set< size_t > &columns, const SparsitySetType &sparsity)
bool GreaterThanZero(const cg::CG< Base > &x)