CppADCodeGen  2.3.0
A C++ Algorithmic Differentiation Package with Source Code Generation
dependent_pattern_matcher.hpp
1 #ifndef CPPAD_CG_DEPENDENT_PATTERN_MATCHER_INCLUDED
2 #define CPPAD_CG_DEPENDENT_PATTERN_MATCHER_INCLUDED
3 /* --------------------------------------------------------------------------
4  * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
5  * Copyright (C) 2013 Ciengis
6  *
7  * CppADCodeGen is distributed under multiple licenses:
8  *
9  * - Eclipse Public License Version 1.0 (EPL1), and
10  * - GNU General Public License Version 3 (GPL3).
11  *
12  * EPL1 terms and conditions can be found in the file "epl-v10.txt", while
13  * terms and conditions for the GPL3 can be found in the file "gpl3.txt".
14  * ----------------------------------------------------------------------------
15  * Author: Joao Leal
16  */
17 
18 //#define CPPADCG_PRINT_DEBUG
19 
20 namespace CppAD {
21 namespace cg {
22 
23 template<class Base>
25 public:
28 public:
29 
31  if (e1->depRefIndex < e2->depRefIndex) {
32  eq1 = e1;
33  eq2 = e2;
34  } else {
35  eq1 = e2;
36  eq2 = e1;
37  }
38  }
39 };
40 
41 template<class Base>
42 inline bool operator<(const UniqueEquationPair<Base>& p1, const UniqueEquationPair<Base>& p2) {
43  return p1.eq1->depRefIndex < p2.eq1->depRefIndex || (!(p2.eq1->depRefIndex < p1.eq1->depRefIndex) && p1.eq2->depRefIndex < p2.eq2->depRefIndex);
44 }
45 
49 template<class Base>
51 public:
52  using CGBase = CG<Base>;
53 private:
54 
// How a temporary shared by two dependents was classified by
// findSharedTemporaries() each time it was reached.
55  enum class INDEXED_OPERATION_TYPE {
    // recorded when the operation was flagged as indexed
56  INDEXED,
    // recorded when the operation was flagged as not indexed
57  NONINDEXED,
    // the same node was recorded with both classifications;
    // findLoops() refuses to combine equations that share such a node
58  BOTH
59  };
// (classification of a shared operation, operation count stored for it by findSharedTemporaries())
60  using Indexed2OpCountType = std::pair<INDEXED_OPERATION_TYPE, size_t>;
// dependent of eq1 -> dependent of eq2 -> shared operation -> (classification, operation count),
// where eq1/eq2 are the members of the UniqueEquationPair used as key in equationShared_
61  using Dep1Dep2SharedType = std::map<size_t, std::map<size_t, std::map<OperationNode<Base>*, Indexed2OpCountType> > >;
// (dependent index of the first equation, dependent index of the second equation)
62  using DepPairType = std::pair<size_t, size_t>;
// total shared operation count -> dependent pair -> the shared operations of that pair
// (the pointed-to maps are not owned here; findLoops() points them into equationShared_)
63  using TotalOps2validDepsType = std::map<size_t, std::map<DepPairType, const std::map<OperationNode<Base>*, Indexed2OpCountType>* > >;
// equation pair -> its TotalOps2validDepsType
64  using Eq2totalOps2validDepsType = std::map<UniqueEquationPair<Base>, TotalOps2validDepsType*>;
// largest shared operation count of an equation pair -> equation pairs having that count
65  using MaxOps2eq2totalOps2validDepsType = std::map<size_t, Eq2totalOps2validDepsType>;
66 
67 private:
// code handler obtained from the first independent variable in the constructor
68  CodeHandler<Base>* handler_;
// NOTE(review): the constructor and several methods use a member named varId_ (an id per operation
// node, 0 meaning "no id"); its declaration is not visible in this listing - confirm it exists upstream
70  CodeHandlerVector<Base, bool> varIndexed_; // which nodes depend on indexed independent variables
// groups of dependent indexes to be tested for a common equation pattern (held by reference)
71  const std::vector<std::set<size_t> >& relatedDepCandidates_;
72  std::vector<CGBase> dependents_; // a copy
// the independent variables (held by reference)
73  const std::vector<CGBase>& independents_;
// the equation patterns created by findRelatedVariables()
74  std::vector<EquationPattern<Base>*> equations_;
// the equation pattern currently being processed
75  EquationPattern<Base>* eqCurr_;
// dependent index -> equation pattern containing that dependent
76  std::map<size_t, EquationPattern<Base>*> dep2Equation_;
// equation pattern -> loop currently holding it (updated when loops are merged)
77  std::map<EquationPattern<Base>*, Loop<Base>*> equation2Loop_;
// the loops found so far; deleted by the destructor
78  std::vector<Loop<Base>*> loops_;
// pairs of equations that must not end up in the same loop (stored in both directions)
82  std::map<EquationPattern<Base>*, std::set<EquationPattern<Base>*> > incompatible_;
// for each pair of equations, the temporaries shared between their dependents
86  std::map<UniqueEquationPair<Base>, Dep1Dep2SharedType> equationShared_;
// passed to Loop::createLoopModel() and later read by createNewTape(), which places each
// node at position (number of non-loop equations + index) of the loop free tape
91  std::map<OperationNode<Base>*, size_t> origTemp2Index_;
// node id -> dependents which were marked as using that node (see markOperationsWithDependent())
92  std::vector<std::set<size_t> > id2Deps;
// next id handed out by assignIds(); ids start at 1
93  size_t idCounter_;
// the id given to each node by assignIds(), used as a sorting key in groupByLoopEqOp()
98  CodeHandlerVector<Base, size_t> origShareNodeId_;
// color value handed to EquationPattern::testAdd() by findRelatedVariables()
100  size_t color_;
101 public:
102 
112  DependentPatternMatcher(const std::vector<std::set<size_t> >& relatedDepCandidates,
113  const std::vector<CGBase>& dependents,
114  const std::vector<CGBase>& independents) :
115  handler_(independents[0].getCodeHandler()),
116  varId_(*handler_),
117  varIndexed_(*handler_),
118  relatedDepCandidates_(relatedDepCandidates),
119  dependents_(dependents),
120  independents_(independents),
121  idCounter_(0),
122  origShareNodeId_(*handler_),
123  color_(0) {
124  CPPADCG_ASSERT_UNKNOWN(independents_.size() > 0);
125  CPPADCG_ASSERT_UNKNOWN(independents_[0].getCodeHandler() != nullptr);
126  equations_.reserve(relatedDepCandidates_.size());
127  origShareNodeId_.adjustSize();
128  }
129 
130  const std::vector<EquationPattern<Base>*>& getEquationPatterns() const {
131  return equations_;
132  }
133 
134  const std::vector<Loop<Base>*>& getLoops() const {
135  return loops_;
136  }
137 
147  virtual void generateTapes(LoopFreeModel<Base>*& nonLoopTape,
148  std::set<LoopModel<Base>*>& loopTapes) {
149 
150  for (size_t j = 0; j < independents_.size(); j++) {
151  std::vector<size_t>& info = independents_[j].getOperationNode()->getInfo();
152  info.resize(1);
153  info[0] = j;
154  }
155 
156  findLoops();
157 
158  nonLoopTape = createNewTape();
159 
160  loopTapes.clear();
161  for (size_t l = 0; l < loops_.size(); l++) {
162  Loop<Base>* loop = loops_[l];
163  loopTapes.insert(loop->releaseLoopModel());
164  }
165  }
166 
167  virtual ~DependentPatternMatcher() {
168  for (size_t l = 0; l < loops_.size(); l++) {
169  delete loops_[l];
170  }
171  }
172 
173 private:
174 
/**
 * Finds the loops formed by the candidate dependents.
 *
 * Steps, in order:
 *  1. candidate dependents which are directly an Inv node are replaced by an
 *     Alias node;
 *  2. ids are assigned to the nodes and the equation patterns are detected;
 *  3. one loop is created per equation pattern while the temporaries shared
 *     with the previously visited equations are recorded;
 *  4. loops whose equations share temporaries are merged when
 *     isCompatible() accepts them (largest shared operation count first);
 *  5. remaining loops with the same iteration count and no blacklisted
 *     equations are merged;
 *  6. a model is created for every loop and the node ids are reset.
 *
 * @return a copy of the vector of loops
 */
181  virtual std::vector<Loop<Base>*> findLoops() {
182  using namespace std;
183 
184  size_t rSize = relatedDepCandidates_.size();
185  for (size_t r = 0; r < rSize; r++) {
186  const std::set<size_t>& candidates = relatedDepCandidates_[r];
187  for (size_t iDep : candidates) {
188  OperationNode<Base>* node = dependents_[iDep].getOperationNode();
189  if (node != nullptr && node->getOperationType() == CGOpCode::Inv) {
    // the dependent is an Inv node itself: use an Alias node of it instead
    // (presumably so that a dependent node is never an independent node - TODO confirm)
198  CPPADCG_ASSERT_UNKNOWN(handler_ == dependents_[iDep].getCodeHandler());
199  dependents_[iDep] = CG<Base>(*handler_->makeNode(CGOpCode::Alias, *node));
200  }
201  }
202  }
203 
    // an id of zero means that no id was assigned yet (see assignIds())
204  varId_.adjustSize();
205  varId_.fill(0);
206 
207  // assign a unique Id to each node
208  assignIds();
    // ids start at 1, hence the extra element
209  id2Deps.resize(idCounter_ + 1);
210 
    // creates the equation patterns (equations_)
214  findRelatedVariables();
215 
216  for (EquationPattern<Base>* eq : equations_) {
217  for (size_t depIt : eq->dependents) {
218  dep2Equation_[depIt] = eq;
219  }
220  }
221 
222  const size_t eq_size = equations_.size();
223  loops_.reserve(eq_size);
224 
    // state shared by all the calls to isCompatible() below
225  SmartSetPointer<set<size_t> > dependentRelations;
226  std::vector<set<size_t>*> dep2Relations(dependents_.size(), nullptr);
227  map<size_t, set<size_t> > dependentBlackListRelations;
228 
229  /*******************************************************************
230  * Combine related equations in the same loops
231  * (equations that share temporary variables)
232  ******************************************************************/
236  varIndexed_.adjustSize();
237  varIndexed_.fill(false);
238 
239  for (size_t e = 0; e < eq_size; e++) {
240  EquationPattern<Base>* eq = equations_[e];
241  eqCurr_ = eq;
242 
243  for (size_t depIt : eq->dependents) {
244  OperationNode<Base>* node = dependents_[depIt].getOperationNode();
245  // will define the dependents associated with each operation
246  markOperationsWithDependent(node, depIt);
247  }
248 
    // the first equation has no previously marked equation to share temporaries with
252  if (e > 0) {
253  handler_->startNewOperationTreeVisit();
254 
255  for (size_t depIt : eq->dependents) {
256  findSharedTemporaries(dependents_[depIt], depIt); // a color is used to mark indexed paths
257  }
258 
262  for (size_t depIt : eq->dependents) {
263  OperationNode<Base>* node = dependents_[depIt].getOperationNode();
264  EquationPattern<Base>::uncolor(node, varIndexed_); // must uncolor
265  }
266  }
267 
268  // create a loop for this equation
269  Loop<Base>* loop = new Loop<Base>(*eq);
270  loops_.push_back(loop);
271  equation2Loop_[eq] = loop;
272  }
273 
274  /*******************************************************************
275  * Attempt to combine loops with shared variables
276  ******************************************************************/
277  MaxOps2eq2totalOps2validDepsType maxOps2Eq2totalOps2validDeps;
278  Eq2totalOps2validDepsType eq2totalOps2validDeps;
    // keeps track of the TotalOps2validDepsType objects allocated below
279  SmartListPointer<TotalOps2validDepsType> totalOps2validDepsMem;
280 
    // at this point every loop holds exactly one equation
286  for (size_t l1 = 0; l1 < loops_.size(); l1++) {
287  Loop<Base>* loop1 = loops_[l1];
288  CPPADCG_ASSERT_UNKNOWN(loop1->equations.size() == 1);
289  EquationPattern<Base>* eq1 = *loop1->equations.begin();
290 
291  for (size_t l2 = l1 + 1; l2 < loops_.size(); l2++) {
292  Loop<Base>* loop2 = loops_[l2];
293  CPPADCG_ASSERT_UNKNOWN(loop2->equations.size() == 1);
294  EquationPattern<Base>* eq2 = *loop2->equations.begin();
295 
296  UniqueEquationPair<Base> eqRel(eq1, eq2);
297  const auto eqSharedit = equationShared_.find(eqRel);
298  if (eqSharedit == equationShared_.end())
299  continue; // nothing is shared between eq1 and eq2
300 
301  const Dep1Dep2SharedType& dep1Dep2Shared = eqSharedit->second;
302 
306  TotalOps2validDepsType* totalOps2validDeps = new TotalOps2validDepsType();
307  totalOps2validDepsMem.push_back(totalOps2validDeps);
308  size_t maxOps = 0; // the maximum number of shared operations between two dependents
309 
310  bool canCombine = true;
311 
312  /***************************************************
313  * organize relations between dependents
314  **************************************************/
315  for (const auto& itDep1Dep2 : dep1Dep2Shared) {
316  size_t dep1 = itDep1Dep2.first;
317  const map<size_t, map<OperationNode<Base>*, Indexed2OpCountType> >& dep2Shared = itDep1Dep2.second;
318 
319  // multiple deps2 means multiple choices for a relation (only one dep1<->dep2 can be chosen)
320  for (const auto& itDep2 : dep2Shared) {
321  size_t dep2 = itDep2.first;
322  const map<OperationNode<Base>*, Indexed2OpCountType>& sharedTmps = itDep2.second;
323 
324  size_t totalOps = 0; // the total number of operations performed by shared variables with dep2
325  for (const auto& itShared : sharedTmps) {
326  if (itShared.second.first == INDEXED_OPERATION_TYPE::BOTH) {
    // a temporary recorded both as indexed and as non-indexed rules out the whole equation pair
332  canCombine = false;
333  break;
334  } else {
335  totalOps += itShared.second.second;
336  }
337  }
338 
339  if (!canCombine) break;
340 
341  DepPairType depRel(dep1, dep2);
342  (*totalOps2validDeps)[totalOps][depRel] = &sharedTmps;
343  maxOps = std::max(maxOps, totalOps);
344  }
345 
346  if (!canCombine) break;
347  }
348 
349  if (canCombine) {
350  maxOps2Eq2totalOps2validDeps[maxOps][eqRel] = totalOps2validDeps;
351  eq2totalOps2validDeps[eqRel] = totalOps2validDeps;
352  } else {
353  incompatible_[eq1].insert(eq2);
354  incompatible_[eq2].insert(eq1);
    // the object just pushed is not needed: take it out of the tracking list before deleting it
355  totalOps2validDepsMem.pop_back();
356  delete totalOps2validDeps;
357  }
358  }
359  }
360 
    // visit the equation pairs starting with the largest number of shared operations
364  typename MaxOps2eq2totalOps2validDepsType::const_reverse_iterator itMaxOps;
365  for (itMaxOps = maxOps2Eq2totalOps2validDeps.rbegin(); itMaxOps != maxOps2Eq2totalOps2validDeps.rend(); ++itMaxOps) {
366 #ifdef CPPADCG_PRINT_DEBUG
367  std::cout << "\n\nmaxOps: " << itMaxOps->first << " count:" << itMaxOps->second.size() << std::endl;
368 #endif
369 
370  for (const auto& itEqPair : itMaxOps->second) {
371  const UniqueEquationPair<Base>& eqRel = itEqPair.first;
372 #ifdef CPPADCG_PRINT_DEBUG
373  std::cout << " eq1: " << *eqRel.eq1->dependents.begin() << " eq2: " << *eqRel.eq2->dependents.begin() << std::endl;
374 #endif
375 
376  Loop<Base>* loop1 = equation2Loop_.at(eqRel.eq1);
377  Loop<Base>* loop2 = equation2Loop_.at(eqRel.eq2);
378 
379  if (loop1 == loop2)
380  continue; // already done
381  if (contains(incompatible_, eqRel.eq1, eqRel.eq2))
382  continue; // incompatible
383 
    // copy of the current relations, swapped back in below if the two loops are not compatible
388  SmartSetPointer<set<size_t> > dependentRelationsBak;
389  for (const set<size_t>* its : dependentRelations) {
390  dependentRelationsBak.insert(new set<size_t>(*its));
391  }
392 
393  // relationships between dependents for the resulting merged loop
394  set<set<size_t>*> loopRelations;
395 
396  set<EquationPattern<Base>*> indexedLoopRelations;
397  std::vector<std::pair<EquationPattern<Base>*, EquationPattern<Base>*> > nonIndexedLoopRelations;
398 
402  bool compatible = isCompatible(loop1, loop2,
403  eq2totalOps2validDeps,
404  dep2Relations, dependentBlackListRelations, dependentRelations,
405  loopRelations, indexedLoopRelations, nonIndexedLoopRelations);
406 
407  if (compatible) {
408  // merge the two loops
409 
410  // update the loop of the equations
411  for (EquationPattern<Base>* itle : loop2->equations) {
412  equation2Loop_[itle] = loop1;
413  }
414  loop1->merge(*loop2, indexedLoopRelations, nonIndexedLoopRelations);
415 
416  typename std::vector<Loop<Base>*>::const_iterator it = std::find(loops_.cbegin(), loops_.cend(), loop2);
417  CPPADCG_ASSERT_UNKNOWN(it != loops_.end());
418  loops_.erase(it);
419  delete loop2;
420 
421  loop1->setLinkedDependents(loopRelations);
422 
423  // relation between loop1 and loop2 done!
424  } else {
425  // restore dependent relations
426  dependentRelations.s.swap(dependentRelationsBak.s);
427  // map each dependent to the relation set where it is present
428  std::fill(dep2Relations.begin(), dep2Relations.end(), nullptr);
429  for (set<size_t>* relation : dependentRelations) {
430  for (size_t itd : *relation) {
431  dep2Relations[itd] = relation;
432  }
433  }
434 
435  }
436 
437  }
438  }
439 
443  for (size_t l = 0; l < loops_.size(); l++) {
444  loops_[l]->generateDependentLoopIndexes(dep2Equation_);
445  }
446 
447  /*******************************************************************
448  * Attempt to combine unrelated loops
449  ******************************************************************/
    // the emptiness check protects the unsigned "size() - 1" below
450  if (!loops_.empty()) {
451  for (size_t l1 = 0; l1 < loops_.size() - 1; l1++) {
452  Loop<Base>* loop1 = loops_[l1];
    // l2 only advances when nothing was erased at that position
453  for (size_t l2 = l1 + 1; l2 < loops_.size();) {
454  Loop<Base>* loop2 = loops_[l2];
455 
456  bool canMerge = loop1->getIterationCount() == loop2->getIterationCount();
457  if (canMerge) {
458  // check if there are equations in the blacklist
459  canMerge = !find(loop1, loop2, incompatible_);
460  }
461 
462  if (canMerge) {
463  loop1->mergeEqGroups(*loop2);
464  loops_.erase(loops_.begin() + l2);
465  delete loop2;
466  } else {
467  l2++;
468  }
469  }
470  }
471  }
472 
473  size_t l_size = loops_.size();
474 
478  for (size_t l = 0; l < l_size; l++) {
479  Loop<Base>* loop = loops_[l];
480 
481  //Generate a local model for the loop
482  loop->createLoopModel(dependents_, independents_, dep2Equation_, origTemp2Index_);
483  }
484 
    // sets the ids stored for the visited nodes back to zero
488  resetHandlerCounters();
489 
490  return loops_;
491  }
492 
/**
 * Determines whether two loops can be merged, based on the temporaries
 * shared by their equations.
 *
 * The equation pairs (one equation from each loop) are visited starting
 * with the pairs with the highest shared operation count and, for each
 * pair, the dependent pairs are visited in the same order.
 * An equation pair which fails is added to incompatible_ and the search
 * stops.
 *
 * @param loop1 the first loop
 * @param loop2 the second loop
 * @param eq2totalOps2validDeps the shared temporaries of each equation pair
 *                              which can potentially be combined
 * @param dep2Relations for each dependent, the relation set holding it
 *                      (or nullptr); handed to findDepRelations()
 * @param dependentBlackListRelations handed to findDepRelations()
 * @param dependentRelations handed to findDepRelations()
 * @param loopRelations output: the relation sets of the dependents of both
 *                      loops (cleared and rebuilt on every pass)
 * @param indexedLoopRelations output: equations of the pairs accepted
 * @param nonIndexedLoopRelations output: equation pairs for which no
 *                                dependent of either loop had a relation set
 * @return true if the loops can be merged
 */
501  inline bool isCompatible(Loop<Base>* loop1,
502  Loop<Base>* loop2,
503  const Eq2totalOps2validDepsType& eq2totalOps2validDeps,
504  std::vector<std::set<size_t>* >& dep2Relations,
505  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
506  SmartSetPointer<std::set<size_t> >& dependentRelations,
507  std::set<std::set<size_t>*>& loopRelations,
508  std::set<EquationPattern<Base>*>& indexedLoopRelations,
509  std::vector<std::pair<EquationPattern<Base>*, EquationPattern<Base>*> >& nonIndexedLoopRelations) {
510  using namespace std;
511 
512  bool compatible = true;
513 
    // highest shared operation count of an equation pair -> equation pairs
518  map<size_t, map<UniqueEquationPair<Base>, TotalOps2validDepsType*> > totalOp2eq;
519 
520  for (EquationPattern<Base>* eq1 : loop1->equations) {
521 
522  for (EquationPattern<Base>* eq2 : loop2->equations) {
523 
524  UniqueEquationPair<Base> eqRel(eq1, eq2);
525 
526  typename Eq2totalOps2validDepsType::const_iterator eqSharedit = eq2totalOps2validDeps.find(eqRel);
527  if (eqSharedit == eq2totalOps2validDeps.end())
528  continue; // nothing is shared between eq1 and eq2
529 
530 
    // the last key of the ordered map is the largest operation count
    // NOTE(review): rbegin() is dereferenced without checking that the map is not empty
531  size_t maxOps = eqSharedit->second->rbegin()->first;
532  totalOp2eq[maxOps][eqRel] = eqSharedit->second;
533  }
534  }
535 
536  typename map<size_t, map<UniqueEquationPair<Base>, TotalOps2validDepsType*> >::const_reverse_iterator itr;
537  for (itr = totalOp2eq.rbegin(); itr != totalOp2eq.rend(); ++itr) {
538  // loop shared operation count
539 
540  for (const auto& itEq : itr->second) {
541  EquationPattern<Base>* eq1 = itEq.first.eq1;
542  EquationPattern<Base>* eq2 = itEq.first.eq2;
543  TotalOps2validDepsType& totalOps2validDeps = *itEq.second;
544 
545  /***************************************************
546  * attempt to combine dependents which share the
547  * highest number of operations first
548  **************************************************/
549  typename map<size_t, map<DepPairType, const map<OperationNode<Base>*, Indexed2OpCountType>* > >::const_reverse_iterator itOp2Dep2Shared;
550  for (itOp2Dep2Shared = totalOps2validDeps.rbegin(); itOp2Dep2Shared != totalOps2validDeps.rend(); ++itOp2Dep2Shared) {
551 #ifdef CPPADCG_PRINT_DEBUG
552  std::cout << " operation count: " << itOp2Dep2Shared->first << " relations: " << itOp2Dep2Shared->second.size() << std::endl;
553 #endif
554  for (const auto& itDep2Shared : itOp2Dep2Shared->second) {
555  DepPairType depRel = itDep2Shared.first;
556  size_t dep1 = depRel.first;
557  size_t dep2 = depRel.second;
558 
559  const map<OperationNode<Base>*, Indexed2OpCountType>& shared = *itDep2Shared.second;
564  compatible = findDepRelations(eq1, dep1, eq2, dep2, shared,
565  dep2Relations, dependentBlackListRelations, dependentRelations);
566  if (!compatible) break;
567  }
568 
    // rebuilt below from the current contents of dep2Relations
569  loopRelations.clear();
570 
571  if (compatible) {
    // collect the relation sets of all the dependents of both loops
575  std::vector<Loop<Base>*> loops(2);
576  loops[0] = loop1;
577  loops[1] = loop2;
578  bool nonIndexedOnly = true;
579  for (size_t l = 0; l < 2; l++) {
580  Loop<Base>* loop = loops[l];
581  for (EquationPattern<Base>* eq : loop->equations) { // equation
582  for (size_t dep : eq->dependents) { // dependent
583  if (dep2Relations[dep] != nullptr) {
584  loopRelations.insert(dep2Relations[dep]);
585  nonIndexedOnly = false;
586  }
587  }
588  }
589  }
590 
591 
592 
593  if (nonIndexedOnly) {
594  nonIndexedLoopRelations.push_back(std::make_pair(eq1, eq2));
595  } else {
596  // there are shared indexed temporary variables
    // accepted only if at least one relation set has exactly the required size
597  compatible = false;
598  size_t nNonIndexedRel1 = loop1->getLinkedEquationsByNonIndexedCount();
599  size_t nNonIndexedRel2 = loop2->getLinkedEquationsByNonIndexedCount();
600  size_t requiredSize = loop1->equations.size() + loop2->equations.size() - nNonIndexedRel1 - nNonIndexedRel2;
601 
602  for (set<size_t>* relations : loopRelations) {
603  if (relations->size() == requiredSize) {
604  compatible = true;
605  break;
606  }
607  }
608 #ifdef CPPADCG_PRINT_DEBUG
609  if (compatible) {
610  std::cout << " loopRelations:";
611  print(loopRelations);
612  std::cout << std::endl;
613  }
614 #endif
615  }
616  }
617 
618  if (!compatible) break;
619  }
620 
621  if (!compatible) {
    // remember the failure so that this equation pair is not attempted again
622  incompatible_[eq1].insert(eq2);
623  incompatible_[eq2].insert(eq1);
624  break;
625  } else {
626  indexedLoopRelations.insert(eq1);
627  indexedLoopRelations.insert(eq2);
628  }
629  }
630 
631  if (!compatible) break;
632  }
633 
634  return compatible;
635  }
636 
645  bool findDepRelations(EquationPattern<Base>* eq1,
646  size_t dep1,
648  size_t dep2,
649  const std::map<OperationNode<Base>*, Indexed2OpCountType>& sharedNodes,
650  std::vector<std::set<size_t>* >& dep2Relations,
651  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
652  SmartSetPointer<std::set<size_t> >& dependentRelations) {
653  using namespace std;
654 
655  for (const auto& itShared : sharedNodes) {
656  OperationNode<Base>* sharedNode = itShared.first;
657 
658  // checks independents
659  bool compatible = canCombineEquations(*eq1, dep1, *eq2, dep2, *sharedNode,
660  dep2Relations, dependentBlackListRelations, dependentRelations);
661 
662  if (!compatible) return false;
663  }
664 
665  return true;
666  }
667 
668  void groupByLoopEqOp(EquationPattern<Base>* eq,
669  std::map<Loop<Base>*, std::map<EquationPattern<Base>*, std::map<size_t, std::pair<OperationNode<Base>*, bool> > > >& loopSharedTemps,
670  const std::map<OperationNode<Base>*, std::set<size_t> >& opShared,
671  bool indexed) {
672  using namespace std;
673 
674  for (OperationNode<Base>* shared : opShared) {
675  const set<size_t>& deps = id2Deps[varId_[*shared]];
676 
677  for (size_t dep : deps) {
678  EquationPattern<Base>* otherEq = dep2Equation_.at(dep);
679  if (eq != otherEq) {
680  Loop<Base>* loop = equation2Loop_.at(otherEq);
681  // the original ID (saved in evaluation order) is used to sort shared variables
682  // to ensure reproducibility between different runs
683  loopSharedTemps[loop][otherEq][origShareNodeId_[shared]] = std::make_pair(shared, indexed);
684  }
685  }
686  }
687  }
688 
696  virtual LoopFreeModel<Base>* createNewTape() {
697  CPPADCG_ASSERT_UNKNOWN(handler_ == independents_[0].getCodeHandler());
698 
699  size_t m = dependents_.size();
700  std::vector<bool> inLoop(m, false);
701  size_t eqInLoopCount = 0;
702 
706  size_t l_size = loops_.size();
707 
708  for (size_t l = 0; l < l_size; l++) {
709  Loop<Base>* loop = loops_[l];
710  LoopModel<Base>* loopModel = loop->getModel();
711 
715  const std::vector<std::vector<LoopPosition> >& ldeps = loopModel->getDependentIndexes();
716  for (size_t eq = 0; eq < ldeps.size(); eq++) {
717  for (size_t it = 0; it < ldeps[eq].size(); it++) {
718  const LoopPosition& pos = ldeps[eq][it];
719  if (pos.original != std::numeric_limits<size_t>::max()) {// some equations are not present in all iteration
720  inLoop[pos.original] = true;
721  eqInLoopCount++;
722  }
723  }
724  }
725  }
726 
730  assert(m >= eqInLoopCount);
731  size_t nonLoopEq = m - eqInLoopCount;
732  std::vector<CGBase> nonLoopDeps(nonLoopEq + origTemp2Index_.size());
733 
734  if (nonLoopDeps.size() == 0)
735  return nullptr; // there are no equations outside the loops
736 
740  size_t inl = 0;
741  std::vector<size_t> depTape2Orig(nonLoopEq);
742  if (eqInLoopCount < m) {
743  for (size_t i = 0; i < inLoop.size(); i++) {
744  if (!inLoop[i]) {
745  depTape2Orig[inl] = i;
746  nonLoopDeps[inl++] = dependents_[i];
747  }
748  }
749  }
750  CPPADCG_ASSERT_UNKNOWN(inl == nonLoopEq);
751 
755  for (const auto& itTmp : origTemp2Index_) {
756  size_t k = itTmp.second;
757  nonLoopDeps[nonLoopEq + k] = handler_->createCG(Argument<Base>(*itTmp.first));
758  }
759 
763  Evaluator<Base, CGBase> evaluator(*handler_);
764 
765  // set atomic functions
766  const std::map<size_t, CGAbstractAtomicFun<Base>* >& atomicsOrig = handler_->getAtomicFunctions();
767  std::map<size_t, atomic_base<CGBase>* > atomics;
768  atomics.insert(atomicsOrig.begin(), atomicsOrig.end());
769  evaluator.addAtomicFunctions(atomics);
770 
771  std::vector<AD<CGBase> > x(independents_.size());
772  for (size_t j = 0; j < x.size(); j++) {
773  if (independents_[j].isValueDefined())
774  x[j] = independents_[j].getValue();
775  }
776 
777  CppAD::Independent(x);
778  std::vector<AD<CGBase> > y = evaluator.evaluate(x, nonLoopDeps);
779 
780  std::unique_ptr<ADFun<CGBase> > tapeNoLoops(new ADFun<CGBase>());
781  tapeNoLoops->Dependent(y);
782 
783  return new LoopFreeModel<Base>(tapeNoLoops.release(), depTape2Orig);
784  }
785 
786  std::vector<EquationPattern<Base>*> findRelatedVariables() {
787  eqCurr_ = nullptr;
788  CodeHandlerVector<Base, size_t> varColor(*handler_);
789  color_ = 1; // used to mark visited nodes
790 
791  varColor.adjustSize();
792  varColor.fill(0);
793 
794  size_t rSize = relatedDepCandidates_.size();
795  for (size_t r = 0; r < rSize; r++) {
796  const std::set<size_t>& candidates = relatedDepCandidates_[r];
797  std::set<size_t> used;
798 
799  eqCurr_ = nullptr;
800 
801  std::set<size_t>::const_iterator itRef;
802  for (itRef = candidates.begin(); itRef != candidates.end(); ++itRef) {
803  size_t iDepRef = *itRef;
804 
805  // check if it has already been used
806  if (used.find(iDepRef) != used.end()) {
807  continue;
808  }
809 
810  if (eqCurr_ == nullptr || used.size() > 0) {
811  eqCurr_ = new EquationPattern<Base>(dependents_[iDepRef], iDepRef);
812  equations_.push_back(eqCurr_);
813  }
814 
815  std::set<size_t>::const_iterator it = itRef;
816  for (++it; it != candidates.end(); ++it) {
817  size_t iDep = *it;
818  // check if it has already been used
819  if (used.find(iDep) != used.end()) {
820  continue;
821  }
822 
823  if (eqCurr_->testAdd(iDep, dependents_[iDep], color_, varColor)) {
824  used.insert(iDep);
825  }
826  }
827 
828  if (eqCurr_->dependents.size() == 1) {
829  // nothing found :(
830  delete eqCurr_;
831  eqCurr_ = nullptr;
832  equations_.pop_back();
833  }
834  }
835  }
836 
841  for (size_t eq = 0; eq < equations_.size(); eq++) {
842  equations_[eq]->detectNonIndexedIndependents();
843  }
844 
845  return equations_;
846  }
847 
855  inline bool findSharedTemporaries(const CG<Base>& value,
856  size_t depIndex) {
857  OperationNode<Base>* depNode = value.getOperationNode();
858  size_t opCount = 0;
859  if (findSharedTemporaries(depNode, depIndex, opCount)) {
860  varIndexed_[*depNode] = true;
861  return true;
862  }
863  return false;
864  }
865 
/**
 * Recursively visits an operation and its arguments, flags each visited
 * node as indexed or non-indexed in varIndexed_ and records in
 * equationShared_ the nodes which are also used by a dependent of another
 * equation.
 *
 * @param node the operation to visit (may be null)
 * @param depIndex the index of the dependent being processed
 * @param opCount incremented by one when the node was already visited,
 *                otherwise by one plus the counts added by its arguments
 * @return whether the node is flagged as indexed
 */
875  inline bool findSharedTemporaries(OperationNode<Base>* node,
876  size_t depIndex,
877  size_t& opCount) {
878  if (node == nullptr)
879  return false; // nothing to do
880 
881  if (handler_->isVisited(*node)) {
882  opCount++; // this operation
    // reuse the flag stored when the node was first visited
883  return varIndexed_[*node];
884  }
885 
886  handler_->markVisited(*node);
887 
888  bool indexedOperation = false;
889 
    // starts at one to count this operation
890  size_t localOpCount = 1;
891  const std::vector<Argument<Base> >& args = node->getArguments();
892  size_t arg_size = args.size();
893  for (size_t a = 0; a < arg_size; a++) {
894  OperationNode<Base>*argOp = args[a].getOperation();
895  if (argOp != nullptr) {
896  if (argOp->getOperationType() != CGOpCode::Inv) {
897  indexedOperation |= findSharedTemporaries(argOp, depIndex, localOpCount);
898  } else {
    // an Inv argument makes the operation indexed unless the current equation reports it as constant
899  indexedOperation |= !eqCurr_->containsConstantIndependent(node, a);
900  }
901  }
902  }
903 
904  opCount += localOpCount;
905 
906  varIndexed_[*node] = indexedOperation; // mark this operation as being indexed or not-indexed
907 
908  size_t id = varId_[*node];
909  std::set<size_t>& deps = id2Deps[id];
910 
    // more than one dependent was marked as using this node
911  if (deps.size() > 1 && node->getOperationType() != CGOpCode::Inv) {
915  for (size_t otherDep : deps) {
916 
917  EquationPattern<Base>* otherEquation = dep2Equation_.at(otherDep);
918  if (otherEquation != eqCurr_) {
922  UniqueEquationPair<Base> eqPair(eqCurr_, otherEquation);
923  Dep1Dep2SharedType& relation = equationShared_[eqPair];
924 
    // the first key must be the dependent which belongs to eqPair.eq1
925  std::map<OperationNode<Base>*, Indexed2OpCountType>* reldepdep;
926  if (eqPair.eq1 == eqCurr_)
927  reldepdep = &relation[depIndex][otherDep];
928  else
929  reldepdep = &relation[otherDep][depIndex];
930 
931  INDEXED_OPERATION_TYPE expected = indexedOperation ? INDEXED_OPERATION_TYPE::INDEXED : INDEXED_OPERATION_TYPE::NONINDEXED;
932  typename std::map<OperationNode<Base>*, Indexed2OpCountType>::iterator itIndexedType = reldepdep->find(node);
933  if (itIndexedType == reldepdep->end()) {
934  (*reldepdep)[node] = Indexed2OpCountType(expected, localOpCount);
935  } else if (itIndexedType->second.first != expected) {
    // already recorded with the other classification
936  itIndexedType->second.first = INDEXED_OPERATION_TYPE::BOTH;
937  }
938 
    // only the first dependent found in a different equation is recorded
939  break;
940  }
941  }
942  }
943 
944  return indexedOperation;
945  }
946 
954  inline void markOperationsWithDependent(const OperationNode<Base>* node,
955  size_t dep) {
956  if (node == nullptr || node->getOperationType() == CGOpCode::Inv)
957  return; // nothing to do
958 
959  size_t id = varId_[*node];
960 
961  std::set<size_t>& deps = id2Deps[id];
962 
963  if (deps.size() == 0) {
964  deps.insert(dep); // here for the first time
965  } else {
966  std::pair < std::set<size_t>::iterator, bool> added = deps.insert(dep);
967  if (!added.second) {
968  return; // already been here
969  }
970  }
971 
972  const std::vector<Argument<Base> >& args = node->getArguments();
973  size_t arg_size = args.size();
974  for (size_t i = 0; i < arg_size; i++) {
975  markOperationsWithDependent(args[i].getOperation(), dep);
976  }
977  }
978 
979  void assignIds() {
980  idCounter_ = 1;
981 
982  size_t rSize = relatedDepCandidates_.size();
983  for (size_t r = 0; r < rSize; r++) {
984  const std::set<size_t>& candidates = relatedDepCandidates_[r];
985 
986  for (size_t it : candidates) {
987  assignIds(dependents_[it].getOperationNode());
988  }
989  }
990  }
991 
992  void assignIds(OperationNode<Base>* node) {
993  if (node == nullptr || varId_[*node] > 0)
994  return;
995 
996  varId_[*node] = idCounter_;
997  origShareNodeId_.adjustSize(*node);
998  origShareNodeId_[*node] = idCounter_;
999  idCounter_++;
1000 
1001  const std::vector<Argument<Base> >& args = node->getArguments();
1002  size_t arg_size = args.size();
1003  for (size_t i = 0; i < arg_size; i++) {
1004  assignIds(args[i].getOperation());
1005  }
1006  }
1007 
1008  void resetHandlerCounters() {
1009  size_t rSize = relatedDepCandidates_.size();
1010  for (size_t r = 0; r < rSize; r++) {
1011  const std::set<size_t>& candidates = relatedDepCandidates_[r];
1012 
1013  for (size_t it : candidates) {
1014  resetHandlerCounters(dependents_[it].getOperationNode());
1015  }
1016  }
1017  }
1018 
1019  void resetHandlerCounters(OperationNode<Base>* node) {
1020  if (node == nullptr || varId_[*node] == 0 || origShareNodeId_[*node] == 0)
1021  return;
1022 
1023  varId_[*node] = 0;
1024  origShareNodeId_[*node] = 0;
1025 
1026  const std::vector<Argument<Base> >& args = node->getArguments();
1027  size_t arg_size = args.size();
1028  for (size_t i = 0; i < arg_size; i++) {
1029  resetHandlerCounters(args[i].getOperation());
1030  }
1031  }
1032 
1033  static bool find(Loop<Base>* loop1, Loop<Base>* loop2,
1034  const std::map<EquationPattern<Base>*, std::set<EquationPattern<Base>*> >& blackList) {
1035  for (EquationPattern<Base>* iteq1 : loop1->equations) {
1036 
1037  const auto itBlack = blackList.find(iteq1);
1038  if (itBlack != blackList.end()) {
1039 
1040  for (EquationPattern<Base>* iteq2 : loop2->equations) {
1041  if (itBlack->second.find(iteq2) != itBlack->second.end()) {
1042  return true; // found
1043  }
1044  }
1045  }
1046  }
1047 
1048  return false;
1049  }
1050 
/**
 * Checks whether eq2 is in the set associated with eq1.
 * Only the direction eq1 -> eq2 is tested and the map is not modified.
 *
 * @param map the sets associated with each key
 * @param eq1 the key to look up
 * @param eq2 the value to search for in the set of eq1
 * @return true if there is a set for eq1 and it contains eq2
 */
template<class T>
static inline bool contains(const std::map<T, std::set<T> >& map, T eq1, T eq2) {
    const auto entry = map.find(eq1);
    if (entry == map.end())
        return false;

    return entry->second.count(eq2) != 0;
}
1062 
1063  bool canCombineEquations(const EquationPattern<Base>& eq1,
1064  size_t dep1,
1065  const EquationPattern<Base>& eq2,
1066  size_t dep2,
1067  OperationNode<Base>& sharedTemp,
1068  std::vector<std::set<size_t>* >& dep2Relations,
1069  std::map<size_t, std::set<size_t> >& dependentBlackListRelations,
1070  SmartSetPointer<std::set<size_t> >& dependentRelations) {
1071  using namespace std;
1072 
1073  // must have indexed independents at the same locations in all equations
1074  const set<const OperationNode<Base>*> opWithIndepArgs = eq1.findOperationsUsingIndependents(sharedTemp);
1075 
1076  // must have indexed independents at the same locations in both equations
1077  for (const OperationNode<Base>* op : opWithIndepArgs) {
1078  // get indexed independent variable information
1079  // - equation 1
1080  typename map<const OperationNode<Base>*, OperationIndexedIndependents<Base> >::const_iterator indexed1It;
1081  OperationNode<Base>* op1 = eq1.operationEO2Reference.at(dep1).at(op); // convert to the reference of equation 1
1082  indexed1It = eq1.indexedOpIndep.op2Arguments.find(op1);
1083 
1084  // - equation 2
1085  typename map<const OperationNode<Base>*, OperationIndexedIndependents<Base> >::const_iterator indexed2It;
1086  OperationNode<Base>* op2 = eq2.operationEO2Reference.at(dep2).at(op); // convert to the reference of equation 2
1087  indexed2It = eq2.indexedOpIndep.op2Arguments.find(op2);
1088 
1092  if (indexed1It == eq1.indexedOpIndep.op2Arguments.end()) {
1093  if (indexed2It != eq2.indexedOpIndep.op2Arguments.end()) {
1094  return false; // indexed in one equation but non-indexed in the other
1095  }
1096  } else {
1097  if (indexed2It == eq2.indexedOpIndep.op2Arguments.end()) {
1098  return false; // indexed in one equation but non-indexed in the other
1099  }
1100 
1104  const OperationIndexedIndependents<Base>& indexed1Ops = indexed1It->second;
1105  const OperationIndexedIndependents<Base>& indexed2Ops = indexed2It->second;
1106 
1107  size_t a1Size = indexed1Ops.arg2Independents.size();
1108  if (a1Size != indexed2Ops.arg2Independents.size()) { // there must be the same number of arguments
1109  return false;
1110  }
1111 
1112  for (size_t a = 0; a < a1Size; a++) {
1113  const map<size_t, const OperationNode<Base>*>& eq1Dep2Indep = indexed1Ops.arg2Independents[a];
1114  const map<size_t, const OperationNode<Base>*>& eq2Dep2Indep = indexed2Ops.arg2Independents[a];
1115 
1116  if (eq1Dep2Indep.empty() != eq2Dep2Indep.empty())
1117  return false; // one is indexed and the other is non-indexed
1118 
1119  // it has to be possible to match dependents from the two equation patterns
1120 
1121  if (eq1Dep2Indep.empty()) {
1122  continue; // not indexed
1123  }
1124 
1125  // indexed independent variable
1126 
1127  // invert eq1Dep2Indep into eq1Indep2Dep
1128  using MapIndep2Dep = map<const OperationNode<Base>*, size_t, IndependentNodeSorter<Base> >;
1129  MapIndep2Dep eq1Indep2Dep;
1130  typename MapIndep2Dep::iterator hint = eq1Indep2Dep.begin();
1131  for (const auto& d2i : eq1Dep2Indep) {
1132  hint = eq1Indep2Dep.insert(hint, std::make_pair(d2i.second, d2i.first));
1133  hint++; // assume that the relation dep<->indep is always ascending
1134  }
1135 
1136  typename map<const OperationNode<Base>*, size_t>::const_iterator itHint = eq1Indep2Dep.begin();
1137 
1138  // check all iterations/dependents
1139  for (const auto& d2i : eq2Dep2Indep) {
1140  size_t dep2 = d2i.first;
1141  const OperationNode<Base>* indep = d2i.second;
1142  typename map<const OperationNode<Base>*, size_t>::const_iterator it;
1143  if (itHint->first == indep) {
1149  it = itHint;
1150  itHint++;
1151  } else {
1152  it = eq1Indep2Dep.find(indep);
1153  }
1154 
1155  if (it != eq1Indep2Dep.end()) {
1156  size_t dep1 = it->second;
1157 
1158  // check if this relation was previous excluded
1159  std::map<size_t, set<size_t> >::const_iterator itBlackL = dependentBlackListRelations.find(dep1);
1160  if (itBlackL != dependentBlackListRelations.end() && itBlackL->second.find(dep2) != itBlackL->second.end()) {
1161  return false; // these dependents cannot be in the same iteration
1162  }
1163 
1164  bool related = makeDependentRelation(eq1, dep1, eq2, dep2,
1165  dep2Relations, dependentRelations);
1166  if (!related)
1167  return false;
1168 
1169  } else {
1170  // equation pattern 1 does not have any iteration with indep from dep2
1171 
1172  // there is no need to have the same number of iterations in both equations!
1173  // but remember that these dependents cannot be in the same iteration from now on
1174  dependentBlackListRelations[dep2].insert(eq1.dependents.begin(), eq1.dependents.end());
1175  }
1176  }
1177 
1178  }
1179 
1180  }
1181 
1182  }
1183 
1184  return true;
1185  }
1186 
1187  bool isNonIndexed(const EquationPattern<Base>& eq2,
1188  size_t dep2,
1189  OperationNode<Base>& sharedTemp) {
1190  using namespace std;
1191 
1192 
1193  // must have indexed independents at the same locations in all equations
1194  const set<const OperationNode<Base>*> opWithIndepArgs = EquationPattern<Base>::findOperationsUsingIndependents(sharedTemp);
1195 
1196  for (const OperationNode<Base>* op : opWithIndepArgs) {
1197  // get indexed independent variable information
1198  // - equation 2
1199  OperationNode<Base>* op2 = eq2.operationEO2Reference.at(dep2).at(op); // convert to the reference of equation 2
1200 
1201  const auto indexed2It = eq2.indexedOpIndep.op2Arguments.find(op2);
1202  if (indexed2It != eq2.indexedOpIndep.op2Arguments.end()) {
1203  return false; // indexed in one equation but non-indexed in the other
1204  }
1205  }
1206 
1207  return true;
1208  }
1209 
1210  bool makeDependentRelation(const EquationPattern<Base>& eq1,
1211  size_t dep1,
1212  const EquationPattern<Base>& eq2,
1213  size_t dep2,
1214  std::vector<std::set<size_t>* >& dep2Relations,
1215  SmartSetPointer<std::set<size_t> >& dependentRelations) {
1216  using namespace std;
1217 
1218  set<size_t>* related1 = dep2Relations[dep1];
1219  set<size_t>* related2 = dep2Relations[dep2];
1220 
1221  // check if relations were established with a different dependent from the same equation pattern
1222  if (related1 != nullptr) {
1223  // dependent 1 already in a relation set
1224 
1225  if (related2 != nullptr) {
1226  // both dependents belong to previously existing relations sets
1227 
1228  if (related1 == related2)
1229  return true; // already done
1230 
1231  // relations must be merged (if possible)!
1232  // merge related2 into related1
1233  bool canMerge = true;
1234 
1235  for (size_t dep3 : *related2) {
1236 
1237  const EquationPattern<Base>& eq3 = *dep2Equation_.at(dep3);
1238  // make sure no other dependent from the same equation pattern was already in this relation set
1239  for (size_t it : eq3.dependents) {
1240  if (it != dep3 && related1->find(it) != related1->end()) {
1241  canMerge = false; // relation with a dependent from a different iteration!
1242  break;
1243  //return false;
1244  }
1245  }
1246 
1247  if (!canMerge)
1248  break;
1249  }
1250 
1251  if (canMerge) {
1252  for (size_t dep3 : *related2) {
1253  related1->insert(dep3);
1254  dep2Relations[dep3] = related1;
1255  }
1256 
1257  dependentRelations.erase(related2);
1258  delete related2;
1259  }
1268  } else {
1269  if (related1->find(dep2) == related1->end()) {
1270  // make sure no other dependent from the same equation pattern was already in this relation set
1271  bool canMerge = true;
1272  for (size_t it : eq2.dependents) {
1273  if (it != dep2 && related1->find(it) != related1->end()) {
1274  canMerge = false; // relation with a dependent from a different iteration!
1275  break;
1276  }
1277  }
1278 
1279  if (canMerge) {
1280  related1->insert(dep2);
1281  dep2Relations[dep2] = related1;
1282  }
1290  }
1291  }
1292 
1293  } else if (related2 != nullptr) {
1294  // dependent 2 already in a relation set
1295 
1296  // make sure no other dependent from the same equation pattern was already in this relation set
1297  bool canMerge = true;
1298  for (size_t it : eq1.dependents) {
1299  if (it != dep1 && related2->find(it) != related2->end()) {
1300  canMerge = false; // relation with a dependent from a different iteration!
1301  break;
1302  //return false;
1303  }
1304  }
1305 
1306  if (canMerge) {
1307  related2->insert(dep1);
1308  dep2Relations[dep1] = related2;
1309  }
1319  } else {
1320  // dependent 1 and dependent 2 not in any relation set
1321  set<size_t>* related = new std::set<size_t>();
1322  dependentRelations.insert(related);
1323  related->insert(dep1);
1324  related->insert(dep2);
1325  dep2Relations[dep1] = related;
1326  dep2Relations[dep2] = related;
1327  }
1328 
1329  return true;
1330  }
1331 
1332 };
1333 
1334 } // END cg namespace
1335 } // END CppAD namespace
1336 
1337 #endif
virtual void generateTapes(LoopFreeModel< Base > *&nonLoopTape, std::set< LoopModel< Base > *> &loopTapes)
std::vector< ActiveOut > evaluate(ArrayView< const ActiveOut > indepNew, ArrayView< const CG< ScalarIn > > depOld)
Definition: evaluator.hpp:92
std::set< EquationPattern< Base > * > equations
Definition: loop.hpp:59
std::map< size_t, std::map< const OperationNode< Base > *, OperationNode< Base > * > > operationEO2Reference
void merge(Loop< Base > &other, const std::set< EquationPattern< Base > *> &indexedLoopRelations, const std::vector< std::pair< EquationPattern< Base > *, EquationPattern< Base > *> > &nonIndexedLoopRelations)
Definition: loop.hpp:232
DependentPatternMatcher(const std::vector< std::set< size_t > > &relatedDepCandidates, const std::vector< CGBase > &dependents, const std::vector< CGBase > &independents)
void mergeEqGroups(Loop< Base > &other)
Definition: loop.hpp:262
const std::vector< Argument< Base > > & getArguments() const
size_t getIterationCount() const
Definition: loop.hpp:209
STL namespace.
std::vector< MapDep2Indep_type > arg2Independents
IndexedIndependent< Base > indexedOpIndep
CGOpCode getOperationType() const
const std::map< size_t, CGAbstractAtomicFun< Base > *> & getAtomicFunctions() const
void createLoopModel(const std::vector< CG< Base > > &dependents, const std::vector< CG< Base > > &independents, const std::map< size_t, EquationPattern< Base > *> &dep2Equation, std::map< OperationNode< Base > *, size_t > &origTemp2Index)
Definition: loop.hpp:283
const std::vector< std::vector< LoopPosition > > & getDependentIndexes() const
Definition: loop_model.hpp:291