CppADCodeGen 2.4.3
A C++ Algorithmic Differentiation Package with Source Code Generation
Loading...
Searching...
No Matches
sparse_forjac_hessian.hpp
1#ifndef CPPAD_CG_SPARSE_FORJAC_HESSIAN_INCLUDED
2#define CPPAD_CG_SPARSE_FORJAC_HESSIAN_INCLUDED
3/* --------------------------------------------------------------------------
4 * CppADCodeGen: C++ Algorithmic Differentiation with Source Code Generation:
5 * Copyright (C) 2013 Ciengis
6 * Copyright (C) 2019 Joao Leal
7 *
8 * CppADCodeGen is distributed under multiple licenses:
9 *
10 * - Eclipse Public License Version 1.0 (EPL1), and
11 * - GNU General Public License Version 3 (GPL3).
12 *
13 * EPL1 terms and conditions can be found in the file "epl-v10.txt", while
14 * terms and conditions for the GPL3 can be found in the file "gpl3.txt".
15 * ----------------------------------------------------------------------------
16 * Author: Joao Leal
17 */
21namespace CppAD {
22namespace cg {
23
29public:
31 std::vector<size_t> user_row;
33 std::vector<size_t> user_col;
36 std::vector<size_t> sort_row;
39 std::vector<size_t> sort_col;
41 size_t K;
42
43 template<class Base, class VectorSize>
44 inline void prepare(const ADFun<Base>& fun,
45 const VectorSize& row,
46 const VectorSize& col) {
50 K = row.size();
51 size_t n = fun.Domain();
52 size_t m = fun.Range();
53
54 if (user_row.empty()) {
55 // create version of (row, col, k) sorted by column value
56 user_col.resize(K + 1);
57 user_row.resize(K + 1);
58 sort_col.resize(K + 1);
59
60 // put sorted indices in user_row and user_col
61 for (size_t k = 0; k < K; k++) {
62 user_row[k] = row[k];
63 user_col[k] = col[k];
64 }
65 user_row[K] = m;
66 user_col[K] = n;
67
68 // put sorting indices in sort_col
69 index_sort(user_col, sort_col);
70 }
71
72#ifndef NDEBUG
73 CPPAD_ASSERT_KNOWN(size_t(row.size()) == K && size_t(col.size()) == K,
74 "sparseForJacHessian: either r or c does not have "
75 "the same size as jac.")
76 CPPAD_ASSERT_KNOWN(user_row.size() == K + 1 &&
77 user_col.size() == K + 1 &&
78 sort_col.size() == K + 1,
79 "sparseForJacHessian: invalid value in work.")
80 for (size_t k = 0; k < K; k++) {
81 CPPAD_ASSERT_KNOWN(row[k] < m,
82 "sparseForJacHessian: invalid value in r.")
83 CPPAD_ASSERT_KNOWN(col[k] < n,
84 "sparseForJacHessian: invalid value in c.")
85 CPPAD_ASSERT_KNOWN(sort_col[k] < K,
86 "sparseForJacHessian: invalid value in work.")
87 CPPAD_ASSERT_KNOWN(user_row[k] == row[k],
88 "sparseForJacHessian: invalid value in work.")
89 CPPAD_ASSERT_KNOWN(user_col[k] == col[k],
90 "sparseForJacHessian: invalid value in work.")
91 }
92#endif
93 }
95
96 inline void clear() {
97 user_row.clear();
98 user_col.clear();
99 sort_row.clear();
100 sort_col.clear();
101 }
102};
103
109public:
111 std::vector<size_t> r_sort;
113 std::vector<size_t> c_sort;
115 std::vector<size_t> k_sort;
117 size_t K;
118
119 template<class Base, class VectorSize>
120 inline void prepare(const ADFun<Base>& fun,
121 const VectorSize& row,
122 const VectorSize& col) {
126 size_t n = fun.Domain();
127 K = row.size();
128
129 if (r_sort.empty()) {
130 // create version of (row, col, k) sorted by row value
131 c_sort.resize(K);
132 r_sort.resize(K + 1);
133 k_sort.resize(K);
134
135 // put sorting indices in k_sort
136 index_sort(row, k_sort);
137
138 for (size_t k = 0; k < K; k++) {
139 r_sort[k] = row[ k_sort[k] ];
140 c_sort[k] = col[ k_sort[k] ];
141 }
142 r_sort[K] = n;
143 }
144#ifndef NDEBUG
145 CPPAD_ASSERT_KNOWN(size_t(row.size()) == K && size_t(col.size()) == K,
146 "sparseForJacHessian: either r or c does not have the same size as ehs.")
147 CPPAD_ASSERT_KNOWN(r_sort.size() == K + 1 &&
148 c_sort.size() == K &&
149 k_sort.size() == K,
150 "sparseForJacHessian: invalid value in work.")
151 for (size_t k = 0; k < K; k++) {
152 CPPAD_ASSERT_KNOWN(row[k] < n,
153 "sparseForJacHessian: invalid value in r.")
154 CPPAD_ASSERT_KNOWN(col[k] < n,
155 "sparseForJacHessian: invalid value in c.")
156 CPPAD_ASSERT_KNOWN(k_sort[k] < K,
157 "sparseForJacHessian: invalid value in work.")
158 CPPAD_ASSERT_KNOWN(r_sort[k] == row[ k_sort[k] ],
159 "sparseForJacHessian: invalid value in work.")
160 CPPAD_ASSERT_KNOWN(c_sort[k] == col[ k_sort[k] ],
161 "sparseForJacHessian: invalid value in work.")
162 }
163#endif
164 }
166
167 inline void clear() {
168 r_sort.clear();
169 c_sort.clear();
170 k_sort.clear();
171 }
172};
173
179public:
183 std::vector<size_t> color;
184
185 template<class Base, class VectorSize>
186 inline void prepare(const ADFun<Base>& fun,
187 const VectorSize& jacRow,
188 const VectorSize& jacCol,
189 const VectorSize& hesRow,
190 const VectorSize& hesCol) {
191 size_t n = fun.Domain();
192
193 CPPAD_ASSERT_KNOWN(color.empty() || color.size() == n,
194 "sparseForJacHessian: invalid value in work.")
195 if (!color.empty()) {
196 for (size_t j = 0; j < n; j++) {
197 CPPAD_ASSERT_KNOWN(color[j] < n,
198 "sparseForJacHessian: invalid value in work.")
199 }
200 }
201
202 jac.prepare(fun, jacRow, jacCol);
203 hes.prepare(fun, hesRow, hesCol);
204 }
206
207 inline void clear() {
208 jac.clear();
209 hes.clear();
210 color.clear();
211 }
212};
213
214template<class SparsityPattern>
215inline void computeNotUsed(SparsityPattern& not_used,
216 const SparsityPattern& sparsity,
217 const SparsityPattern& r_used,
218 size_t m,
219 size_t n) {
220 using SparIter = typename SparsityPattern::const_iterator;
221
222 assert(not_used.n_set() == 0);
223 not_used.resize(m, n);
224
225 for (size_t i = 0; i < n; i++) {
226 SparIter j_itr(sparsity, i);
227 size_t j = *j_itr;
228 while (j != sparsity.end()) {
229 if (!r_used.is_element(j, i))
230 not_used.add_element(j, i);
231 j = *(++j_itr);
232 }
233 }
234}
235
236template<class Base, class VectorSet>
237inline size_t colorForwardJacobianHessian(const ADFun<Base>& fun,
238 const VectorSet& jac_p,
239 const VectorSet& hes_p,
240 SparseForjacHessianWork& work) {
245 size_t i, j1, j11, j2, c, k;
246
247 using Set_type = typename VectorSet::value_type;
248 using SparsityPattern = typename local::sparse::internal_pattern<Set_type>::pattern_type;
249 using SparIter = typename SparsityPattern::const_iterator;
250
251 size_t n = fun.Domain();
252 size_t m = fun.Range();
253
254 std::vector<size_t>& color = work.color;
255
256 if (color.empty()) {
257
258 color.resize(n);
259
260 CPPAD_ASSERT_KNOWN(jac_p.size() == m,
261 "sparseForJacHessian: invalid jacobian sparsity pattern dimension.")
262 CPPAD_ASSERT_KNOWN(hes_p.size() == n,
263 "sparseForJacHessian: invalid hessian sparsity pattern dimension.")
264
268 // transpose sparsity pattern
269 SparsityPattern p_transpose;
270 bool transpose = true;
271 sparsity_user2internal(p_transpose, jac_p, n, m, transpose, "Invalid sparsity pattern");
272
273
274 size_t jac_K = work.jac.K;
275 std::vector<size_t>& jac_row = work.jac.user_row;
276 std::vector<size_t>& jac_col = work.jac.user_col;
277 std::vector<size_t>& sort_col = work.jac.sort_col;
278
279 CPPAD_ASSERT_UNKNOWN(p_transpose.n_set() == n)
280 CPPAD_ASSERT_UNKNOWN(p_transpose.end() == m)
281
282 // rows and columns that are in the returned jacobian
283 SparsityPattern jac_r_used, jac_c_used;
284 jac_r_used.resize(n, m);
285 jac_c_used.resize(m, n);
286
287 for (k = 0; k < jac_K; k++) {
288 CPPAD_ASSERT_UNKNOWN(jac_row[sort_col[k]] < m && jac_col[sort_col[k]] < n)
289 CPPAD_ASSERT_UNKNOWN(k == 0 || jac_col[sort_col[k - 1]] <= jac_col[sort_col[k]])
290 CPPAD_ASSERT_KNOWN(p_transpose.is_element(jac_col[sort_col[k]], jac_row[sort_col[k]]),
291 "sparseForJacHessian: "
292 "a (row, col) pair is not in sparsity pattern.")
293 jac_r_used.add_element(jac_col[sort_col[k]], jac_row[sort_col[k]]);
294 jac_c_used.add_element(jac_row[sort_col[k]], jac_col[sort_col[k]]);
295 }
296
297 // given a row index, which columns are non-zero and not used
298 SparsityPattern jac_not_used;
299 computeNotUsed(jac_not_used, p_transpose, jac_c_used, m, n);
300
304 SparsityPattern hes_sparsity;
305 transpose = false;
306 sparsity_user2internal(hes_sparsity, hes_p, n, n, transpose, "Invalid sparsity pattern");
307
308 size_t hes_K = work.hes.K;
309 std::vector<size_t>& hes_row(work.hes.r_sort);
310 std::vector<size_t>& hes_col(work.hes.c_sort);
311
312 CPPAD_ASSERT_UNKNOWN(hes_sparsity.n_set() == n)
313 CPPAD_ASSERT_UNKNOWN(hes_sparsity.end() == n)
314
315 // rows and columns that are in the returned hessian
316 SparsityPattern hes_r_used, hes_c_used;
317 hes_r_used.resize(n, n);
318 hes_c_used.resize(n, n);
319
320 for (k = 0; k < hes_K; k++) {
321 CPPAD_ASSERT_UNKNOWN(hes_row[k] < n && hes_col[k] < n)
322 CPPAD_ASSERT_UNKNOWN(k == 0 || hes_row[k - 1] <= hes_row[k])
323 CPPAD_ASSERT_KNOWN(hes_sparsity.is_element(hes_row[k], hes_col[k]),
324 "sparseForJacHessian: a (row, col) pair is not in sparsity pattern.")
325 hes_r_used.add_element(hes_col[k], hes_row[k]);
326 hes_c_used.add_element(hes_row[k], hes_col[k]);
327 }
328
329 // given a column index, which rows are non-zero and not used
330 SparsityPattern hes_not_used;
331 computeNotUsed(hes_not_used, hes_sparsity, hes_r_used, n, n);
332
333 // initial coloring
334 for (j1 = 0; j1 < n; j1++) {
335 color[j1] = j1;
336 }
337
338 // See GreedyPartialD2Coloring Algorithm Section 3.6.2 of
339 // Graph Coloring in Optimization Revisited by
340 // Assefaw Gebremedhin, Fredrik Maane, Alex Pothen
341 vectorBool forbidden(n);
342 for (j1 = 1; j1 < n; j1++) {
343 // initialize all colors as ok for this column
344 // (value of forbidden for c > j does not matter)
345 for (c = 0; c <= j1; c++)
346 forbidden[c] = false;
347
351 // for each row that is non-zero for this column
352 SparIter p_itr(p_transpose, j1);
353 i = *p_itr;
354 while (i != p_transpose.end()) {
355 // for each column that this row uses
356 SparIter jac_c_used_itr(jac_c_used, i);
357 j11 = *jac_c_used_itr;
358 while (j11 != jac_c_used.end()) {
359 // if this is not the same column, forbid its color
360 if (j11 < j1)
361 forbidden[ color[j11] ] = true;
362 j11 = *(++jac_c_used_itr);
363 }
364 i = *(++p_itr);
365 }
366
367 // for each row that this column uses
368 SparIter jac_r_used_itr(jac_r_used, j1);
369 i = *jac_r_used_itr;
370
371 while (i != jac_r_used.end()) {
372 // For each column that is non-zero for this row
373 // (the used columns have already been checked above).
374 SparIter jac_not_used_itr(jac_not_used, i);
375 j11 = *jac_not_used_itr;
376 while (j11 != jac_not_used.end()) {
377 // if this is not the same column, forbid its color
378 if (j11 < j1)
379 forbidden[ color[j11] ] = true;
380 j11 = *(++jac_not_used_itr);
381 }
382 i = *(++jac_r_used_itr);
383 }
384
388 // -----------------------------------------------------
389 // Forbid colors that this row would destroy results for.
390 // for each column that is non-zero for this row
391 SparIter hes_itr(hes_sparsity, j1);
392 j2 = *hes_itr;
393 while (j2 != hes_sparsity.end()) {
394 // for each row that this column uses
395 SparIter hes_r_used_itr(hes_r_used, j2);
396 j11 = *hes_r_used_itr;
397 while (j11 != hes_r_used.end()) {
398 // if this is not the same row, forbid its color
399 if (j11 < j1)
400 forbidden[ color[j11] ] = true;
401 j11 = *(++hes_r_used_itr);
402 }
403 j2 = *(++hes_itr);
404 }
405
406 // -------------------------------------------------------
407 // Forbid colors that would destroy the results for this row.
408 // for each column that this row used
409 SparIter hes_c_used_itr(hes_c_used, j1);
410 j2 = *hes_c_used_itr;
411 while (j2 != hes_c_used.end()) {
412 // For each row that is non-zero for this column
413 // (the used rows have already been checked above).
414 SparIter hes_not_used_itr(hes_not_used, j2);
415 j11 = *hes_not_used_itr;
416 while (j11 != hes_not_used.end()) {
417 // if this is not the same row, forbid its color
418 if (j11 < j1)
419 forbidden[ color[j11] ] = true;
420 j11 = *(++hes_not_used_itr);
421 }
422 j2 = *(++hes_c_used_itr);
423 }
424
425
426 // pick the color with smallest index
427 c = 0;
428 while (forbidden[c]) {
429 c++;
430 CPPAD_ASSERT_UNKNOWN(c <= j1)
431 }
432 color[j1] = c;
433 }
434 }
435
436
437 size_t n_color = 1;
438 for (j1 = 0; j1 < n; j1++)
439 n_color = std::max<size_t>(n_color, color[j1] + 1);
440
441 return n_color;
442}
443
490template<class Base, class VectorBase, class VectorSet, class VectorSize>
491size_t sparseForJacHessian(ADFun<Base>& fun,
492 const VectorBase& x,
493 const VectorBase& w,
494 VectorBase& y,
495 const VectorSet& jac_p,
496 const VectorSize& jac_row,
497 const VectorSize& jac_col,
498 VectorBase& jac,
499 const VectorSet& hes_p,
500 const VectorSize& hes_row,
501 const VectorSize& hes_col,
502 VectorBase& hes,
503 SparseForjacHessianWork& work) {
504 std::vector<VectorBase> vw(1);
505 std::vector<VectorBase> vhes(1);
506 vw[0] = w;
507 vhes[0] = hes;
508
509 size_t n_sweep = sparseForJacHessian(fun,
510 x, vw,
511 y,
512 jac_p, jac_row, jac_col, jac,
513 hes_p, hes_row, hes_col, vhes,
514 work);
515
516 hes = vhes[0];
517
518 return n_sweep;
519}
520
521template<class Base, class VectorBase, class VectorVectorBase, class VectorSet, class VectorSize>
522size_t sparseForJacHessian(ADFun<Base>& fun,
523 const VectorBase& x,
524 const VectorVectorBase& w,
525 VectorBase& y,
526 const VectorSet& jac_p,
527 const VectorSize& jac_row,
528 const VectorSize& jac_col,
529 VectorBase& jac,
530 const VectorSet& hes_p,
531 const VectorSize& hes_row,
532 const VectorSize& hes_col,
533 VectorVectorBase& hes,
534 SparseForjacHessianWork& work) {
535 using CppAD::vectorBool;
536 size_t j1, k, c;
537
538 size_t n = fun.Domain();
539 size_t m = fun.Range();
540
541 size_t nH = size_t(hes.size());
542 size_t jac_K = size_t(jac_row.size());
543 size_t hes_K = size_t(hes_row.size());
544
545 CPPADCG_ASSERT_KNOWN(size_t(x.size()) == n,
546 "sparseForJacHessian: size of x not equal domain dimension for f.")
547
548 CPPADCG_ASSERT_KNOWN(size_t(w.size()) == nH,
549 "sparseForJacHessian: size of w not equal to the size of hes.")
550
551 const std::vector<size_t>& jac_scol = work.jac.sort_col;
552 const std::vector<size_t>& hes_srow = work.hes.r_sort;
553 const std::vector<size_t>& hes_scol = work.hes.c_sort;
554 const std::vector<size_t>& hes_user_k = work.hes.k_sort;
555 const std::vector<size_t>& color = work.color;
556
557 // some values
558 const Base zero(0);
559 const Base one(1);
560
561 // check VectorBase is Simple Vector class with Base type elements
562 CheckSimpleVector<Base, VectorBase>();
563
564 CPPAD_ASSERT_UNKNOWN(size_t(x.size()) == n)
565
566 work.prepare(fun, jac_row, jac_col, hes_row, hes_col);
567
571 size_t n_color = colorForwardJacobianHessian(fun, jac_p, hes_p, work);
572
573
574 // Point at which we are evaluating the Hessian
575 y = fun.Forward(0, x);
576
577 // direction vector for calls to forward (columns of jacobian and rows of the Hessian)
578 VectorBase u(n);
579
580 // location for return values from forward
581 VectorBase dy(m);
582
583 // location for return values from reverse (columns of the Hessian)
584 VectorBase ddw(2 * n);
585
586 // initialize the return value
587 for (k = 0; k < jac_K; k++)
588 jac[k] = zero;
589 for (size_t h = 0; h < nH; h++) {
590 VectorBase& hesh = hes[h];
591 for (k = 0; k < hes_K; k++)
592 hesh[k] = zero;
593 }
594
595 // loop over colors
596 size_t n_sweep = 0;
597 for (c = 0; c < n_color; c++) {
598
599 bool anyJac = false;
600 size_t kJac = 0;
601 for (j1 = 0; j1 < n; j1++) {
602 if (color[j1] == c) {
603 // find first k such that col[sort_col[k]] has color c
604 while (work.jac.user_col[jac_scol[kJac]] < j1)
605 kJac++;
606 anyJac = work.jac.user_col[jac_scol[kJac]] == j1;
607 if (anyJac)
608 break;
609 }
610 }
611
612 bool anyHes = false;
613 size_t kHessStart = 0;
614 for (j1 = 0; j1 < n; j1++) {
615 if (color[j1] == c) {
616 // find first k such that row[k] has color c
617 while (hes_srow[kHessStart] < j1)
618 kHessStart++;
619 anyHes = hes_srow[kHessStart] == j1;
620 if (anyHes)
621 break;
622 }
623 }
624
625 if (anyJac || anyHes) {
626 n_sweep++;
627 // combine all rows with this color
628 for (j1 = 0; j1 < n; j1++) {
629 u[j1] = zero;
630 if (color[j1] == c)
631 u[j1] = one;
632 }
633 // call forward mode for all these rows at once
634 dy = fun.Forward(1, u);
635
636 if (anyJac) {
637 // set the corresponding components of the result
638 for (j1 = 0; j1 < n; j1++) {
639 if (color[j1] == c) {
640 // find first index in c for this jacobian column
641 while (work.jac.user_col[jac_scol[kJac]] < j1)
642 kJac++;
643 // extract the row results for this column
644 while (work.jac.user_col[jac_scol[kJac]] == j1) {
645 jac[ jac_scol[kJac] ] = dy[ work.jac.user_row[jac_scol[kJac]] ];
646 kJac++;
647 }
648 }
649 }
650 }
651
652 if (anyHes) {
653 n_sweep++;
654
655 for (size_t h = 0; h < nH; h++) {
656 // evaluate derivative of w^T * F'(x) * u
657 ddw = fun.Reverse(2, w[h]);
658
659 VectorBase& hesh = hes[h];
660
661 // set the corresponding components of the result
662 size_t kHess = kHessStart;
663 for (j1 = 0; j1 < n; j1++) {
664 if (color[j1] == c) {
665 // find first index in c for this column
666 while (hes_srow[kHess] < j1)
667 kHess++;
668 // extract the results for this row
669 while (hes_srow[kHess] == j1) {
670 size_t j2 = hes_scol[kHess];
671 hesh[ hes_user_k[kHess] ] = ddw[ j2 * 2 + 1 ];
672 kHess++;
673 }
674 }
675 }
676 }
677 }
678 }
679 }
680 return n_sweep;
681}
682
683}
684}
685
686#endif
size_t K
number elements in the user sparse Hessian
void clear()
inform CppAD that this information needs to be recomputed
std::vector< size_t > r_sort
version of user r array sorted by row or column
std::vector< size_t > k_sort
mapping from sorted array indices to user array indices
void prepare(const ADFun< Base > &fun, const VectorSize &row, const VectorSize &col)
std::vector< size_t > c_sort
version of user c array sorted by row or column
std::vector< size_t > sort_row
indices that sort the user arrays by row with the extra value K at the end
std::vector< size_t > user_row
version of user row array with the extra value m at end
void prepare(const ADFun< Base > &fun, const VectorSize &row, const VectorSize &col)
std::vector< size_t > sort_col
indices that sort the user arrays by column with the extra value K at the end
size_t K
number elements in the user sparse Jacobian
std::vector< size_t > user_col
version of user col array with the extra value n at end
void clear()
inform CppAD that this information needs to be recomputed
std::vector< size_t > color
results of the coloring algorithm
void clear()
inform CppAD that this information needs to be recomputed