// Eigen 3.3.0 -- Core/ProductEvaluators.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
// Copyright (C) 2008-2010 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2011 Jitse Niesen <jitse@maths.leeds.ac.uk>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


#ifndef EIGEN_PRODUCTEVALUATORS_H
#define EIGEN_PRODUCTEVALUATORS_H

namespace Eigen {

namespace internal {

28template<typename Lhs, typename Rhs, int Options>
29struct evaluator<Product<Lhs, Rhs, Options> >
30 : public product_evaluator<Product<Lhs, Rhs, Options> >
31{
32 typedef Product<Lhs, Rhs, Options> XprType;
33 typedef product_evaluator<XprType> Base;
34
35 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {}
36};
37
38// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B"
39// TODO we should apply that rule only if that's really helpful
40template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
41struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
42 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
43 const Product<Lhs, Rhs, DefaultProduct> > >
44{
45 static const bool value = true;
46};
47template<typename Lhs, typename Rhs, typename Scalar1, typename Scalar2, typename Plain1>
48struct evaluator<CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
49 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
50 const Product<Lhs, Rhs, DefaultProduct> > >
51 : public evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> >
52{
53 typedef CwiseBinaryOp<internal::scalar_product_op<Scalar1,Scalar2>,
54 const CwiseNullaryOp<internal::scalar_constant_op<Scalar1>, Plain1>,
55 const Product<Lhs, Rhs, DefaultProduct> > XprType;
56 typedef evaluator<Product<EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(Scalar1,Lhs,product), Rhs, DefaultProduct> > Base;
57
58 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
59 : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs())
60 {}
61};
62
63
64template<typename Lhs, typename Rhs, int DiagIndex>
65struct evaluator<Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> >
66 : public evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> >
67{
68 typedef Diagonal<const Product<Lhs, Rhs, DefaultProduct>, DiagIndex> XprType;
69 typedef evaluator<Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex> > Base;
70
71 EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr)
72 : Base(Diagonal<const Product<Lhs, Rhs, LazyProduct>, DiagIndex>(
73 Product<Lhs, Rhs, LazyProduct>(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()),
74 xpr.index() ))
75 {}
76};
77
78
79// Helper class to perform a matrix product with the destination at hand.
80// Depending on the sizes of the factors, there are different evaluation strategies
81// as controlled by internal::product_type.
82template< typename Lhs, typename Rhs,
83 typename LhsShape = typename evaluator_traits<Lhs>::Shape,
84 typename RhsShape = typename evaluator_traits<Rhs>::Shape,
85 int ProductType = internal::product_type<Lhs,Rhs>::value>
86struct generic_product_impl;
87
88template<typename Lhs, typename Rhs>
89struct evaluator_assume_aliasing<Product<Lhs, Rhs, DefaultProduct> > {
90 static const bool value = true;
91};
92
93// This is the default evaluator implementation for products:
94// It creates a temporary and call generic_product_impl
95template<typename Lhs, typename Rhs, int Options, int ProductTag, typename LhsShape, typename RhsShape>
96struct product_evaluator<Product<Lhs, Rhs, Options>, ProductTag, LhsShape, RhsShape>
97 : public evaluator<typename Product<Lhs, Rhs, Options>::PlainObject>
98{
99 typedef Product<Lhs, Rhs, Options> XprType;
100 typedef typename XprType::PlainObject PlainObject;
101 typedef evaluator<PlainObject> Base;
102 enum {
103 Flags = Base::Flags | EvalBeforeNestingBit
104 };
105
106 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
107 explicit product_evaluator(const XprType& xpr)
108 : m_result(xpr.rows(), xpr.cols())
109 {
110 ::new (static_cast<Base*>(this)) Base(m_result);
111
112// FIXME shall we handle nested_eval here?,
113// if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in permutation_matrix_product, transposition_matrix_product, etc.)
114// typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
115// typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
116// typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
117// typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
118//
119// const LhsNested lhs(xpr.lhs());
120// const RhsNested rhs(xpr.rhs());
121//
122// generic_product_impl<LhsNestedCleaned, RhsNestedCleaned>::evalTo(m_result, lhs, rhs);
123
124 generic_product_impl<Lhs, Rhs, LhsShape, RhsShape, ProductTag>::evalTo(m_result, xpr.lhs(), xpr.rhs());
125 }
126
127protected:
128 PlainObject m_result;
129};
130
131// The following three shortcuts are enabled only if the scalar types match excatly.
132// TODO: we could enable them for different scalar types when the product is not vectorized.
133
134// Dense = Product
135template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
136struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::assign_op<Scalar,Scalar>, Dense2Dense,
137 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
138{
139 typedef Product<Lhs,Rhs,Options> SrcXprType;
140 static EIGEN_STRONG_INLINE
141 void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op<Scalar,Scalar> &)
142 {
143 Index dstRows = src.rows();
144 Index dstCols = src.cols();
145 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
146 dst.resize(dstRows, dstCols);
147 // FIXME shall we handle nested_eval here?
148 generic_product_impl<Lhs, Rhs>::evalTo(dst, src.lhs(), src.rhs());
149 }
150};
151
152// Dense += Product
153template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
154struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::add_assign_op<Scalar,Scalar>, Dense2Dense,
155 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
156{
157 typedef Product<Lhs,Rhs,Options> SrcXprType;
158 static EIGEN_STRONG_INLINE
159 void run(DstXprType &dst, const SrcXprType &src, const internal::add_assign_op<Scalar,Scalar> &)
160 {
161 Index dstRows = src.rows();
162 Index dstCols = src.cols();
163 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
164 dst.resize(dstRows, dstCols);
165 // FIXME shall we handle nested_eval here?
166 generic_product_impl<Lhs, Rhs>::addTo(dst, src.lhs(), src.rhs());
167 }
168};
169
170// Dense -= Product
171template< typename DstXprType, typename Lhs, typename Rhs, int Options, typename Scalar>
172struct Assignment<DstXprType, Product<Lhs,Rhs,Options>, internal::sub_assign_op<Scalar,Scalar>, Dense2Dense,
173 typename enable_if<(Options==DefaultProduct || Options==AliasFreeProduct)>::type>
174{
175 typedef Product<Lhs,Rhs,Options> SrcXprType;
176 static EIGEN_STRONG_INLINE
177 void run(DstXprType &dst, const SrcXprType &src, const internal::sub_assign_op<Scalar,Scalar> &)
178 {
179 Index dstRows = src.rows();
180 Index dstCols = src.cols();
181 if((dst.rows()!=dstRows) || (dst.cols()!=dstCols))
182 dst.resize(dstRows, dstCols);
183 // FIXME shall we handle nested_eval here?
184 generic_product_impl<Lhs, Rhs>::subTo(dst, src.lhs(), src.rhs());
185 }
186};
187
188
189// Dense ?= scalar * Product
190// TODO we should apply that rule if that's really helpful
191// for instance, this is not good for inner products
192template< typename DstXprType, typename Lhs, typename Rhs, typename AssignFunc, typename Scalar, typename ScalarBis, typename Plain>
193struct Assignment<DstXprType, CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>, const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
194 const Product<Lhs,Rhs,DefaultProduct> >, AssignFunc, Dense2Dense>
195{
196 typedef CwiseBinaryOp<internal::scalar_product_op<ScalarBis,Scalar>,
197 const CwiseNullaryOp<internal::scalar_constant_op<ScalarBis>,Plain>,
198 const Product<Lhs,Rhs,DefaultProduct> > SrcXprType;
199 static EIGEN_STRONG_INLINE
200 void run(DstXprType &dst, const SrcXprType &src, const AssignFunc& func)
201 {
202 call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs())*src.rhs().rhs(), func);
203 }
204};
205
206//----------------------------------------
207// Catch "Dense ?= xpr + Product<>" expression to save one temporary
208// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct
209
210template<typename OtherXpr, typename Lhs, typename Rhs>
211struct evaluator_assume_aliasing<CwiseBinaryOp<internal::scalar_sum_op<typename OtherXpr::Scalar,typename Product<Lhs,Rhs,DefaultProduct>::Scalar>, const OtherXpr,
212 const Product<Lhs,Rhs,DefaultProduct> >, DenseShape > {
213 static const bool value = true;
214};
215
216template<typename DstXprType, typename OtherXpr, typename ProductType, typename Func1, typename Func2>
217struct assignment_from_xpr_op_product
218{
219 template<typename SrcXprType, typename InitialFunc>
220 static EIGEN_STRONG_INLINE
221 void run(DstXprType &dst, const SrcXprType &src, const InitialFunc& /*func*/)
222 {
223 call_assignment_no_alias(dst, src.lhs(), Func1());
224 call_assignment_no_alias(dst, src.rhs(), Func2());
225 }
226};
227
228#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP,BINOP,ASSIGN_OP2) \
229 template< typename DstXprType, typename OtherXpr, typename Lhs, typename Rhs, typename DstScalar, typename SrcScalar, typename OtherScalar,typename ProdScalar> \
230 struct Assignment<DstXprType, CwiseBinaryOp<internal::BINOP<OtherScalar,ProdScalar>, const OtherXpr, \
231 const Product<Lhs,Rhs,DefaultProduct> >, internal::ASSIGN_OP<DstScalar,SrcScalar>, Dense2Dense> \
232 : assignment_from_xpr_op_product<DstXprType, OtherXpr, Product<Lhs,Rhs,DefaultProduct>, internal::ASSIGN_OP<DstScalar,OtherScalar>, internal::ASSIGN_OP2<DstScalar,ProdScalar> > \
233 {}
234
235EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op,add_assign_op);
236EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_sum_op,add_assign_op);
237EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_sum_op,sub_assign_op);
238
239EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op,sub_assign_op);
240EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op,scalar_difference_op,sub_assign_op);
241EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op,scalar_difference_op,add_assign_op);
242
243//----------------------------------------
244
245template<typename Lhs, typename Rhs>
246struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,InnerProduct>
247{
248 template<typename Dst>
249 static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
250 {
251 dst.coeffRef(0,0) = (lhs.transpose().cwiseProduct(rhs)).sum();
252 }
253
254 template<typename Dst>
255 static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
256 {
257 dst.coeffRef(0,0) += (lhs.transpose().cwiseProduct(rhs)).sum();
258 }
259
260 template<typename Dst>
261 static void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
262 { dst.coeffRef(0,0) -= (lhs.transpose().cwiseProduct(rhs)).sum(); }
263};
264
265
266/***********************************************************************
267* Implementation of outer dense * dense vector product
268***********************************************************************/
269
270// Column major result
271template<typename Dst, typename Lhs, typename Rhs, typename Func>
272void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const false_type&)
273{
274 evaluator<Rhs> rhsEval(rhs);
275 typename nested_eval<Lhs,Rhs::SizeAtCompileTime>::type actual_lhs(lhs);
276 // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored
277 // FIXME not very good if rhs is real and lhs complex while alpha is real too
278 const Index cols = dst.cols();
279 for (Index j=0; j<cols; ++j)
280 func(dst.col(j), rhsEval.coeff(Index(0),j) * actual_lhs);
281}
282
283// Row major result
284template<typename Dst, typename Lhs, typename Rhs, typename Func>
285void outer_product_selector_run(Dst& dst, const Lhs &lhs, const Rhs &rhs, const Func& func, const true_type&)
286{
287 evaluator<Lhs> lhsEval(lhs);
288 typename nested_eval<Rhs,Lhs::SizeAtCompileTime>::type actual_rhs(rhs);
289 // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored
290 // FIXME not very good if lhs is real and rhs complex while alpha is real too
291 const Index rows = dst.rows();
292 for (Index i=0; i<rows; ++i)
293 func(dst.row(i), lhsEval.coeff(i,Index(0)) * actual_rhs);
294}
295
296template<typename Lhs, typename Rhs>
297struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,OuterProduct>
298{
299 template<typename T> struct is_row_major : internal::conditional<(int(T::Flags)&RowMajorBit), internal::true_type, internal::false_type>::type {};
300 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
301
302 // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose
303 struct set { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() = src; } };
304 struct add { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() += src; } };
305 struct sub { template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const { dst.const_cast_derived() -= src; } };
306 struct adds {
307 Scalar m_scale;
308 explicit adds(const Scalar& s) : m_scale(s) {}
309 template<typename Dst, typename Src> void operator()(const Dst& dst, const Src& src) const {
310 dst.const_cast_derived() += m_scale * src;
311 }
312 };
313
314 template<typename Dst>
315 static inline void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
316 {
317 internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major<Dst>());
318 }
319
320 template<typename Dst>
321 static inline void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
322 {
323 internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major<Dst>());
324 }
325
326 template<typename Dst>
327 static inline void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
328 {
329 internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major<Dst>());
330 }
331
332 template<typename Dst>
333 static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
334 {
335 internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major<Dst>());
336 }
337
338};
339
340
341// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo
342template<typename Lhs, typename Rhs, typename Derived>
343struct generic_product_impl_base
344{
345 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
346
347 template<typename Dst>
348 static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
349 { dst.setZero(); scaleAndAddTo(dst, lhs, rhs, Scalar(1)); }
350
351 template<typename Dst>
352 static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
353 { scaleAndAddTo(dst,lhs, rhs, Scalar(1)); }
354
355 template<typename Dst>
356 static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
357 { scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); }
358
359 template<typename Dst>
360 static EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
361 { Derived::scaleAndAddTo(dst,lhs,rhs,alpha); }
362
363};
364
365template<typename Lhs, typename Rhs>
366struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct>
367 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,GemvProduct> >
368{
369 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
370 enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight };
371 typedef typename internal::conditional<int(Side)==OnTheRight,Lhs,Rhs>::type MatrixType;
372
373 template<typename Dest>
374 static EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
375 {
376 internal::gemv_dense_selector<Side,
377 (int(MatrixType::Flags)&RowMajorBit) ? RowMajor : ColMajor,
378 bool(internal::blas_traits<MatrixType>::HasUsableDirectAccess)
379 >::run(lhs, rhs, dst, alpha);
380 }
381};
382
383template<typename Lhs, typename Rhs>
384struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode>
385{
386 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
387
388 template<typename Dst>
389 static EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
390 {
391 // Same as: dst.noalias() = lhs.lazyProduct(rhs);
392 // but easier on the compiler side
393 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op<typename Dst::Scalar,Scalar>());
394 }
395
396 template<typename Dst>
397 static EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
398 {
399 // dst.noalias() += lhs.lazyProduct(rhs);
400 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op<typename Dst::Scalar,Scalar>());
401 }
402
403 template<typename Dst>
404 static EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs)
405 {
406 // dst.noalias() -= lhs.lazyProduct(rhs);
407 call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op<typename Dst::Scalar,Scalar>());
408 }
409
410// template<typename Dst>
411// static inline void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
412// { dst.noalias() += alpha * lhs.lazyProduct(rhs); }
413};
414
415// This specialization enforces the use of a coefficient-based evaluation strategy
416template<typename Lhs, typename Rhs>
417struct generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,LazyCoeffBasedProductMode>
418 : generic_product_impl<Lhs,Rhs,DenseShape,DenseShape,CoeffBasedProductMode> {};
419
// Case 2: evaluate coefficient by coefficient.
//
// Mostly taken from CoeffBasedProduct.h. The main difference is the extra
// run-time argument of etor_product_*_impl::run() giving the inner dimension
// of the product, because evaluator objects do not know their size.

template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
struct etor_product_coeff_impl;

template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
struct etor_product_packet_impl;
431
432template<typename Lhs, typename Rhs, int ProductTag>
433struct product_evaluator<Product<Lhs, Rhs, LazyProduct>, ProductTag, DenseShape, DenseShape>
434 : evaluator_base<Product<Lhs, Rhs, LazyProduct> >
435{
436 typedef Product<Lhs, Rhs, LazyProduct> XprType;
437 typedef typename XprType::Scalar Scalar;
438 typedef typename XprType::CoeffReturnType CoeffReturnType;
439
440 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
441 explicit product_evaluator(const XprType& xpr)
442 : m_lhs(xpr.lhs()),
443 m_rhs(xpr.rhs()),
444 m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that!
445 m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable them when not needed,
446 // or perhaps declare them on the fly on the packet method... We have experiment to check what's best.
447 m_innerDim(xpr.lhs().cols())
448 {
449 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
450 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::AddCost);
451 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
452#if 0
453 std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n";
454 std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n";
455 std::cerr << "LhsAlignment= " << LhsAlignment << "\n";
456 std::cerr << "RhsAlignment= " << RhsAlignment << "\n";
457 std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n";
458 std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n";
459 std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n";
460 std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n";
461 std::cerr << "Alignment= " << Alignment << "\n";
462 std::cerr << "Flags= " << Flags << "\n";
463#endif
464 }
465
466 // Everything below here is taken from CoeffBasedProduct.h
467
468 typedef typename internal::nested_eval<Lhs,Rhs::ColsAtCompileTime>::type LhsNested;
469 typedef typename internal::nested_eval<Rhs,Lhs::RowsAtCompileTime>::type RhsNested;
470
471 typedef typename internal::remove_all<LhsNested>::type LhsNestedCleaned;
472 typedef typename internal::remove_all<RhsNested>::type RhsNestedCleaned;
473
474 typedef evaluator<LhsNestedCleaned> LhsEtorType;
475 typedef evaluator<RhsNestedCleaned> RhsEtorType;
476
477 enum {
478 RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime,
479 ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime,
480 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime),
481 MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime,
482 MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime
483 };
484
485 typedef typename find_best_packet<Scalar,RowsAtCompileTime>::type LhsVecPacketType;
486 typedef typename find_best_packet<Scalar,ColsAtCompileTime>::type RhsVecPacketType;
487
488 enum {
489
490 LhsCoeffReadCost = LhsEtorType::CoeffReadCost,
491 RhsCoeffReadCost = RhsEtorType::CoeffReadCost,
492 CoeffReadCost = InnerSize==0 ? NumTraits<Scalar>::ReadCost
493 : InnerSize == Dynamic ? HugeCost
494 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
495 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
496
497 Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
498
499 LhsFlags = LhsEtorType::Flags,
500 RhsFlags = RhsEtorType::Flags,
501
502 LhsRowMajor = LhsFlags & RowMajorBit,
503 RhsRowMajor = RhsFlags & RowMajorBit,
504
505 LhsVecPacketSize = unpacket_traits<LhsVecPacketType>::size,
506 RhsVecPacketSize = unpacket_traits<RhsVecPacketType>::size,
507
508 // Here, we don't care about alignment larger than the usable packet size.
509 LhsAlignment = EIGEN_PLAIN_ENUM_MIN(LhsEtorType::Alignment,LhsVecPacketSize*int(sizeof(typename LhsNestedCleaned::Scalar))),
510 RhsAlignment = EIGEN_PLAIN_ENUM_MIN(RhsEtorType::Alignment,RhsVecPacketSize*int(sizeof(typename RhsNestedCleaned::Scalar))),
511
512 SameType = is_same<typename LhsNestedCleaned::Scalar,typename RhsNestedCleaned::Scalar>::value,
513
514 CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime!=1),
515 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime!=1),
516
517 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
518 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
519 : (bool(RhsRowMajor) && !CanVectorizeLhs),
520
521 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
522 | (EvalToRowMajor ? RowMajorBit : 0)
523 // TODO enable vectorization for mixed types
524 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0)
525 | (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0),
526
527 LhsOuterStrideBytes = int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)),
528 RhsOuterStrideBytes = int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)),
529
530 Alignment = bool(CanVectorizeLhs) ? (LhsOuterStrideBytes<=0 || (int(LhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,LhsAlignment))!=0 ? 0 : LhsAlignment)
531 : bool(CanVectorizeRhs) ? (RhsOuterStrideBytes<=0 || (int(RhsOuterStrideBytes) % EIGEN_PLAIN_ENUM_MAX(1,RhsAlignment))!=0 ? 0 : RhsAlignment)
532 : 0,
533
534 /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside
535 * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner
536 * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect
537 * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI.
538 */
539 CanVectorizeInner = SameType
540 && LhsRowMajor
541 && (!RhsRowMajor)
542 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
543 && (InnerSize % packet_traits<Scalar>::size == 0)
544 };
545
546 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const
547 {
548 return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
549 }
550
551 /* Allow index-based non-packet access. It is impossible though to allow index-based packed access,
552 * which is why we don't set the LinearAccessBit.
553 * TODO: this seems possible when the result is a vector
554 */
555 EIGEN_DEVICE_FUNC const CoeffReturnType coeff(Index index) const
556 {
557 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
558 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
559 return (m_lhs.row(row).transpose().cwiseProduct( m_rhs.col(col) )).sum();
560 }
561
562 template<int LoadMode, typename PacketType>
563 const PacketType packet(Index row, Index col) const
564 {
565 PacketType res;
566 typedef etor_product_packet_impl<bool(int(Flags)&RowMajorBit) ? RowMajor : ColMajor,
567 Unroll ? int(InnerSize) : Dynamic,
568 LhsEtorType, RhsEtorType, PacketType, LoadMode> PacketImpl;
569 PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res);
570 return res;
571 }
572
573 template<int LoadMode, typename PacketType>
574 const PacketType packet(Index index) const
575 {
576 const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? 0 : index;
577 const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime==1) ? index : 0;
578 return packet<LoadMode,PacketType>(row,col);
579 }
580
581protected:
582 typename internal::add_const_on_value_type<LhsNested>::type m_lhs;
583 typename internal::add_const_on_value_type<RhsNested>::type m_rhs;
584
585 LhsEtorType m_lhsImpl;
586 RhsEtorType m_rhsImpl;
587
588 // TODO: Get rid of m_innerDim if known at compile time
589 Index m_innerDim;
590};
591
592template<typename Lhs, typename Rhs>
593struct product_evaluator<Product<Lhs, Rhs, DefaultProduct>, LazyCoeffBasedProductMode, DenseShape, DenseShape>
594 : product_evaluator<Product<Lhs, Rhs, LazyProduct>, CoeffBasedProductMode, DenseShape, DenseShape>
595{
596 typedef Product<Lhs, Rhs, DefaultProduct> XprType;
597 typedef Product<Lhs, Rhs, LazyProduct> BaseProduct;
598 typedef product_evaluator<BaseProduct, CoeffBasedProductMode, DenseShape, DenseShape> Base;
599 enum {
600 Flags = Base::Flags | EvalBeforeNestingBit
601 };
602 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
603 : Base(BaseProduct(xpr.lhs(),xpr.rhs()))
604 {}
605};
606
607/****************************************
608*** Coeff based product, Packet path ***
609****************************************/
610
611template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
612struct etor_product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
613{
614 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
615 {
616 etor_product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
617 res = pmadd(pset1<Packet>(lhs.coeff(row, Index(UnrollingIndex-1))), rhs.template packet<LoadMode,Packet>(Index(UnrollingIndex-1), col), res);
618 }
619};
620
621template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
622struct etor_product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
623{
624 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet &res)
625 {
626 etor_product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, innerDim, res);
627 res = pmadd(lhs.template packet<LoadMode,Packet>(row, Index(UnrollingIndex-1)), pset1<Packet>(rhs.coeff(Index(UnrollingIndex-1), col)), res);
628 }
629};
630
631template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
632struct etor_product_packet_impl<RowMajor, 1, Lhs, Rhs, Packet, LoadMode>
633{
634 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
635 {
636 res = pmul(pset1<Packet>(lhs.coeff(row, Index(0))),rhs.template packet<LoadMode,Packet>(Index(0), col));
637 }
638};
639
640template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
641struct etor_product_packet_impl<ColMajor, 1, Lhs, Rhs, Packet, LoadMode>
642{
643 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index /*innerDim*/, Packet &res)
644 {
645 res = pmul(lhs.template packet<LoadMode,Packet>(row, Index(0)), pset1<Packet>(rhs.coeff(Index(0), col)));
646 }
647};
648
649template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
650struct etor_product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
651{
652 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
653 {
654 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
655 }
656};
657
658template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
659struct etor_product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
660{
661 static EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, const Rhs& /*rhs*/, Index /*innerDim*/, Packet &res)
662 {
663 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
664 }
665};
666
667template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
668struct etor_product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
669{
670 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
671 {
672 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
673 for(Index i = 0; i < innerDim; ++i)
674 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode,Packet>(i, col), res);
675 }
676};
677
678template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
679struct etor_product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
680{
681 static EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Index innerDim, Packet& res)
682 {
683 res = pset1<Packet>(typename unpacket_traits<Packet>::type(0));
684 for(Index i = 0; i < innerDim; ++i)
685 res = pmadd(lhs.template packet<LoadMode,Packet>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
686 }
687};
688
689
/***************************************************************************
* Triangular products
***************************************************************************/
template<int Mode, bool LhsIsTriangular,
         typename Lhs, bool LhsIsVector,
         typename Rhs, bool RhsIsVector>
struct triangular_product_impl;
697
// (triangular lhs) * (dense rhs): forward to triangular_product_impl.
// The triangular view is unwrapped via lhs.nestedExpression() and its Mode is
// passed as a template parameter; LhsIsTriangular=true, and the rhs is treated
// as a vector when it has exactly one column at compile time.
698template<typename Lhs, typename Rhs, int ProductTag>
699struct generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag>
700 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,TriangularShape,DenseShape,ProductTag> >
701{
702 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
703
704 template<typename Dest>
705 // Computes dst += alpha * (lhs * rhs); evalTo/addTo/subTo are provided by
706 // the generic_product_impl_base CRTP base in terms of this function.
705 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
706 {
707 triangular_product_impl<Lhs::Mode,true,typename Lhs::MatrixType,false,Rhs, Rhs::ColsAtCompileTime==1>
708 ::run(dst, lhs.nestedExpression(), rhs, alpha);
709 }
710};
711
// (dense lhs) * (triangular rhs): mirror of the case above with
// LhsIsTriangular=false. The lhs counts as a vector when it has a single row,
// and the triangular view is unwrapped via rhs.nestedExpression().
712template<typename Lhs, typename Rhs, int ProductTag>
713struct generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag>
714: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,TriangularShape,ProductTag> >
715{
716 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
717
718 template<typename Dest>
719 // Computes dst += alpha * (lhs * rhs).
719 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
720 {
721 triangular_product_impl<Rhs::Mode,false,Lhs,Lhs::RowsAtCompileTime==1, typename Rhs::MatrixType, false>::run(dst, lhs, rhs.nestedExpression(), alpha);
722 }
723};
724
725
726/***************************************************************************
727* SelfAdjoint products
728***************************************************************************/
729template <typename Lhs, int LhsMode, bool LhsIsVector,
730 typename Rhs, int RhsMode, bool RhsIsVector>
731struct selfadjoint_product_impl;
732
// (selfadjoint lhs) * (dense rhs): forward to selfadjoint_product_impl with
// the lhs view unwrapped and its Mode (Upper/Lower) passed along; the rhs
// carries mode 0 and its vector-ness as compile-time flags.
733template<typename Lhs, typename Rhs, int ProductTag>
734struct generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag>
735 : generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,SelfAdjointShape,DenseShape,ProductTag> >
736{
737 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
738
739 template<typename Dest>
740 // Computes dst += alpha * (lhs * rhs).
740 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
741 {
742 selfadjoint_product_impl<typename Lhs::MatrixType,Lhs::Mode,false,Rhs,0,Rhs::IsVectorAtCompileTime>::run(dst, lhs.nestedExpression(), rhs, alpha);
743 }
744};
745
// (dense lhs) * (selfadjoint rhs): mirror of the case above, unwrapping the
// rhs view and forwarding its Mode.
746template<typename Lhs, typename Rhs, int ProductTag>
747struct generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag>
748: generic_product_impl_base<Lhs,Rhs,generic_product_impl<Lhs,Rhs,DenseShape,SelfAdjointShape,ProductTag> >
749{
750 typedef typename Product<Lhs,Rhs>::Scalar Scalar;
751
752 template<typename Dest>
753 // Computes dst += alpha * (lhs * rhs).
753 static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha)
754 {
755 selfadjoint_product_impl<Lhs,0,Lhs::IsVectorAtCompileTime,typename Rhs::MatrixType,Rhs::Mode,false>::run(dst, lhs, rhs.nestedExpression(), alpha);
756 }
757};
758
759
760/***************************************************************************
761* Diagonal products
762***************************************************************************/
763
// Common evaluator machinery for diagonal*dense and dense*diagonal products.
// Stores an evaluator for the dense factor and one for the diagonal's vector;
// derived classes supply coeff(row,col) and pick which index addresses the
// diagonal. ProductOrder (OnTheLeft/OnTheRight) records on which side the
// diagonal sits.
764template<typename MatrixType, typename DiagonalType, typename Derived, int ProductOrder>
765struct diagonal_product_evaluator_base
766 : evaluator_base<Derived>
767{
768 typedef typename ScalarBinaryOpTraits<typename MatrixType::Scalar, typename DiagonalType::Scalar>::ReturnType Scalar;
769public:
770 enum {
771 CoeffReadCost = NumTraits<Scalar>::MulCost + evaluator<MatrixType>::CoeffReadCost + evaluator<DiagonalType>::CoeffReadCost,
772
773 MatrixFlags = evaluator<MatrixType>::Flags,
774 DiagFlags = evaluator<DiagonalType>::Flags,
775 _StorageOrder = MatrixFlags & RowMajorBit ? RowMajor : ColMajor,
776 // True when, along the packetized direction, the diagonal entry is
777 // constant and can be broadcast from a single scalar (see packet_impl).
776 _ScalarAccessOnDiag = !((int(_StorageOrder) == ColMajor && int(ProductOrder) == OnTheLeft)
777 ||(int(_StorageOrder) == RowMajor && int(ProductOrder) == OnTheRight)),
778 _SameTypes = is_same<typename MatrixType::Scalar, typename DiagonalType::Scalar>::value,
779 // FIXME currently we need same types, but in the future the next rule should be the one
780 //_Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (_SameTypes && bool(int(DiagFlags)&PacketAccessBit))),
781 _Vectorizable = bool(int(MatrixFlags)&PacketAccessBit) && _SameTypes && (_ScalarAccessOnDiag || (bool(int(DiagFlags)&PacketAccessBit))),
782 // Linear (1D) indexing is only exposed when the dense factor is a vector.
782 _LinearAccessMask = (MatrixType::RowsAtCompileTime==1 || MatrixType::ColsAtCompileTime==1) ? LinearAccessBit : 0,
783 Flags = ((HereditaryBits|_LinearAccessMask) & (unsigned int)(MatrixFlags)) | (_Vectorizable ? PacketAccessBit : 0),
784 Alignment = evaluator<MatrixType>::Alignment
785 };
786
787 diagonal_product_evaluator_base(const MatrixType &mat, const DiagonalType &diag)
788 : m_diagImpl(diag), m_matImpl(mat)
789 {
790 EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits<Scalar>::MulCost);
791 EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost);
792 }
793
794 // Linear-access coefficient: only valid when _LinearAccessMask is set
795 // (vector case), where the matrix and diagonal share the same index.
794 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const
795 {
796 return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx);
797 }
798
799protected:
800 // Packet path when the diagonal entry is constant along the packet
801 // (true_type dispatch): broadcast one diagonal scalar and multiply.
800 template<int LoadMode,typename PacketType>
801 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const
802 {
803 return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
804 internal::pset1<PacketType>(m_diagImpl.coeff(id)));
805 }
806
807 // Packet path when consecutive diagonal entries are needed (false_type
808 // dispatch): load a packet from the diagonal as well, with an alignment
809 // clamped to what the diagonal evaluator can guarantee.
807 template<int LoadMode,typename PacketType>
808 EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const
809 {
810 enum {
811 InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime,
812 DiagonalPacketLoadMode = EIGEN_PLAIN_ENUM_MIN(LoadMode,((InnerSize%16) == 0) ? int(Aligned16) : int(evaluator<DiagonalType>::Alignment)) // FIXME hardcoded 16!!
813 };
814 return internal::pmul(m_matImpl.template packet<LoadMode,PacketType>(row, col),
815 m_diagImpl.template packet<DiagonalPacketLoadMode,PacketType>(id));
816 }
817
818 evaluator<DiagonalType> m_diagImpl;
819 evaluator<MatrixType> m_matImpl;
820};
821
822// diagonal * dense: D * M scales row i of M by D(i), hence the diagonal is
// addressed by the *row* index in coeff() and packet().
823template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
824struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DiagonalShape, DenseShape>
825 : diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft>
826{
827 typedef diagonal_product_evaluator_base<Rhs, typename Lhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheLeft> Base;
828 using Base::m_diagImpl;
829 using Base::m_matImpl;
830 using Base::coeff;
831 typedef typename Base::Scalar Scalar;
832
833 typedef Product<Lhs, Rhs, ProductKind> XprType;
834 typedef typename XprType::PlainObject PlainObject;
835
836 enum {
837 // Storage order follows the dense factor (the rhs here).
837 StorageOrder = int(Rhs::Flags) & RowMajorBit ? RowMajor : ColMajor
838 };
839
840 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
841 : Base(xpr.rhs(), xpr.lhs().diagonal())
842 {
843 }
844
845 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
846 {
847 return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col);
848 }
849
850#ifndef __CUDACC__
851 template<int LoadMode,typename PacketType>
852 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
853 {
854 // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case.
855 // See also similar calls below.
856 // Row-major: a packet spans one row, so the diagonal entry D(row) is a
857 // single scalar -> dispatch to the broadcast (true_type) implementation.
856 return this->template packet_impl<LoadMode,PacketType>(row,col, row,
857 typename internal::conditional<int(StorageOrder)==RowMajor, internal::true_type, internal::false_type>::type());
858 }
859
860 template<int LoadMode,typename PacketType>
861 EIGEN_STRONG_INLINE PacketType packet(Index idx) const
862 {
863 // Linear access (vector rhs): map the 1D index onto (row,col).
863 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
864 }
865#endif
866};
867
868// dense * diagonal: M * D scales column j of M by D(j), hence the diagonal is
// addressed by the *column* index in coeff() and packet().
869template<typename Lhs, typename Rhs, int ProductKind, int ProductTag>
870struct product_evaluator<Product<Lhs, Rhs, ProductKind>, ProductTag, DenseShape, DiagonalShape>
871 : diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight>
872{
873 typedef diagonal_product_evaluator_base<Lhs, typename Rhs::DiagonalVectorType, Product<Lhs, Rhs, LazyProduct>, OnTheRight> Base;
874 using Base::m_diagImpl;
875 using Base::m_matImpl;
876 using Base::coeff;
877 typedef typename Base::Scalar Scalar;
878
879 typedef Product<Lhs, Rhs, ProductKind> XprType;
880 typedef typename XprType::PlainObject PlainObject;
881
882 // Storage order follows the dense factor (the lhs here).
882 enum { StorageOrder = int(Lhs::Flags) & RowMajorBit ? RowMajor : ColMajor };
883
884 EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr)
885 : Base(xpr.lhs(), xpr.rhs().diagonal())
886 {
887 }
888
889 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
890 {
891 return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col);
892 }
893
894#ifndef __CUDACC__
895 template<int LoadMode,typename PacketType>
896 EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const
897 {
898 // Col-major: a packet spans one column, so the diagonal entry D(col) is
899 // a single scalar -> dispatch to the broadcast (true_type) implementation.
898 return this->template packet_impl<LoadMode,PacketType>(row,col, col,
899 typename internal::conditional<int(StorageOrder)==ColMajor, internal::true_type, internal::false_type>::type());
900 }
901
902 template<int LoadMode,typename PacketType>
903 EIGEN_STRONG_INLINE PacketType packet(Index idx) const
904 {
905 // Linear access (vector lhs): map the 1D index onto (row,col).
905 return packet<LoadMode,PacketType>(int(StorageOrder)==ColMajor?idx:0,int(StorageOrder)==ColMajor?0:idx);
906 }
907#endif
908};
909
910/***************************************************************************
911* Products with permutation matrices
912***************************************************************************/
913
919template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
920struct permutation_matrix_product;
921
// Applies a permutation `perm` to the dense expression `xpr`, writing into
// `dst`. Side selects row (OnTheLeft) vs column (OnTheRight) permutation;
// Transposed=true applies the inverse/transposed permutation instead.
// Two code paths: a cycle-following in-place algorithm when dst aliases the
// evaluated input, and a direct per-row/column copy otherwise.
922template<typename ExpressionType, int Side, bool Transposed>
923struct permutation_matrix_product<ExpressionType, Side, Transposed, DenseShape>
924{
925 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
926 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
927
928 template<typename Dest, typename PermutationType>
929 static inline void run(Dest& dst, const PermutationType& perm, const ExpressionType& xpr)
930 {
931 MatrixType mat(xpr);
932 const Index n = Side==OnTheLeft ? mat.rows() : mat.cols();
933 // FIXME we need an is_same for expression that is not sensitive to constness. For instance
934 // is_same_xpr<Block<const Matrix>, Block<Matrix> >::value should be true.
935 //if(is_same<MatrixTypeCleaned,Dest>::value && extract_data(dst) == extract_data(mat))
936 if(is_same_dense(dst, mat))
937 {
938 // apply the permutation inplace
939 // mask[i] marks rows/columns already placed; the permutation is
940 // decomposed into disjoint cycles which are each resolved by swaps.
939 Matrix<bool,PermutationType::RowsAtCompileTime,1,0,PermutationType::MaxRowsAtCompileTime> mask(perm.size());
940 mask.fill(false);
941 Index r = 0;
942 while(r < perm.size())
943 {
944 // search for the next seed
945 while(r<perm.size() && mask[r]) r++;
946 if(r>=perm.size())
947 break;
948 // we got one, let's follow it until we are back to the seed
949 Index k0 = r++;
950 Index kPrev = k0;
951 mask.coeffRef(k0) = true;
952 for(Index k=perm.indices().coeff(k0); k!=k0; k=perm.indices().coeff(k))
953 {
954 // Swap a single row (OnTheLeft) or column (OnTheRight) of dst;
955 // the swap partner depends on the permutation direction.
954 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>(dst, k)
955 .swap(Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
956 (dst,((Side==OnTheLeft) ^ Transposed) ? k0 : kPrev));
957
958 mask.coeffRef(k) = true;
959 kPrev = k;
960 }
961 }
962 }
963 else
964 {
965 // No aliasing: copy each row/column straight to its permuted position.
965 for(Index i = 0; i < n; ++i)
966 {
967 Block<Dest, Side==OnTheLeft ? 1 : Dest::RowsAtCompileTime, Side==OnTheRight ? 1 : Dest::ColsAtCompileTime>
968 (dst, ((Side==OnTheLeft) ^ Transposed) ? perm.indices().coeff(i) : i)
969
970 =
971
972 Block<const MatrixTypeCleaned,Side==OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime,Side==OnTheRight ? 1 : MatrixTypeCleaned::ColsAtCompileTime>
973 (mat, ((Side==OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i);
974 }
975 }
976 }
977};
978
// permutation * matrix: permute the rows of rhs (apply from the left).
979template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
980struct generic_product_impl<Lhs, Rhs, PermutationShape, MatrixShape, ProductTag>
981{
982 template<typename Dest>
983 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
984 {
985 permutation_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
986 }
987};
988
// matrix * permutation: permute the columns of lhs (apply from the right);
// note the swapped argument order when forwarding to the helper.
989template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
990struct generic_product_impl<Lhs, Rhs, MatrixShape, PermutationShape, ProductTag>
991{
992 template<typename Dest>
993 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
994 {
995 permutation_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
996 }
997};
998
// inverse(permutation) * matrix: the inverse of a permutation is its
// transpose, so run the helper with Transposed=true on the nested permutation.
1000template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1001struct generic_product_impl<Inverse<Lhs>, Rhs, PermutationShape, MatrixShape, ProductTag>
1002{
1003 template<typename Dest>
1004 static void evalTo(Dest& dst, const Inverse<Lhs>& lhs, const Rhs& rhs)
1005 {
1006 permutation_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1007 }
1008};
1008
// matrix * inverse(permutation): apply the transposed permutation from the
// right (Transposed=true), with the swapped argument order for the helper.
1010template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1011struct generic_product_impl<Lhs, Inverse<Rhs>, MatrixShape, PermutationShape, ProductTag>
1012{
1013 template<typename Dest>
1014 static void evalTo(Dest& dst, const Lhs& lhs, const Inverse<Rhs>& rhs)
1015 {
1016 permutation_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1017 }
1018};
1018
1019
1020/***************************************************************************
1021* Products with transpositions matrices
1022***************************************************************************/
1023
1024// FIXME could we unify Transpositions and Permutation into a single "shape"??
1025
// Applies a sequence of transpositions `tr` to the expression `xpr`, writing
// into `dst`. Side selects row vs column swaps; Transposed=true applies the
// transpositions in reverse order, which yields the inverse product.
1030template<typename ExpressionType, int Side, bool Transposed, typename ExpressionShape>
1031struct transposition_matrix_product
1032{
1033 typedef typename nested_eval<ExpressionType, 1>::type MatrixType;
1034 typedef typename remove_all<MatrixType>::type MatrixTypeCleaned;
1035
1036 template<typename Dest, typename TranspositionType>
1037 static inline void run(Dest& dst, const TranspositionType& tr, const ExpressionType& xpr)
1038 {
1039 MatrixType mat(xpr);
1040 typedef typename TranspositionType::StorageIndex StorageIndex;
1041 const Index size = tr.size();
1042 StorageIndex j = 0;
1043
1044 // Copy the input first unless dst already aliases it; the swaps below
1045 // then operate fully in place.
1044 if(!is_same_dense(dst,mat))
1045 dst = mat;
1046
1047 // Walk the transpositions forward, or backward when Transposed=true;
1048 // self-transpositions (j == k) are skipped.
1047 for(Index k=(Transposed?size-1:0) ; Transposed?k>=0:k<size ; Transposed?--k:++k)
1048 if(Index(j=tr.coeff(k))!=k)
1049 {
1050 if(Side==OnTheLeft) dst.row(k).swap(dst.row(j));
1051 else if(Side==OnTheRight) dst.col(k).swap(dst.col(j));
1052 }
1053 }
1054};
1055
// transpositions * matrix: swap rows of rhs (apply from the left).
1056template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1057struct generic_product_impl<Lhs, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1058{
1059 template<typename Dest>
1060 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1061 {
1062 transposition_matrix_product<Rhs, OnTheLeft, false, MatrixShape>::run(dst, lhs, rhs);
1063 }
1064};
1065
// matrix * transpositions: swap columns of lhs (apply from the right); note
// the swapped argument order when forwarding to the helper.
1066template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1067struct generic_product_impl<Lhs, Rhs, MatrixShape, TranspositionsShape, ProductTag>
1068{
1069 template<typename Dest>
1070 static void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs)
1071 {
1072 transposition_matrix_product<Lhs, OnTheRight, false, MatrixShape>::run(dst, rhs, lhs);
1073 }
1074};
1075
1076
// transpose(transpositions) * matrix: same swaps applied in reverse order
// (Transposed=true) on the nested transpositions object.
1078template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1079struct generic_product_impl<Transpose<Lhs>, Rhs, TranspositionsShape, MatrixShape, ProductTag>
1080{
1081 template<typename Dest>
1082 static void evalTo(Dest& dst, const Transpose<Lhs>& lhs, const Rhs& rhs)
1083 {
1084 transposition_matrix_product<Rhs, OnTheLeft, true, MatrixShape>::run(dst, lhs.nestedExpression(), rhs);
1085 }
1086};
1086
// matrix * transpose(transpositions): reverse-order column swaps
// (Transposed=true), with the swapped argument order for the helper.
1088template<typename Lhs, typename Rhs, int ProductTag, typename MatrixShape>
1089struct generic_product_impl<Lhs, Transpose<Rhs>, MatrixShape, TranspositionsShape, ProductTag>
1090{
1091 template<typename Dest>
1092 static void evalTo(Dest& dst, const Lhs& lhs, const Transpose<Rhs>& rhs)
1093 {
1094 transposition_matrix_product<Lhs, OnTheRight, true, MatrixShape>::run(dst, rhs.nestedExpression(), lhs);
1095 }
1096};
1096
1097} // end namespace internal
1098
1099} // end namespace Eigen
1100
1101#endif // EIGEN_PRODUCTEVALUATORS_H
@ ColMajor
Definition: Constants.h:320
@ RowMajor
Definition: Constants.h:322
@ OnTheLeft
Definition: Constants.h:333
@ OnTheRight
Definition: Constants.h:335
const unsigned int PacketAccessBit
Definition: Constants.h:89
const unsigned int EvalBeforeNestingBit
Definition: Constants.h:65
const unsigned int RowMajorBit
Definition: Constants.h:61
Namespace containing all symbols from the Eigen library.
Definition: Core:287
const int HugeCost
Definition: Constants.h:39
EIGEN_DEFAULT_DENSE_INDEX_TYPE Index
The Index type as used for the API.
Definition: Meta.h:33
const int Dynamic
Definition: Constants.h:21