TensorMorphing.h
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2014 Benoit Steiner <benoit.steiner.goog@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#ifndef EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H
#define EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H

namespace Eigen {

namespace internal {
template<typename NewDimensions, typename XprType>
struct traits<TensorReshapingOp<NewDimensions, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<NewDimensions>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename NewDimensions, typename XprType>
struct eval<TensorReshapingOp<NewDimensions, XprType>, Eigen::Dense>
{
  typedef const TensorReshapingOp<NewDimensions, XprType>& type;
};

template<typename NewDimensions, typename XprType>
struct nested<TensorReshapingOp<NewDimensions, XprType>, 1, typename eval<TensorReshapingOp<NewDimensions, XprType> >::type>
{
  typedef TensorReshapingOp<NewDimensions, XprType> type;
};

}  // end namespace internal


template<typename NewDimensions, typename XprType>
class TensorReshapingOp : public TensorBase<TensorReshapingOp<NewDimensions, XprType>, WriteAccessors>
{
  public:
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Scalar Scalar;
    typedef typename internal::remove_const<typename XprType::CoeffReturnType>::type CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorReshapingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorReshapingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorReshapingOp(const XprType& expr, const NewDimensions& dims)
        : m_xpr(expr), m_dims(dims) {}

    EIGEN_DEVICE_FUNC
    const NewDimensions& dimensions() const { return m_dims; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const TensorReshapingOp& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const TensorReshapingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorReshapingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorReshapingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const NewDimensions m_dims;
};
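
// Usage sketch (illustrative only, with hypothetical sizes): a TensorReshapingOp
// is normally obtained through Tensor::reshape() rather than constructed
// directly, e.g.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> input(6, 4);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 3> new_dims{{2, 3, 4}};
//   // The coefficient count must match: 6*4 == 2*3*4.
//   Eigen::Tensor<float, 3> reshaped = input.reshape(new_dims);
//
// Because the op derives from TensorBase<..., WriteAccessors>, a reshape of an
// lvalue expression can also appear on the left-hand side of an assignment.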

// Eval as rvalue
template<typename NewDimensions, typename ArgType, typename Device>
struct TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_dimensions(op.dimensions())
  {
    // The total size of the reshaped tensor must be equal to the total size
    // of the input tensor.
    eigen_assert(internal::array_prod(m_impl.dimensions()) == internal::array_prod(op.dimensions()));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    return m_impl.evalSubExprsIfNeeded(data);
  }
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(index);
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    return m_impl.template packet<LoadMode>(index);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized);
  }

  EIGEN_DEVICE_FUNC Scalar* data() const { return const_cast<Scalar*>(m_impl.data()); }

  EIGEN_DEVICE_FUNC const TensorEvaluator<ArgType, Device>& impl() const { return m_impl; }

 protected:
  TensorEvaluator<ArgType, Device> m_impl;
  NewDimensions m_dimensions;
};


// Eval as lvalue
template<typename NewDimensions, typename ArgType, typename Device>
  struct TensorEvaluator<TensorReshapingOp<NewDimensions, ArgType>, Device>
  : public TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorReshapingOp<NewDimensions, ArgType>, Device> Base;
  typedef TensorReshapingOp<NewDimensions, ArgType> XprType;
  typedef NewDimensions Dimensions;

  enum {
    IsAligned = TensorEvaluator<ArgType, Device>::IsAligned,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,  // to be implemented
    RawAccess = TensorEvaluator<ArgType, Device>::RawAccess
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(index);
  }
  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    this->m_impl.template writePacket<StoreMode>(index, x);
  }
};


namespace internal {
template<typename StartIndices, typename Sizes, typename XprType>
struct traits<TensorSlicingOp<StartIndices, Sizes, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct eval<TensorSlicingOp<StartIndices, Sizes, XprType>, Eigen::Dense>
{
  typedef const TensorSlicingOp<StartIndices, Sizes, XprType>& type;
};

template<typename StartIndices, typename Sizes, typename XprType>
struct nested<TensorSlicingOp<StartIndices, Sizes, XprType>, 1, typename eval<TensorSlicingOp<StartIndices, Sizes, XprType> >::type>
{
  typedef TensorSlicingOp<StartIndices, Sizes, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename Sizes, typename XprType>
class TensorSlicingOp : public TensorBase<TensorSlicingOp<StartIndices, Sizes, XprType> >
{
  public:
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename Eigen::internal::nested<TensorSlicingOp>::type Nested;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::StorageKind StorageKind;
    typedef typename Eigen::internal::traits<TensorSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorSlicingOp(const XprType& expr, const StartIndices& indices, const Sizes& sizes)
        : m_xpr(expr), m_indices(indices), m_sizes(sizes) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_indices; }
    EIGEN_DEVICE_FUNC
    const Sizes& sizes() const { return m_sizes; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorSlicingOp& operator = (const TensorSlicingOp& other)
    {
      typedef TensorAssignOp<TensorSlicingOp, const TensorSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_indices;
    const Sizes m_sizes;
};
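
// Usage sketch (illustrative only, with hypothetical sizes): a TensorSlicingOp
// is normally obtained through Tensor::slice(), which takes the start indices
// and the extents of the slice, e.g.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 2> input(5, 7);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 2> offsets{{1, 2}};
//   Eigen::array<Eigen::Index, 2> extents{{3, 4}};
//   // 3x4 block starting at row 1, column 2.
//   Eigen::Tensor<float, 2> block = input.slice(offsets, extents);
//
// The op is also writable, so input.slice(offsets, extents) = block; assigns
// back into the corresponding region of the input.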

// Fixme: figure out the exact threshold
namespace {
template <typename Index, typename Device> struct MemcpyTriggerForSlicing {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const Device& device) : threshold_(2 * device.numThreads()) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > threshold_; }

 private:
  Index threshold_;
};

// It is very expensive to start the memcpy kernel on GPU: we therefore only
// use it for large copies.
#ifdef EIGEN_USE_GPU
template <typename Index> struct MemcpyTriggerForSlicing<Index, GpuDevice> {
  EIGEN_DEVICE_FUNC MemcpyTriggerForSlicing(const GpuDevice&) { }
  EIGEN_DEVICE_FUNC bool operator ()(Index val) const { return val > 4*1024*1024; }
};
#endif
}

// Eval as rvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_dimensions(op.sizes()), m_offsets(op.startIndices())
  {
    for (std::size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_impl.dimensions()[i] >= op.sizes()[i] + op.startIndices()[i]);
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();
    const Sizes& output_dims = op.sizes();
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_inputStrides[i] = m_inputStrides[i-1] * input_dims[i-1];
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_inputStrides[i] = m_inputStrides[i+1] * input_dims[i+1];
      }

      // Don't initialize m_fastOutputStrides[NumDims-1] since it won't ever be accessed.
      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(m_outputStrides[i]);
      }
    }
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType* data) {
    m_impl.evalSubExprsIfNeeded(NULL);
    if (!NumTraits<typename internal::remove_const<Scalar>::type>::RequireInitialization && data && m_impl.data()) {
      Index contiguous_values = 1;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      } else {
        for (int i = NumDims-1; i >= 0; --i) {
          contiguous_values *= dimensions()[i];
          if (dimensions()[i] != m_impl.dimensions()[i]) {
            break;
          }
        }
      }
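      // Worked example with hypothetical sizes: for a ColMajor input of
      // dimensions (5, 7, 11) sliced to sizes (5, 3, 11), dimension 0 matches
      // the input and dimension 1 does not, so the loop above stops with
      // contiguous_values = 5 * 3 = 15; every run of 15 output coefficients is
      // contiguous in the input and can be handed to a single memcpy below.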
      // Use memcpy if it's going to be faster than using the regular evaluation.
      const MemcpyTriggerForSlicing<Index, Device> trigger(m_device);
      if (trigger(contiguous_values)) {
        Scalar* src = (Scalar*)m_impl.data();
        for (int i = 0; i < internal::array_prod(dimensions()); i += contiguous_values) {
          Index offset = srcCoeff(i);
          m_device.memcpy((void*)(data+i), src+offset, contiguous_values * sizeof(Scalar));
        }
        return false;
      }
    }
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  template<int LoadMode>
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    EIGEN_STATIC_ASSERT((packetSize > 1), YOU_MADE_A_PROGRAMMING_MISTAKE)
    eigen_assert(index+packetSize-1 < internal::array_prod(dimensions()));

    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[0]);
      inputIndices[1] += (indices[1] + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / m_fastOutputStrides[i];
        const Index idx1 = indices[1] / m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + m_offsets[i]) * m_inputStrides[i];
        inputIndices[1] += (idx1 + m_offsets[i]) * m_inputStrides[i];
        indices[0] -= idx0 * m_outputStrides[i];
        indices[1] -= idx1 * m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      PacketReturnType rslt = m_impl.template packet<Unaligned>(inputIndices[0]);
      return rslt;
    }
    else {
      EIGEN_ALIGN_MAX typename internal::remove_const<CoeffReturnType>::type values[packetSize];
      values[0] = m_impl.coeff(inputIndices[0]);
      values[packetSize-1] = m_impl.coeff(inputIndices[1]);
      for (int i = 1; i < packetSize-1; ++i) {
        values[i] = coeff(index+i);
      }
      PacketReturnType rslt = internal::pload<PacketReturnType>(values);
      return rslt;
    }
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    Scalar* result = m_impl.data();
    if (result) {
      Index offset = 0;
      if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
        for (int i = 0; i < NumDims; ++i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i+1; j < NumDims; ++j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      } else {
        for (int i = NumDims - 1; i >= 0; --i) {
          if (m_dimensions[i] != m_impl.dimensions()[i]) {
            offset += m_offsets[i] * m_inputStrides[i];
            for (int j = i-1; j >= 0; --j) {
              if (m_dimensions[j] > 1) {
                return NULL;
              }
              offset += m_offsets[j] * m_inputStrides[j];
            }
            break;
          }
        }
      }
      return result + offset;
    }
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += (idx + m_offsets[i]) * m_inputStrides[i];
        index -= idx * m_outputStrides[i];
      }
      inputIndex += (index + m_offsets[NumDims-1]);
    }
    return inputIndex;
  }
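
  // Worked example with hypothetical sizes: slicing a ColMajor 5x4 tensor with
  // offsets (1, 1) and sizes (2, 2) gives m_outputStrides = {1, 2} and
  // m_inputStrides = {1, 5}. For output index 3, i.e. output coordinate (1, 1),
  // srcCoeff computes (3/2 + 1) * 5 = 10, reduces the index to 1, and then adds
  // (1 + 1) = 2, yielding input index 12, which is input coordinate (2, 2).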

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  Dimensions m_dimensions;
  const StartIndices m_offsets;
};


// Eval as lvalue
template<typename StartIndices, typename Sizes, typename ArgType, typename Device>
struct TensorEvaluator<TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
  : public TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorSlicingOp<StartIndices, Sizes, ArgType>, Device> Base;
  typedef TensorSlicingOp<StartIndices, Sizes, ArgType> XprType;
  static const int NumDims = internal::array_size<Sizes>::value;

  enum {
    IsAligned = /*TensorEvaluator<ArgType, Device>::IsAligned*/false,
    PacketAccess = TensorEvaluator<ArgType, Device>::PacketAccess,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = false,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Sizes Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }

  template <int StoreMode> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE
  void writePacket(Index index, const PacketReturnType& x)
  {
    const int packetSize = internal::unpacket_traits<PacketReturnType>::size;
    Index inputIndices[] = {0, 0};
    Index indices[] = {index, index + packetSize - 1};
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i > 0; --i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[0]);
      inputIndices[1] += (indices[1] + this->m_offsets[0]);
    } else {
      for (int i = 0; i < NumDims - 1; ++i) {
        const Index idx0 = indices[0] / this->m_fastOutputStrides[i];
        const Index idx1 = indices[1] / this->m_fastOutputStrides[i];
        inputIndices[0] += (idx0 + this->m_offsets[i]) * this->m_inputStrides[i];
        inputIndices[1] += (idx1 + this->m_offsets[i]) * this->m_inputStrides[i];
        indices[0] -= idx0 * this->m_outputStrides[i];
        indices[1] -= idx1 * this->m_outputStrides[i];
      }
      inputIndices[0] += (indices[0] + this->m_offsets[NumDims-1]);
      inputIndices[1] += (indices[1] + this->m_offsets[NumDims-1]);
    }
    if (inputIndices[1] - inputIndices[0] == packetSize - 1) {
      this->m_impl.template writePacket<StoreMode>(inputIndices[0], x);
    }
    else {
      EIGEN_ALIGN_MAX CoeffReturnType values[packetSize];
      internal::pstore<CoeffReturnType, PacketReturnType>(values, x);
      this->m_impl.coeffRef(inputIndices[0]) = values[0];
      this->m_impl.coeffRef(inputIndices[1]) = values[packetSize-1];
      for (int i = 1; i < packetSize-1; ++i) {
        this->coeffRef(index+i) = values[i];
      }
    }
  }
};



namespace internal {
template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct traits<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> > : public traits<XprType>
{
  typedef typename XprType::Scalar Scalar;
  typedef traits<XprType> XprTraits;
  typedef typename XprTraits::StorageKind StorageKind;
  typedef typename XprTraits::Index Index;
  typedef typename XprType::Nested Nested;
  typedef typename remove_reference<Nested>::type _Nested;
  static const int NumDimensions = array_size<StartIndices>::value;
  static const int Layout = XprTraits::Layout;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, Eigen::Dense>
{
  typedef const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>& type;
};

template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
struct nested<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType>, 1, typename eval<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >::type>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> type;
};

}  // end namespace internal


template<typename StartIndices, typename StopIndices, typename Strides, typename XprType>
class TensorStridingSlicingOp : public TensorBase<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, XprType> >
{
  public:
    typedef typename internal::traits<TensorStridingSlicingOp>::Scalar Scalar;
    typedef typename XprType::CoeffReturnType CoeffReturnType;
    typedef typename internal::nested<TensorStridingSlicingOp>::type Nested;
    typedef typename internal::traits<TensorStridingSlicingOp>::StorageKind StorageKind;
    typedef typename internal::traits<TensorStridingSlicingOp>::Index Index;

    EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorStridingSlicingOp(
        const XprType& expr, const StartIndices& startIndices,
        const StopIndices& stopIndices, const Strides& strides)
        : m_xpr(expr), m_startIndices(startIndices), m_stopIndices(stopIndices),
          m_strides(strides) {}

    EIGEN_DEVICE_FUNC
    const StartIndices& startIndices() const { return m_startIndices; }
    EIGEN_DEVICE_FUNC
    const StopIndices& stopIndices() const { return m_stopIndices; }
    EIGEN_DEVICE_FUNC
    const Strides& strides() const { return m_strides; }

    EIGEN_DEVICE_FUNC
    const typename internal::remove_all<typename XprType::Nested>::type&
    expression() const { return m_xpr; }

    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const TensorStridingSlicingOp& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const TensorStridingSlicingOp> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

    template<typename OtherDerived>
    EIGEN_DEVICE_FUNC
    EIGEN_STRONG_INLINE TensorStridingSlicingOp& operator = (const OtherDerived& other)
    {
      typedef TensorAssignOp<TensorStridingSlicingOp, const OtherDerived> Assign;
      Assign assign(*this, other);
      internal::TensorExecutor<const Assign, DefaultDevice>::run(
          assign, DefaultDevice());
      return *this;
    }

  protected:
    typename XprType::Nested m_xpr;
    const StartIndices m_startIndices;
    const StopIndices m_stopIndices;
    const Strides m_strides;
};
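
// Usage sketch (illustrative only, with hypothetical sizes): a
// TensorStridingSlicingOp is normally obtained through Tensor::stridedSlice(),
// which takes start indices, stop indices (exclusive) and strides; negative
// strides walk backwards and out-of-range bounds are clamped by the evaluator
// below, e.g.
//
//   #include <unsupported/Eigen/CXX11/Tensor>
//
//   Eigen::Tensor<float, 1> input(10);
//   input.setRandom();
//   Eigen::array<Eigen::Index, 1> start{{8}};
//   Eigen::array<Eigen::Index, 1> stop{{0}};
//   Eigen::array<Eigen::Index, 1> strides{{-2}};
//   // Elements 8, 6, 4, 2 of the input, in that order.
//   Eigen::Tensor<float, 1> sampled = input.stridedSlice(start, stop, strides);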

// Eval as rvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    // Alignment can't be guaranteed at compile time since it depends on the
    // slice offsets and sizes.
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
      : m_impl(op.expression(), device), m_device(device), m_strides(op.strides())
  {
    // Handle degenerate intervals by gracefully clamping and allowing m_dimensions to be zero
    DSizes<Index,NumDims> startIndicesClamped, stopIndicesClamped;
    for (size_t i = 0; i < internal::array_size<Dimensions>::value; ++i) {
      eigen_assert(m_strides[i] != 0 && "0 stride is invalid");
      if(m_strides[i]>0){
        startIndicesClamped[i] = clamp(op.startIndices()[i], 0, m_impl.dimensions()[i]);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], 0, m_impl.dimensions()[i]);
      }else{
        /* implies m_strides[i]<0 by assert */
        startIndicesClamped[i] = clamp(op.startIndices()[i], -1, m_impl.dimensions()[i] - 1);
        stopIndicesClamped[i] = clamp(op.stopIndices()[i], -1, m_impl.dimensions()[i] - 1);
      }
      m_startIndices[i] = startIndicesClamped[i];
    }

    const typename TensorEvaluator<ArgType, Device>::Dimensions& input_dims = m_impl.dimensions();

    // check for degenerate intervals and compute output tensor shape
    bool degenerate = false;
    for(int i = 0; i < NumDims; i++){
      Index interval = stopIndicesClamped[i] - startIndicesClamped[i];
      if(interval == 0 || ((interval<0) != (m_strides[i]<0))){
        m_dimensions[i] = 0;
        degenerate = true;
      }else{
        m_dimensions[i] = interval / m_strides[i]
            + (interval % m_strides[i] != 0 ? 1 : 0);
        eigen_assert(m_dimensions[i] >= 0);
      }
    }
    Strides output_dims = m_dimensions;

    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      m_inputStrides[0] = m_strides[0];
      m_offsets[0] = startIndicesClamped[0];
      Index previousDimProduct = 1;
      for (int i = 1; i < NumDims; ++i) {
        previousDimProduct *= input_dims[i-1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      // Don't initialize m_fastOutputStrides[0] since it won't ever be accessed.
      m_outputStrides[0] = 1;
      for (int i = 1; i < NumDims; ++i) {
        m_outputStrides[i] = m_outputStrides[i-1] * output_dims[i-1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    } else {
      m_inputStrides[NumDims-1] = m_strides[NumDims-1];
      m_offsets[NumDims-1] = startIndicesClamped[NumDims-1];
      Index previousDimProduct = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        previousDimProduct *= input_dims[i+1];
        m_inputStrides[i] = previousDimProduct * m_strides[i];
        m_offsets[i] = startIndicesClamped[i] * previousDimProduct;
      }

      m_outputStrides[NumDims-1] = 1;
      for (int i = NumDims - 2; i >= 0; --i) {
        m_outputStrides[i] = m_outputStrides[i+1] * output_dims[i+1];
        // NOTE: if tensor is degenerate, we send 1 to prevent TensorIntDivisor constructor crash
        m_fastOutputStrides[i] = internal::TensorIntDivisor<Index>(degenerate ? 1 : m_outputStrides[i]);
      }
    }
    m_block_total_size_max = numext::maxi(static_cast<std::size_t>(1),
                                          device.lastLevelCacheSize() /
                                          sizeof(Scalar));
  }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Dimensions& dimensions() const { return m_dimensions; }


  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool evalSubExprsIfNeeded(CoeffReturnType*) {
    m_impl.evalSubExprsIfNeeded(NULL);
    return true;
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void cleanup() {
    m_impl.cleanup();
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const
  {
    return m_impl.coeff(srcCoeff(index));
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorOpCost costPerCoeff(bool vectorized) const {
    return m_impl.costPerCoeff(vectorized) + TensorOpCost(0, 0, NumDims);
  }

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar* data() const {
    return NULL;
  }

 protected:
  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index srcCoeff(Index index) const
  {
    Index inputIndex = 0;
    if (static_cast<int>(Layout) == static_cast<int>(ColMajor)) {
      for (int i = NumDims - 1; i >= 0; --i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    } else {
      for (int i = 0; i < NumDims; ++i) {
        const Index idx = index / m_fastOutputStrides[i];
        inputIndex += idx * m_inputStrides[i] + m_offsets[i];
        index -= idx * m_outputStrides[i];
      }
    }
    return inputIndex;
  }

  static EIGEN_STRONG_INLINE Index clamp(Index value, Index min, Index max) {
    return numext::maxi(min, numext::mini(max,value));
  }

  array<Index, NumDims> m_outputStrides;
  array<internal::TensorIntDivisor<Index>, NumDims> m_fastOutputStrides;
  array<Index, NumDims> m_inputStrides;
  TensorEvaluator<ArgType, Device> m_impl;
  const Device& m_device;
  DSizes<Index, NumDims> m_startIndices; // clamped startIndices
  DSizes<Index, NumDims> m_dimensions;
  DSizes<Index, NumDims> m_offsets; // offset in a flattened shape
  const Strides m_strides;
  std::size_t m_block_total_size_max;
};

// Eval as lvalue
template<typename StartIndices, typename StopIndices, typename Strides, typename ArgType, typename Device>
struct TensorEvaluator<TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
  : public TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device>
{
  typedef TensorEvaluator<const TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType>, Device> Base;
  typedef TensorStridingSlicingOp<StartIndices, StopIndices, Strides, ArgType> XprType;
  static const int NumDims = internal::array_size<Strides>::value;

  enum {
    IsAligned = false,
    PacketAccess = false,
    BlockAccess = false,
    Layout = TensorEvaluator<ArgType, Device>::Layout,
    CoordAccess = TensorEvaluator<ArgType, Device>::CoordAccess,
    RawAccess = false
  };

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TensorEvaluator(const XprType& op, const Device& device)
    : Base(op, device)
  { }

  typedef typename XprType::Index Index;
  typedef typename XprType::Scalar Scalar;
  typedef typename internal::remove_const<Scalar>::type ScalarNonConst;
  typedef typename XprType::CoeffReturnType CoeffReturnType;
  typedef typename PacketType<CoeffReturnType, Device>::type PacketReturnType;
  typedef Strides Dimensions;

  EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType& coeffRef(Index index)
  {
    return this->m_impl.coeffRef(this->srcCoeff(index));
  }
};


} // end namespace Eigen

#endif // EIGEN_CXX11_TENSOR_TENSOR_MORPHING_H