Loading...
Searching...
No Matches
TensorSyclRun.h
1// This file is part of Eigen, a lightweight C++ template library
2// for linear algebra.
3//
4// Mehdi Goli Codeplay Software Ltd.
5// Ralph Potter Codeplay Software Ltd.
6// Luke Iwanski Codeplay Software Ltd.
7// Cummins Chris PhD student at The University of Edinburgh.
8// Contact: <eigen@codeplay.com>
9//
10// This Source Code Form is subject to the terms of the Mozilla
11// Public License v. 2.0. If a copy of the MPL was not distributed
12// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
13
14/*****************************************************************
15 * TensorSyclRun.h
16 *
17 * \brief:
18 * Schedule_kernel invokes a specialised version of the kernel struct. The
19 * specialisation is based on the data dimension in the sycl buffer
20 *
21*****************************************************************/
22
23#ifndef UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
24#define UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
25
26namespace Eigen {
27namespace TensorSycl {
32template <typename Expr, typename Dev>
33void run(Expr &expr, Dev &dev) {
34 Eigen::TensorEvaluator<Expr, Dev> evaluator(expr, dev);
35 const bool needs_assign = evaluator.evalSubExprsIfNeeded(NULL);
36 if (needs_assign) {
37 typedef typename internal::createPlaceHolderExpression<Expr>::Type PlaceHolderExpr;
38 auto functors = internal::extractFunctors(evaluator);
39
40 size_t tileSize =dev.m_queue.get_device(). template get_info<cl::sycl::info::device::max_work_group_size>()/2;
41 dev.m_queue.submit([&](cl::sycl::handler &cgh) {
42
43 // create a tuple of accessors from Evaluator
44 auto tuple_of_accessors = internal::createTupleOfAccessors<decltype(evaluator)>(cgh, evaluator);
45 const auto range = utility::tuple::get<0>(tuple_of_accessors).get_range()[0];
46 size_t GRange=range;
47 if (tileSize>GRange) tileSize=GRange;
48 else if(GRange>tileSize){
49 size_t xMode = GRange % tileSize;
50 if (xMode != 0) GRange += (tileSize - xMode);
51 }
52 // run the kernel
53 cgh.parallel_for<PlaceHolderExpr>( cl::sycl::nd_range<1>(cl::sycl::range<1>(GRange), cl::sycl::range<1>(tileSize)), [=](cl::sycl::nd_item<1> itemID) {
54 typedef typename internal::ConvertToDeviceExpression<Expr>::Type DevExpr;
55 auto device_expr =internal::createDeviceExpression<DevExpr, PlaceHolderExpr>(functors, tuple_of_accessors);
56 auto device_evaluator = Eigen::TensorEvaluator<decltype(device_expr.expr), Eigen::DefaultDevice>(device_expr.expr, Eigen::DefaultDevice());
57 if (itemID.get_global_linear_id() < range) {
58 device_evaluator.evalScalar(static_cast<int>(itemID.get_global_linear_id()));
59 }
60 });
61 });
62 dev.m_queue.throw_asynchronous();
63 }
64
65 evaluator.cleanup();
66}
67} // namespace TensorSycl
68} // namespace Eigen
69
70#endif // UNSUPPORTED_EIGEN_CXX11_SRC_TENSOR_TENSORSYCL_SYCLRUN_HPP
Namespace containing all symbols from the Eigen library.
Definition: AdolcForward:45
A cost model used to limit the number of threads used for evaluating tensor expression.
Definition: TensorEvaluator.h:29