@@ -128,34 +128,34 @@ T inner_product(ExecutionPolicy &exec, InputIt1 first1, InputIt1 last1,
128128 auto buf1 = sycl::helpers::make_const_buffer (first1, last1);
129129 auto buf2 = sycl::helpers::make_const_buffer (first2, last2);
130130 cl::sycl::buffer<T, 1 > bufr ((cl::sycl::range<1 >(vectorSize)));
131- size_t length = vectorSize;
131+ auto length = vectorSize;
132132 auto ndRange = exec.calculateNdRange (length);
133133 const auto local = ndRange.get_local ()[0 ];
134134 int passes = 0 ;
135+ auto cg = [&passes, &length, &ndRange, local, &buf1, &buf2, &bufr, op1, op2](
136+ cl::sycl::handler &h) mutable {
137+ auto a1 = buf1.template get_access <cl::sycl::access::mode::read>(h);
138+ auto a2 = buf2.template get_access <cl::sycl::access::mode::read>(h);
139+ auto aR =
140+ bufr.template get_access <cl::sycl::access::mode::read_write>(h);
141+ cl::sycl::accessor<T, 1 , cl::sycl::access::mode::read_write,
142+ cl::sycl::access::target::local>
143+ scratch (ndRange.get_local (), h);
144+
145+ h.parallel_for <typename ExecutionPolicy::kernelName>(
146+ ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
147+ cl::sycl::nd_item<1 > id) {
148+ auto r = ReductionStrategy<T>(local, length, id, scratch);
149+ if (passes == 0 ) {
150+ r.workitem_get_from (op2, a1, a2);
151+ } else {
152+ r.workitem_get_from (aR);
153+ }
154+ r.combine_threads (op1);
155+ r.workgroup_write_to (aR);
156+ }); // end kernel
157+ }; // end command group
135158 do {
136- auto cg = [passes, length, ndRange, local, &buf1, &buf2, &bufr, op1, op2](
137- cl::sycl::handler &h) mutable {
138- auto a1 = buf1.template get_access <cl::sycl::access::mode::read>(h);
139- auto a2 = buf2.template get_access <cl::sycl::access::mode::read>(h);
140- auto aR =
141- bufr.template get_access <cl::sycl::access::mode::read_write>(h);
142- cl::sycl::accessor<T, 1 , cl::sycl::access::mode::read_write,
143- cl::sycl::access::target::local>
144- scratch (ndRange.get_local (), h);
145-
146- h.parallel_for <typename ExecutionPolicy::kernelName>(
147- ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
148- cl::sycl::nd_item<1 > id) {
149- auto r = ReductionStrategy<T>(local, length, id, scratch);
150- if (passes == 0 ) {
151- r.workitem_get_from (op2, a1, a2);
152- } else {
153- r.workitem_get_from (aR);
154- }
155- r.combine_threads (op1);
156- r.workgroup_write_to (aR);
157- }); // end kernel
158- }; // end command group
159159 q.submit (cg);
160160 passes++;
161161 length = length / local;
0 commit comments