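Various cleanups: store max_work_item_sizes in a local, hoist command-group lambdas out of reduction loops, remove commented-out debug output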

Rbiessy · Ruyman · commit f4da71fd001b · 2018-05-23T06:50:05.000+01:00
diff --git a/benchmarks/sycl_reduce.cpp b/benchmarks/sycl_reduce.cpp
@@ -55,9 +55,11 @@ benchmark<>::time_units_t benchmark_reduce(const unsigned numReps,
 
   cl::sycl::queue q(cds);
   auto device = q.get_device();
-  auto local = std::min(
+  const auto maxWorkGroupItemSizes =
+    device.get_info<cl::sycl::info::device::max_work_item_sizes>();
+  const auto local = std::min(
       device.get_info<cl::sycl::info::device::max_work_group_size>(),
-      device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
+      maxWorkGroupItemSizes[0]);
   sycl::sycl_execution_policy<class ReduceAlgorithmBench> snp(q);
   auto bufI = sycl::helpers::make_const_buffer(v.begin(), v.end());
   size_t length = N;
diff --git a/include/sycl/algorithm/buffer_algorithms.hpp b/include/sycl/algorithm/buffer_algorithms.hpp
@@ -87,10 +87,11 @@ sycl_algorithm_descriptor compute_mapreduce_descriptor(cl::sycl::device device,
   size_t max_work_group =
     device.get_info<cl::sycl::info::device::max_compute_units>();
 
-  //maximal number of work item per work group
-  size_t max_work_item = min(
+  const auto max_work_item_sizes =
+    device.get_info<cl::sycl::info::device::max_work_item_sizes>();
+  const auto max_work_item = min(
     device.get_info<cl::sycl::info::device::max_work_group_size>(),
-    device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
+    max_work_item_sizes[0]);
 
   size_t local_mem_size =
     device.get_info<cl::sycl::info::device::local_mem_size>();
@@ -330,31 +331,26 @@ B buffer_map2reduce(ExecutionPolicy &snp,
 sycl_algorithm_descriptor compute_mapscan_descriptor(cl::sycl::device device,
                                               size_t size,
                                               size_t sizeofB) {
-  //std::cout << "size=\t" << size << std::endl;
   using std::min;
   using std::max;
   if (size == 0)
     return sycl_algorithm_descriptor {};
   size_t local_mem_size =
     device.get_info<cl::sycl::info::device::local_mem_size>();
-  //std::cout << "local_mem_size=\t" << local_mem_size << std::endl;
   size_t size_per_work_group = min(size, local_mem_size / sizeofB);
-  //std::cout << "size_per_work_group=\t" << size_per_work_group << std::endl;
   if (size_per_work_group <= 0)
     return sycl_algorithm_descriptor { size };
 
   size_t nb_work_group = up_rounded_division(size, size_per_work_group);
-  //std::cout << "nb_work_group=\t" << nb_work_group << std::endl;
 
-  size_t max_work_item = min(
+  const auto max_work_item_sizes =
+    device.get_info<cl::sycl::info::device::max_work_item_sizes>();
+  const auto max_work_item = min(
     device.get_info<cl::sycl::info::device::max_work_group_size>(),
-    device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
-  //std::cout << "max_work_item=\t" << max_work_item << std::endl;
+    max_work_item_sizes[0]);
   size_t nb_work_item = min(max_work_item, size_per_work_group);
-  //std::cout << "nb_work_item=\t" << nb_work_item << std::endl;
   size_t size_per_work_item =
     up_rounded_division(size_per_work_group, nb_work_item);
-  //std::cout << "size_per_work_item=\t" << size_per_work_item << std::endl;
   return sycl_algorithm_descriptor {
     size,
     size_per_work_group,
diff --git a/include/sycl/algorithm/count_if.hpp b/include/sycl/algorithm/count_if.hpp
@@ -65,33 +65,33 @@ typename std::iterator_traits<InputIterator>::difference_type count_if(
   auto device = q.get_device();
   auto bufI = sycl::helpers::make_const_buffer(first, last);
   cl::sycl::buffer<int, 1> bufR((cl::sycl::range<1>(vectorSize)));
-  size_t length = vectorSize;
+  auto length = vectorSize;
   auto ndRange = exec.calculateNdRange(vectorSize);
   const auto local = ndRange.get_local()[0];
   int passes = 0;
 
+  auto f = [&passes, &length, &ndRange, local, &bufI, &bufR, unary_op, binary_op](
+      cl::sycl::handler& h) mutable {
+    auto aI = bufI.template get_access<cl::sycl::access::mode::read>(h);
+    auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h);
+    cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
+                       cl::sycl::access::target::local>
+        scratch(ndRange.get_local(), h);
+
+    h.parallel_for<typename ExecutionPolicy::kernelName>(
+        ndRange, [aI, aR, scratch, passes, local, length, unary_op, binary_op](
+               cl::sycl::nd_item<1> id) {
+          auto r = ReductionStrategy<int>(local, length, id, scratch);
+          if (passes == 0) {
+            r.workitem_get_from(unary_op, aI);
+          } else {
+            r.workitem_get_from(aR);
+          }
+          r.combine_threads(binary_op);
+          r.workgroup_write_to(aR);
+        });  // end kernel
+  };         // end command group
   do {
-    auto f = [passes, length, ndRange, local, &bufI, &bufR, unary_op, binary_op](
-        cl::sycl::handler& h) mutable {
-      auto aI = bufI.template get_access<cl::sycl::access::mode::read>(h);
-      auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h);
-      cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
-                         cl::sycl::access::target::local>
-          scratch(ndRange.get_local(), h);
-
-      h.parallel_for<typename ExecutionPolicy::kernelName>(
-          ndRange, [aI, aR, scratch, passes, local, length, unary_op, binary_op](
-                 cl::sycl::nd_item<1> id) {
-            auto r = ReductionStrategy<int>(local, length, id, scratch);
-            if (passes == 0) {
-              r.workitem_get_from(unary_op, aI);
-            } else {
-              r.workitem_get_from(aR);
-            }
-            r.combine_threads(binary_op);
-            r.workgroup_write_to(aR);
-          });  // end kernel
-    };         // end command group
     q.submit(f);
     length = length / local;
     ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)),
diff --git a/include/sycl/algorithm/equal.hpp b/include/sycl/algorithm/equal.hpp
@@ -67,7 +67,7 @@ bool equal(ExecutionPolicy& exec, ForwardIt1 first1, ForwardIt1 last1,
 
   auto device = q.get_device();
 
-  size_t length = size1;
+  auto length = size1;
   auto ndRange = exec.calculateNdRange(size1);
   const auto local = ndRange.get_local()[0];
 
diff --git a/include/sycl/algorithm/inner_product.hpp b/include/sycl/algorithm/inner_product.hpp
@@ -128,34 +128,34 @@ T inner_product(ExecutionPolicy &exec, InputIt1 first1, InputIt1 last1,
     auto buf1 = sycl::helpers::make_const_buffer(first1, last1);
     auto buf2 = sycl::helpers::make_const_buffer(first2, last2);
     cl::sycl::buffer<T, 1> bufr((cl::sycl::range<1>(vectorSize)));
-    size_t length = vectorSize;
+    auto length = vectorSize;
     auto ndRange = exec.calculateNdRange(length);
     const auto local = ndRange.get_local()[0];
     int passes = 0;
+    auto cg = [&passes, &length, &ndRange, local, &buf1, &buf2, &bufr, op1, op2](
+        cl::sycl::handler &h) mutable {
+      auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h);
+      auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h);
+      auto aR =
+          bufr.template get_access<cl::sycl::access::mode::read_write>(h);
+      cl::sycl::accessor<T, 1, cl::sycl::access::mode::read_write,
+                         cl::sycl::access::target::local>
+          scratch(ndRange.get_local(), h);
+
+      h.parallel_for<typename ExecutionPolicy::kernelName>(
+          ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
+                 cl::sycl::nd_item<1> id) {
+            auto r = ReductionStrategy<T>(local, length, id, scratch);
+            if (passes == 0) {
+              r.workitem_get_from(op2, a1, a2);
+            } else {
+              r.workitem_get_from(aR);
+            }
+            r.combine_threads(op1);
+            r.workgroup_write_to(aR);
+          });  // end kernel
+    };         // end command group
     do {
-      auto cg = [passes, length, ndRange, local, &buf1, &buf2, &bufr, op1, op2](
-          cl::sycl::handler &h) mutable {
-        auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h);
-        auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h);
-        auto aR =
-            bufr.template get_access<cl::sycl::access::mode::read_write>(h);
-        cl::sycl::accessor<T, 1, cl::sycl::access::mode::read_write,
-                           cl::sycl::access::target::local>
-            scratch(ndRange.get_local(), h);
-
-        h.parallel_for<typename ExecutionPolicy::kernelName>(
-            ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
-                   cl::sycl::nd_item<1> id) {
-              auto r = ReductionStrategy<T>(local, length, id, scratch);
-              if (passes == 0) {
-                r.workitem_get_from(op2, a1, a2);
-              } else {
-                r.workitem_get_from(aR);
-              }
-              r.combine_threads(op1);
-              r.workgroup_write_to(aR);
-            });  // end kernel
-      };         // end command group
       q.submit(cg);
       passes++;
       length = length / local;
diff --git a/include/sycl/algorithm/mismatch.hpp b/include/sycl/algorithm/mismatch.hpp
@@ -93,27 +93,27 @@ std::pair<ForwardIt1, ForwardIt2> mismatch(ExecutionPolicy& exec,
   auto current_length = length;
   int passes = 0;
 
-  do {
-    const auto f = [passes, current_length, ndRange, local,
-                    &bufR](cl::sycl::handler& h) mutable {
-      const auto aR =
-          bufR.template get_access<cl::sycl::access::mode::read_write>(h);
-      cl::sycl::accessor<std::size_t, 1, cl::sycl::access::mode::read_write,
-                         cl::sycl::access::target::local>
-          scratch(ndRange.get_local(), h);
-
-      h.parallel_for<typename ExecutionPolicy::kernelName>(
-          ndRange, [aR, scratch, passes, local,
-              current_length](cl::sycl::nd_item<1> id) {
-            auto r = ReductionStrategy<std::size_t>(local, current_length, id,
-                                                    scratch);
-            r.workitem_get_from(aR);
-            r.combine_threads([](std::size_t x, std::size_t y) {
-              return cl::sycl::min(x, y);
-            });
-            r.workgroup_write_to(aR);
+  const auto f = [&passes, &current_length, &ndRange, local,
+                  &bufR](cl::sycl::handler& h) mutable {
+    const auto aR =
+        bufR.template get_access<cl::sycl::access::mode::read_write>(h);
+    cl::sycl::accessor<std::size_t, 1, cl::sycl::access::mode::read_write,
+                       cl::sycl::access::target::local>
+        scratch(ndRange.get_local(), h);
+
+    h.parallel_for<typename ExecutionPolicy::kernelName>(
+        ndRange, [aR, scratch, passes, local,
+            current_length](cl::sycl::nd_item<1> id) {
+          auto r = ReductionStrategy<std::size_t>(local, current_length, id,
+                                                  scratch);
+          r.workitem_get_from(aR);
+          r.combine_threads([](std::size_t x, std::size_t y) {
+            return cl::sycl::min(x, y);
           });
-    };
+          r.workgroup_write_to(aR);
+        });
+  };
+  do {
     q.submit(f);
     ++passes;
     current_length = current_length / local;
diff --git a/include/sycl/algorithm/reduce.hpp b/include/sycl/algorithm/reduce.hpp
@@ -69,29 +69,29 @@ typename std::iterator_traits<Iterator>::value_type reduce(
 
   typedef typename std::iterator_traits<Iterator>::value_type type_;
   auto bufI = sycl::helpers::make_const_buffer(b, e);
-  size_t length = vectorSize;
+  auto length = vectorSize;
   auto ndRange = sep.calculateNdRange(length);
   const auto local = ndRange.get_local()[0];
 
+  auto f = [&length, &ndRange, local, &bufI, bop](cl::sycl::handler &h) mutable {
+    auto aI = bufI.template get_access<cl::sycl::access::mode::read_write>(h);
+    cl::sycl::accessor<type_, 1, cl::sycl::access::mode::read_write,
+                       cl::sycl::access::target::local>
+        scratch(ndRange.get_local(), h);
+
+    h.parallel_for<typename ExecutionPolicy::kernelName>(
+        ndRange, [aI, scratch, local, length, bop](cl::sycl::nd_item<1> id) {
+          auto r = ReductionStrategy<T>(local, length, id, scratch);
+          r.workitem_get_from(aI);
+          r.combine_threads(bop);
+          r.workgroup_write_to(aI);
+        });
+  };
   do {
-    auto f = [length, ndRange, local, &bufI, bop](cl::sycl::handler &h) mutable {
-      auto aI = bufI.template get_access<cl::sycl::access::mode::read_write>(h);
-      cl::sycl::accessor<type_, 1, cl::sycl::access::mode::read_write,
-                         cl::sycl::access::target::local>
-          scratch(ndRange.get_local(), h);
-
-      h.parallel_for<typename ExecutionPolicy::kernelName>(
-          ndRange, [aI, scratch, local, length, bop](cl::sycl::nd_item<1> id) {
-            auto r = ReductionStrategy<T>(local, length, id, scratch);
-            r.workitem_get_from(aI);
-            r.combine_threads(bop);
-            r.workgroup_write_to(aI);
-          });
-    };
     q.submit(f);
     length = length / local;
-    cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)},
-                            ndRange.get_local()};
+    ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)),
+                                    ndRange.get_local()};
   } while (length > 1);
   q.wait_and_throw();
   auto hI = bufI.template get_access<cl::sycl::access::mode::read>();