Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit f4da71f

Browse files
Rbiessy authored and Ruyman committed
Various cleaning
1 parent dc11bbb commit f4da71f

7 files changed

Lines changed: 96 additions & 98 deletions

File tree

benchmarks/sycl_reduce.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -55,9 +55,11 @@ benchmark<>::time_units_t benchmark_reduce(const unsigned numReps,
5555

5656
cl::sycl::queue q(cds);
5757
auto device = q.get_device();
58-
auto local = std::min(
58+
const auto maxWorkGroupItemSizes =
59+
device.get_info<cl::sycl::info::device::max_work_item_sizes>();
60+
const auto local = std::min(
5961
device.get_info<cl::sycl::info::device::max_work_group_size>(),
60-
device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
62+
maxWorkGroupItemSizes[0]);
6163
sycl::sycl_execution_policy<class ReduceAlgorithmBench> snp(q);
6264
auto bufI = sycl::helpers::make_const_buffer(v.begin(), v.end());
6365
size_t length = N;

include/sycl/algorithm/buffer_algorithms.hpp

Lines changed: 8 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -87,10 +87,11 @@ sycl_algorithm_descriptor compute_mapreduce_descriptor(cl::sycl::device device,
8787
size_t max_work_group =
8888
device.get_info<cl::sycl::info::device::max_compute_units>();
8989

90-
//maximal number of work item per work group
91-
size_t max_work_item = min(
90+
const auto max_work_item_sizes =
91+
device.get_info<cl::sycl::info::device::max_work_item_sizes>();
92+
const auto max_work_item = min(
9293
device.get_info<cl::sycl::info::device::max_work_group_size>(),
93-
device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
94+
max_work_item_sizes[0]);
9495

9596
size_t local_mem_size =
9697
device.get_info<cl::sycl::info::device::local_mem_size>();
@@ -330,31 +331,26 @@ B buffer_map2reduce(ExecutionPolicy &snp,
330331
sycl_algorithm_descriptor compute_mapscan_descriptor(cl::sycl::device device,
331332
size_t size,
332333
size_t sizeofB) {
333-
//std::cout << "size=\t" << size << std::endl;
334334
using std::min;
335335
using std::max;
336336
if (size == 0)
337337
return sycl_algorithm_descriptor {};
338338
size_t local_mem_size =
339339
device.get_info<cl::sycl::info::device::local_mem_size>();
340-
//std::cout << "local_mem_size=\t" << local_mem_size << std::endl;
341340
size_t size_per_work_group = min(size, local_mem_size / sizeofB);
342-
//std::cout << "size_per_work_group=\t" << size_per_work_group << std::endl;
343341
if (size_per_work_group <= 0)
344342
return sycl_algorithm_descriptor { size };
345343

346344
size_t nb_work_group = up_rounded_division(size, size_per_work_group);
347-
//std::cout << "nb_work_group=\t" << nb_work_group << std::endl;
348345

349-
size_t max_work_item = min(
346+
const auto max_work_item_sizes =
347+
device.get_info<cl::sycl::info::device::max_work_item_sizes>();
348+
const auto max_work_item = min(
350349
device.get_info<cl::sycl::info::device::max_work_group_size>(),
351-
device.get_info<cl::sycl::info::device::max_work_item_sizes>()[0]);
352-
//std::cout << "max_work_item=\t" << max_work_item << std::endl;
350+
max_work_item_sizes[0]);
353351
size_t nb_work_item = min(max_work_item, size_per_work_group);
354-
//std::cout << "nb_work_item=\t" << nb_work_item << std::endl;
355352
size_t size_per_work_item =
356353
up_rounded_division(size_per_work_group, nb_work_item);
357-
//std::cout << "size_per_work_item=\t" << size_per_work_item << std::endl;
358354
return sycl_algorithm_descriptor {
359355
size,
360356
size_per_work_group,

include/sycl/algorithm/count_if.hpp

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -65,33 +65,33 @@ typename std::iterator_traits<InputIterator>::difference_type count_if(
6565
auto device = q.get_device();
6666
auto bufI = sycl::helpers::make_const_buffer(first, last);
6767
cl::sycl::buffer<int, 1> bufR((cl::sycl::range<1>(vectorSize)));
68-
size_t length = vectorSize;
68+
auto length = vectorSize;
6969
auto ndRange = exec.calculateNdRange(vectorSize);
7070
const auto local = ndRange.get_local()[0];
7171
int passes = 0;
7272

73+
auto f = [&passes, &length, &ndRange, local, &bufI, &bufR, unary_op, binary_op](
74+
cl::sycl::handler& h) mutable {
75+
auto aI = bufI.template get_access<cl::sycl::access::mode::read>(h);
76+
auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h);
77+
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
78+
cl::sycl::access::target::local>
79+
scratch(ndRange.get_local(), h);
80+
81+
h.parallel_for<typename ExecutionPolicy::kernelName>(
82+
ndRange, [aI, aR, scratch, passes, local, length, unary_op, binary_op](
83+
cl::sycl::nd_item<1> id) {
84+
auto r = ReductionStrategy<int>(local, length, id, scratch);
85+
if (passes == 0) {
86+
r.workitem_get_from(unary_op, aI);
87+
} else {
88+
r.workitem_get_from(aR);
89+
}
90+
r.combine_threads(binary_op);
91+
r.workgroup_write_to(aR);
92+
}); // end kernel
93+
}; // end command group
7394
do {
74-
auto f = [passes, length, ndRange, local, &bufI, &bufR, unary_op, binary_op](
75-
cl::sycl::handler& h) mutable {
76-
auto aI = bufI.template get_access<cl::sycl::access::mode::read>(h);
77-
auto aR = bufR.template get_access<cl::sycl::access::mode::read_write>(h);
78-
cl::sycl::accessor<int, 1, cl::sycl::access::mode::read_write,
79-
cl::sycl::access::target::local>
80-
scratch(ndRange.get_local(), h);
81-
82-
h.parallel_for<typename ExecutionPolicy::kernelName>(
83-
ndRange, [aI, aR, scratch, passes, local, length, unary_op, binary_op](
84-
cl::sycl::nd_item<1> id) {
85-
auto r = ReductionStrategy<int>(local, length, id, scratch);
86-
if (passes == 0) {
87-
r.workitem_get_from(unary_op, aI);
88-
} else {
89-
r.workitem_get_from(aR);
90-
}
91-
r.combine_threads(binary_op);
92-
r.workgroup_write_to(aR);
93-
}); // end kernel
94-
}; // end command group
9595
q.submit(f);
9696
length = length / local;
9797
ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)),

include/sycl/algorithm/equal.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ bool equal(ExecutionPolicy& exec, ForwardIt1 first1, ForwardIt1 last1,
6767

6868
auto device = q.get_device();
6969

70-
size_t length = size1;
70+
auto length = size1;
7171
auto ndRange = exec.calculateNdRange(size1);
7272
const auto local = ndRange.get_local()[0];
7373

include/sycl/algorithm/inner_product.hpp

Lines changed: 24 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -128,34 +128,34 @@ T inner_product(ExecutionPolicy &exec, InputIt1 first1, InputIt1 last1,
128128
auto buf1 = sycl::helpers::make_const_buffer(first1, last1);
129129
auto buf2 = sycl::helpers::make_const_buffer(first2, last2);
130130
cl::sycl::buffer<T, 1> bufr((cl::sycl::range<1>(vectorSize)));
131-
size_t length = vectorSize;
131+
auto length = vectorSize;
132132
auto ndRange = exec.calculateNdRange(length);
133133
const auto local = ndRange.get_local()[0];
134134
int passes = 0;
135+
auto cg = [&passes, &length, &ndRange, local, &buf1, &buf2, &bufr, op1, op2](
136+
cl::sycl::handler &h) mutable {
137+
auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h);
138+
auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h);
139+
auto aR =
140+
bufr.template get_access<cl::sycl::access::mode::read_write>(h);
141+
cl::sycl::accessor<T, 1, cl::sycl::access::mode::read_write,
142+
cl::sycl::access::target::local>
143+
scratch(ndRange.get_local(), h);
144+
145+
h.parallel_for<typename ExecutionPolicy::kernelName>(
146+
ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
147+
cl::sycl::nd_item<1> id) {
148+
auto r = ReductionStrategy<T>(local, length, id, scratch);
149+
if (passes == 0) {
150+
r.workitem_get_from(op2, a1, a2);
151+
} else {
152+
r.workitem_get_from(aR);
153+
}
154+
r.combine_threads(op1);
155+
r.workgroup_write_to(aR);
156+
}); // end kernel
157+
}; // end command group
135158
do {
136-
auto cg = [passes, length, ndRange, local, &buf1, &buf2, &bufr, op1, op2](
137-
cl::sycl::handler &h) mutable {
138-
auto a1 = buf1.template get_access<cl::sycl::access::mode::read>(h);
139-
auto a2 = buf2.template get_access<cl::sycl::access::mode::read>(h);
140-
auto aR =
141-
bufr.template get_access<cl::sycl::access::mode::read_write>(h);
142-
cl::sycl::accessor<T, 1, cl::sycl::access::mode::read_write,
143-
cl::sycl::access::target::local>
144-
scratch(ndRange.get_local(), h);
145-
146-
h.parallel_for<typename ExecutionPolicy::kernelName>(
147-
ndRange, [a1, a2, aR, scratch, length, local, passes, op1, op2](
148-
cl::sycl::nd_item<1> id) {
149-
auto r = ReductionStrategy<T>(local, length, id, scratch);
150-
if (passes == 0) {
151-
r.workitem_get_from(op2, a1, a2);
152-
} else {
153-
r.workitem_get_from(aR);
154-
}
155-
r.combine_threads(op1);
156-
r.workgroup_write_to(aR);
157-
}); // end kernel
158-
}; // end command group
159159
q.submit(cg);
160160
passes++;
161161
length = length / local;

include/sycl/algorithm/mismatch.hpp

Lines changed: 20 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -93,27 +93,27 @@ std::pair<ForwardIt1, ForwardIt2> mismatch(ExecutionPolicy& exec,
9393
auto current_length = length;
9494
int passes = 0;
9595

96-
do {
97-
const auto f = [passes, current_length, ndRange, local,
98-
&bufR](cl::sycl::handler& h) mutable {
99-
const auto aR =
100-
bufR.template get_access<cl::sycl::access::mode::read_write>(h);
101-
cl::sycl::accessor<std::size_t, 1, cl::sycl::access::mode::read_write,
102-
cl::sycl::access::target::local>
103-
scratch(ndRange.get_local(), h);
104-
105-
h.parallel_for<typename ExecutionPolicy::kernelName>(
106-
ndRange, [aR, scratch, passes, local,
107-
current_length](cl::sycl::nd_item<1> id) {
108-
auto r = ReductionStrategy<std::size_t>(local, current_length, id,
109-
scratch);
110-
r.workitem_get_from(aR);
111-
r.combine_threads([](std::size_t x, std::size_t y) {
112-
return cl::sycl::min(x, y);
113-
});
114-
r.workgroup_write_to(aR);
96+
const auto f = [&passes, &current_length, &ndRange, local,
97+
&bufR](cl::sycl::handler& h) mutable {
98+
const auto aR =
99+
bufR.template get_access<cl::sycl::access::mode::read_write>(h);
100+
cl::sycl::accessor<std::size_t, 1, cl::sycl::access::mode::read_write,
101+
cl::sycl::access::target::local>
102+
scratch(ndRange.get_local(), h);
103+
104+
h.parallel_for<typename ExecutionPolicy::kernelName>(
105+
ndRange, [aR, scratch, passes, local,
106+
current_length](cl::sycl::nd_item<1> id) {
107+
auto r = ReductionStrategy<std::size_t>(local, current_length, id,
108+
scratch);
109+
r.workitem_get_from(aR);
110+
r.combine_threads([](std::size_t x, std::size_t y) {
111+
return cl::sycl::min(x, y);
115112
});
116-
};
113+
r.workgroup_write_to(aR);
114+
});
115+
};
116+
do {
117117
q.submit(f);
118118
++passes;
119119
current_length = current_length / local;

include/sycl/algorithm/reduce.hpp

Lines changed: 17 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -69,29 +69,29 @@ typename std::iterator_traits<Iterator>::value_type reduce(
6969

7070
typedef typename std::iterator_traits<Iterator>::value_type type_;
7171
auto bufI = sycl::helpers::make_const_buffer(b, e);
72-
size_t length = vectorSize;
72+
auto length = vectorSize;
7373
auto ndRange = sep.calculateNdRange(length);
7474
const auto local = ndRange.get_local()[0];
7575

76+
auto f = [&length, &ndRange, local, &bufI, bop](cl::sycl::handler &h) mutable {
77+
auto aI = bufI.template get_access<cl::sycl::access::mode::read_write>(h);
78+
cl::sycl::accessor<type_, 1, cl::sycl::access::mode::read_write,
79+
cl::sycl::access::target::local>
80+
scratch(ndRange.get_local(), h);
81+
82+
h.parallel_for<typename ExecutionPolicy::kernelName>(
83+
ndRange, [aI, scratch, local, length, bop](cl::sycl::nd_item<1> id) {
84+
auto r = ReductionStrategy<T>(local, length, id, scratch);
85+
r.workitem_get_from(aI);
86+
r.combine_threads(bop);
87+
r.workgroup_write_to(aI);
88+
});
89+
};
7690
do {
77-
auto f = [length, ndRange, local, &bufI, bop](cl::sycl::handler &h) mutable {
78-
auto aI = bufI.template get_access<cl::sycl::access::mode::read_write>(h);
79-
cl::sycl::accessor<type_, 1, cl::sycl::access::mode::read_write,
80-
cl::sycl::access::target::local>
81-
scratch(ndRange.get_local(), h);
82-
83-
h.parallel_for<typename ExecutionPolicy::kernelName>(
84-
ndRange, [aI, scratch, local, length, bop](cl::sycl::nd_item<1> id) {
85-
auto r = ReductionStrategy<T>(local, length, id, scratch);
86-
r.workitem_get_from(aI);
87-
r.combine_threads(bop);
88-
r.workgroup_write_to(aI);
89-
});
90-
};
9191
q.submit(f);
9292
length = length / local;
93-
cl::sycl::nd_range<1> r{cl::sycl::range<1>{std::max(length, local)},
94-
ndRange.get_local()};
93+
ndRange = cl::sycl::nd_range<1>{cl::sycl::range<1>(std::max(length, local)),
94+
ndRange.get_local()};
9595
} while (length > 1);
9696
q.wait_and_throw();
9797
auto hI = bufI.template get_access<cl::sycl::access::mode::read>();

0 commit comments

Comments
 (0)