Line data Source code
1 : //@HEADER
2 : // ************************************************************************
3 : //
4 : // Kokkos v. 4.0
5 : // Copyright (2022) National Technology & Engineering
6 : // Solutions of Sandia, LLC (NTESS).
7 : //
8 : // Under the terms of Contract DE-NA0003525 with NTESS,
9 : // the U.S. Government retains certain rights in this software.
10 : //
11 : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 : // See https://kokkos.org/LICENSE for license information.
13 : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 : //
15 : //@HEADER
16 :
17 : /// \file Kokkos_Parallel.hpp
18 : /// \brief Declaration of parallel operators
19 :
20 : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
21 : #include <Kokkos_Macros.hpp>
22 : static_assert(false,
23 : "Including non-public Kokkos header files is not allowed.");
24 : #endif
25 : #ifndef KOKKOS_PARALLEL_HPP
26 : #define KOKKOS_PARALLEL_HPP
27 :
28 : #include <Kokkos_Core_fwd.hpp>
29 : #include <Kokkos_DetectionIdiom.hpp>
30 : #include <Kokkos_ExecPolicy.hpp>
31 : #include <Kokkos_View.hpp>
32 :
33 : #include <impl/Kokkos_Tools.hpp>
34 : #include <impl/Kokkos_Tools_Generic.hpp>
35 :
36 : #include <impl/Kokkos_Traits.hpp>
37 : #include <impl/Kokkos_FunctorAnalysis.hpp>
38 :
39 : #include <cstddef>
40 : #include <type_traits>
41 : #include <typeinfo>
42 :
43 : //----------------------------------------------------------------------------
44 : //----------------------------------------------------------------------------
45 :
46 : namespace Kokkos {
47 : namespace Impl {
48 :
49 : template <class T>  // Detection-idiom archetype: T's nested execution_space.
50 : using execution_space_t = typename T::execution_space;
51 : // Detection-idiom archetype: T's nested device_type.
52 : template <class T>
53 : using device_type_t = typename T::device_type;
54 :
55 : //----------------------------------------------------------------------------
56 : /** \brief Select the execution space implied by a Functor and a Policy.
57 : *
58 : * - if the Policy has an \c execution_space alias, use that;
59 : * - else if the Functor has an \c execution_space alias, use that;
60 : * - else if the Functor has a \c device_type alias, use the
61 : * \c execution_space of that device type (backward compatibility);
62 : * - else use Kokkos::DefaultExecutionSpace. */
63 : // Policy may be a type with no execution_space alias; the work_count overloads in this file pass void.
64 : template <class Functor, class Policy>
65 : struct FunctorPolicyExecutionSpace {
66 : using policy_execution_space = detected_t<execution_space_t, Policy>;
67 : using functor_execution_space = detected_t<execution_space_t, Functor>;
68 : using functor_device_type = detected_t<device_type_t, Functor>;
69 : using functor_device_type_execution_space =
70 : detected_t<execution_space_t, functor_device_type>;
71 : // Consistency checks: each assert can only fail when both aliases it names are detected and differ.
72 : static_assert(
73 : !is_detected<execution_space_t, Policy>::value ||
74 : !is_detected<execution_space_t, Functor>::value ||
75 : std::is_same_v<policy_execution_space, functor_execution_space>,
76 : "A policy with an execution space and a functor with an execution space "
77 : "are given but the execution space types do not match!");
78 : static_assert(!is_detected<execution_space_t, Policy>::value ||
79 : !is_detected<device_type_t, Functor>::value ||
80 : std::is_same_v<policy_execution_space,
81 : functor_device_type_execution_space>,
82 : "A policy with an execution space and a functor with a device "
83 : "type are given but the execution space types do not match!");
84 : static_assert(!is_detected<device_type_t, Functor>::value ||
85 : !is_detected<execution_space_t, Functor>::value ||
86 : std::is_same_v<functor_device_type_execution_space,
87 : functor_execution_space>,
88 : "A functor with both an execution space and device type is "
89 : "given but their execution space types do not match!");
90 : // Resolved space, read outermost-first: Policy alias, then Functor alias, then Functor device_type, then default.
91 : using execution_space = detected_or_t<
92 : detected_or_t<
93 : std::conditional_t<
94 : is_detected<device_type_t, Functor>::value,
95 : detected_t<execution_space_t, detected_t<device_type_t, Functor>>,
96 : Kokkos::DefaultExecutionSpace>,
97 : execution_space_t, Functor>,
98 : execution_space_t, Policy>;
99 : };
100 :
101 : } // namespace Impl
102 : } // namespace Kokkos
103 :
104 : //----------------------------------------------------------------------------
105 : //----------------------------------------------------------------------------
106 :
107 : namespace Kokkos {
108 :
109 : /** \brief Execute \c functor in parallel according to the execution \c policy.
110 : *
111 : * A "functor" is a class containing the function to execute in parallel,
112 : * data needed for that execution, and an optional \c execution_space
113 : * alias. Here is an example functor for parallel_for:
114 : *
115 : * \code
116 : * class FunctorType {
117 : * public:
118 : * using execution_space = ...;
119 : * void operator() ( WorkType iwork ) const ;
120 : * };
121 : * \endcode
122 : *
123 : * In the above example, \c WorkType is any integer type for which a
124 : * valid conversion from \c size_t to \c WorkType exists. The functor's
125 : * <tt>operator()</tt> method defines the operation to parallelize,
126 : * over the range of integer indices <tt>iwork=[0,work_count-1]</tt>.
127 : * Each call corresponds to a single iteration \c iwork of a \c for loop.
128 : * If \c execution_space is not defined, DefaultExecutionSpace will be used.
129 : */
130 : template <  // Labeled overload; enabled only when ExecPolicy satisfies is_execution_policy.
131 : class ExecPolicy, class FunctorType,
132 : class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>>
133 1236 : inline void parallel_for(const std::string& str, const ExecPolicy& policy,
134 : const FunctorType& functor) {
135 1236 : uint64_t kpID = 0;  // handed to both the begin and end tool hooks below
136 :
137 : /** Request a tuned policy from the tools subsystem */
138 1236 : const auto& response =
139 : Kokkos::Tools::Impl::begin_parallel_for(policy, functor, str, kpID);
140 1236 : const auto& inner_policy = response.policy;
141 : // Build the ParallelFor closure from the functor and the policy returned by the tools.
142 1236 : auto closure =
143 : Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
144 : Impl::ParallelFor<FunctorType, ExecPolicy>>(functor, inner_policy);
145 : // Execute the closure.
146 1236 : closure.execute();
147 : // Notify the tools that the kernel identified by kpID has ended.
148 1236 : Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);
149 1236 : }
150 : /// Unlabeled overload: forwards to the labeled parallel_for with an empty label.
151 : template <class ExecPolicy, class FunctorType>
152 1236 : inline void parallel_for(
153 : const ExecPolicy& policy, const FunctorType& functor,
154 : std::enable_if_t<is_execution_policy<ExecPolicy>::value>* = nullptr) {
155 1236 : Kokkos::parallel_for("", policy, functor);
156 1236 : }
157 : /// Labeled work-count overload: builds RangePolicy(0, work_count) and forwards to the policy overload.
158 : template <class FunctorType>
159 : inline void parallel_for(const std::string& str, const size_t work_count,
160 : const FunctorType& functor) {
161 : using execution_space =
162 : typename Impl::FunctorPolicyExecutionSpace<FunctorType,
163 : void>::execution_space;
164 : using policy = RangePolicy<execution_space>;
165 : // Policy = void above, so the space is chosen from the functor's aliases or the default.
166 : policy execution_policy = policy(0, work_count);
167 : ::Kokkos::parallel_for(str, execution_policy, functor);
168 : }
169 : /// Unlabeled work-count overload: forwards to the labeled work-count overload with an empty label.
170 : template <class FunctorType>
171 : inline void parallel_for(const size_t work_count, const FunctorType& functor) {
172 : ::Kokkos::parallel_for("", work_count, functor);
173 : }
174 :
175 : } // namespace Kokkos
176 :
177 : #include <Kokkos_Parallel_Reduce.hpp>
178 : //----------------------------------------------------------------------------
179 : //----------------------------------------------------------------------------
180 :
181 : namespace Kokkos {
182 :
183 : /// \fn parallel_scan
184 : /// \tparam ExecutionPolicy The execution policy type.
185 : /// \tparam FunctorType The scan functor type.
186 : ///
187 : /// \param policy [in] The execution policy.
188 : /// \param functor [in] The scan functor.
189 : ///
190 : /// This function implements a parallel scan pattern. The scan can
191 : /// be either inclusive or exclusive, depending on how you implement
192 : /// the scan functor.
193 : ///
194 : /// A scan functor looks almost exactly like a reduce functor, except
195 : /// that its operator() takes a third \c bool argument, \c final_pass,
196 : /// which indicates whether this is the last pass of the scan
197 : /// operation. We will show below how to use the \c final_pass
198 : /// argument to control whether the scan is inclusive or exclusive.
199 : ///
200 : /// Here is the minimum required interface of a scan functor for a POD
201 : /// (plain old data) value type \c PodType. That is, the result is a
202 : /// View of zero or more PodType. It is also possible for the result
203 : /// to be an array of (same-sized) arrays of PodType, but we do not
204 : /// show the required interface for that here.
205 : /// \code
206 : /// template< class ExecPolicy , class FunctorType >
207 : /// class ScanFunctor {
208 : /// public:
209 : /// // The Kokkos device type
210 : /// using execution_space = ...;
211 : /// // Type of an entry of the array containing the result;
212 : /// // also the type of each of the entries combined using
213 : /// // operator() or join().
214 : /// using value_type = PodType;
215 : ///
216 : /// void operator () (const ExecPolicy::member_type & i,
217 : /// value_type& update,
218 : /// const bool final_pass) const;
219 : /// void init (value_type& update) const;
220 : /// void join (value_type& update,
221 : // const value_type& input) const
222 : /// };
223 : /// \endcode
224 : ///
225 : /// Here is an example of a functor which computes an inclusive plus-scan
226 : /// of an array of \c int, in place. If given an array [1, 2, 3, 4], this
227 : /// scan will overwrite that array with [1, 3, 6, 10].
228 : ///
229 : /// \code
230 : /// template<class SpaceType>
231 : /// class InclScanFunctor {
232 : /// public:
233 : /// using execution_space = SpaceType;
234 : /// using value_type = int;
235 : /// using size_type = typename SpaceType::size_type;
236 : ///
237 : /// InclScanFunctor( Kokkos::View<value_type*, execution_space> x
238 : /// , Kokkos::View<value_type*, execution_space> y ) : m_x(x),
239 : /// m_y(y) {}
240 : ///
241 : /// void operator () (const size_type i, value_type& update, const bool
242 : /// final_pass) const {
243 : /// update += m_x(i);
244 : /// if (final_pass) {
245 : /// m_y(i) = update;
246 : /// }
247 : /// }
248 : /// void init (value_type& update) const {
249 : /// update = 0;
250 : /// }
251 : /// void join (value_type& update, const value_type& input)
252 : /// const {
253 : /// update += input;
254 : /// }
255 : ///
256 : /// private:
257 : /// Kokkos::View<value_type*, execution_space> m_x;
258 : /// Kokkos::View<value_type*, execution_space> m_y;
259 : /// };
260 : /// \endcode
261 : ///
262 : /// Here is an example of a functor which computes an <i>exclusive</i>
263 : /// scan of an array of \c int, in place. In operator(), note both
264 : /// that the final_pass test and the update have switched places, and
265 : /// the use of a temporary. If given an array [1, 2, 3, 4], this scan
266 : /// will overwrite that array with [0, 1, 3, 6].
267 : ///
268 : /// \code
269 : /// template<class SpaceType>
270 : /// class ExclScanFunctor {
271 : /// public:
272 : /// using execution_space = SpaceType;
273 : /// using value_type = int;
274 : /// using size_type = typename SpaceType::size_type;
275 : ///
276 : /// ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {}
277 : ///
278 : /// void operator () (const size_type i, value_type& update, const bool
279 : /// final_pass) const {
280 : /// const value_type x_i = x_(i);
281 : /// if (final_pass) {
282 : /// x_(i) = update;
283 : /// }
284 : /// update += x_i;
285 : /// }
286 : /// void init (value_type& update) const {
287 : /// update = 0;
288 : /// }
289 : /// void join (value_type& update, const value_type& input)
290 : /// const {
291 : /// update += input;
292 : /// }
293 : ///
294 : /// private:
295 : /// Kokkos::View<value_type*, execution_space> x_;
296 : /// };
297 : /// \endcode
298 : ///
299 : /// Here is an example of a functor which builds on the above
300 : /// exclusive scan example, to compute an offsets array from a
301 : /// population count array, in place. We assume that the pop count
302 : /// array has an extra entry at the end to store the final count. If
303 : /// given an array [1, 2, 3, 4, 0], this scan will overwrite that
304 : /// array with [0, 1, 3, 6, 10].
305 : ///
306 : /// \code
307 : /// template<class SpaceType>
308 : /// class OffsetScanFunctor {
309 : /// public:
310 : /// using execution_space = SpaceType;
311 : /// using value_type = int;
312 : /// using size_type = typename SpaceType::size_type;
313 : ///
314 : /// // last_index_ is the last valid index (zero-based) of x.
315 : /// // If x has length zero, then last_index_ won't be used anyway.
316 : /// OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x
317 : /// , Kokkos::View<value_type*, execution_space> y )
318 : /// : m_x(x), m_y(y), last_index_ (x.dimension_0 () == 0 ? 0 :
319 : /// x.dimension_0 () - 1)
320 : /// {}
321 : ///
322 : /// void operator () (const size_type i, int& update, const bool final_pass)
323 : /// const {
324 : /// if (final_pass) {
325 : /// m_y(i) = update;
326 : /// }
327 : /// update += m_x(i);
328 : /// // The last entry of m_y gets the final sum.
329 : /// if (final_pass && i == last_index_) {
330 : /// m_y(i+1) = update;
331 : /// }
332 : /// }
333 : /// void init (value_type& update) const {
334 : /// update = 0;
335 : /// }
336 : /// void join (value_type& update, const value_type& input)
337 : /// const {
338 : /// update += input;
339 : /// }
340 : ///
341 : /// private:
342 : /// Kokkos::View<value_type*, execution_space> m_x;
343 : /// Kokkos::View<value_type*, execution_space> m_y;
344 : /// const size_type last_index_;
345 : /// };
346 : /// \endcode
347 : ///
348 : template <class ExecutionPolicy, class FunctorType,  // labeled overload, no result returned to the caller
349 : class Enable =
350 : std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
351 : inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
352 : const FunctorType& functor) {
353 : uint64_t kpID = 0;  // handed to both the begin and end tool hooks below
354 : /** Request a tuned policy from the tools subsystem */
355 : const auto& response =
356 : Kokkos::Tools::Impl::begin_parallel_scan(policy, functor, str, kpID);
357 : const auto& inner_policy = response.policy;
358 : // Build the ParallelScan closure from the functor and the policy returned by the tools.
359 : auto closure =
360 : Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
361 : Impl::ParallelScan<FunctorType, ExecutionPolicy>>(functor,
362 : inner_policy);
363 : // Execute the closure.
364 : closure.execute();
365 : // Notify the tools that the kernel identified by kpID has ended.
366 : Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
367 : }
368 : /// Unlabeled overload: forwards to the labeled parallel_scan with an empty label.
369 : template <class ExecutionPolicy, class FunctorType>
370 : inline void parallel_scan(
371 : const ExecutionPolicy& policy, const FunctorType& functor,
372 : std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) {
373 : ::Kokkos::parallel_scan("", policy, functor);
374 : }
375 : /// Labeled work-count overload: builds RangePolicy(0, work_count) and forwards to the policy overload.
376 : template <class FunctorType>
377 : inline void parallel_scan(const std::string& str, const size_t work_count,
378 : const FunctorType& functor) {
379 : using execution_space =
380 : typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
381 : void>::execution_space;
382 : // Policy = void above, so the space is chosen from the functor's aliases or the default.
383 : using policy = Kokkos::RangePolicy<execution_space>;
384 :
385 : policy execution_policy(0, work_count);
386 : parallel_scan(str, execution_policy, functor);
387 : }
388 : /// Unlabeled work-count overload: forwards to the labeled work-count overload with an empty label.
389 : template <class FunctorType>
390 : inline void parallel_scan(const size_t work_count, const FunctorType& functor) {
391 : ::Kokkos::parallel_scan("", work_count, functor);
392 : }
393 : /// Labeled scan that also passes return_value (a Kokkos View or a plain value) to ParallelScanWithTotal.
394 : template <class ExecutionPolicy, class FunctorType, class ReturnType,
395 : class Enable =
396 : std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>
397 : inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
398 : const FunctorType& functor,
399 : ReturnType& return_value) {
400 : uint64_t kpID = 0;  // handed to both the begin and end tool hooks below
401 : ExecutionPolicy inner_policy = policy;  // local copy of the caller's policy
402 : Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);  // NOTE(review): result discarded here, unlike the overload without return_value - confirm intended
403 : // A View result is passed to the closure as-is; otherwise a HostSpace View is built over &return_value.
404 : if constexpr (Kokkos::is_view<ReturnType>::value) {
405 : auto closure =
406 : Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
407 : Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
408 : typename ReturnType::value_type>>(
409 : functor, inner_policy, return_value);
410 : closure.execute();
411 : } else {
412 : Kokkos::View<ReturnType, Kokkos::HostSpace> view(&return_value);
413 : auto closure =
414 : Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
415 : Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
416 : ReturnType>>(functor, inner_policy,
417 : view);
418 : closure.execute();
419 : }
420 :
421 : Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);
422 : // Fence only for a non-View result. NOTE(review): the condition is a compile-time constant tested with a plain if.
423 : if (!Kokkos::is_view<ReturnType>::value)
424 : policy.space().fence(
425 : "Kokkos::parallel_scan: fence due to result being a value, not a view");
426 : }
427 : /// Unlabeled overload with result: forwards to the labeled overload with an empty label.
428 : template <class ExecutionPolicy, class FunctorType, class ReturnType>
429 : inline void parallel_scan(
430 : const ExecutionPolicy& policy, const FunctorType& functor,
431 : ReturnType& return_value,
432 : std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) {
433 : ::Kokkos::parallel_scan("", policy, functor, return_value);
434 : }
435 : /// Labeled work-count overload with result: builds RangePolicy(0, work_count) and forwards to the policy overload.
436 : template <class FunctorType, class ReturnType>
437 : inline void parallel_scan(const std::string& str, const size_t work_count,
438 : const FunctorType& functor,
439 : ReturnType& return_value) {
440 : using execution_space =
441 : typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
442 : void>::execution_space;
443 : // Policy = void above, so the space is chosen from the functor's aliases or the default.
444 : using policy = Kokkos::RangePolicy<execution_space>;
445 :
446 : policy execution_policy(0, work_count);
447 : parallel_scan(str, execution_policy, functor, return_value);
448 : }
449 : /// Unlabeled work-count overload with result: forwards to the labeled work-count overload with an empty label.
450 : template <class FunctorType, class ReturnType>
451 : inline void parallel_scan(const size_t work_count, const FunctorType& functor,
452 : ReturnType& return_value) {
453 : ::Kokkos::parallel_scan("", work_count, functor, return_value);
454 : }
455 :
456 : } // namespace Kokkos
457 :
458 : //----------------------------------------------------------------------------
459 : //----------------------------------------------------------------------------
460 :
461 : namespace Kokkos {
462 : namespace Impl {
463 : // Returns the shared-memory size a functor reports for a team size, dispatching on which size member the traits detect.
464 : template <class FunctorType,
465 : bool HasTeamShmemSize =
466 : has_member_team_shmem_size<FunctorType>::value,
467 : bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
468 : struct FunctorTeamShmemSize {
469 : KOKKOS_INLINE_FUNCTION static size_t value(const FunctorType&, int) {  // NOTE(review): only this variant is KOKKOS_INLINE_FUNCTION - confirm intended
470 : return 0;  // primary template: used when neither flag is true
471 : }
472 : };
473 : // Only HasTeamShmemSize is true: forward to f.team_shmem_size(team_size).
474 : template <class FunctorType>
475 : struct FunctorTeamShmemSize<FunctorType, true, false> {
476 : static inline size_t value(const FunctorType& f, int team_size) {
477 : return f.team_shmem_size(team_size);
478 : }
479 : };
480 : // Only HasShmemSize is true: forward to f.shmem_size(team_size).
481 : template <class FunctorType>
482 : struct FunctorTeamShmemSize<FunctorType, false, true> {
483 : static inline size_t value(const FunctorType& f, int team_size) {
484 : return f.shmem_size(team_size);
485 : }
486 : };
487 : template <class FunctorType>  // Both flags true: rejected with Kokkos::abort when value() is called.
488 : struct FunctorTeamShmemSize<FunctorType, true, true> {
489 : static inline size_t value(const FunctorType& /*f*/, int /*team_size*/) {
490 : Kokkos::abort(
491 : "Functor with both team_shmem_size and shmem_size defined is "
492 : "not allowed");
493 : return 0;  // follows the abort call; keeps a return statement in the non-void function
494 : }
495 : };
496 :
497 : } // namespace Impl
498 : } // namespace Kokkos
499 :
500 : //----------------------------------------------------------------------------
501 : //----------------------------------------------------------------------------
502 :
503 : #endif /* KOKKOS_PARALLEL_HPP */
|