LCOV - code coverage report
Current view: top level - build/_deps/kokkos-src/core/src - Kokkos_Parallel.hpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 11 11 100.0 %
Date: 2026-02-16 14:39:39 Functions: 10 16 62.5 %

          Line data    Source code
       1             : //@HEADER
       2             : // ************************************************************************
       3             : //
       4             : //                        Kokkos v. 4.0
       5             : //       Copyright (2022) National Technology & Engineering
       6             : //               Solutions of Sandia, LLC (NTESS).
       7             : //
       8             : // Under the terms of Contract DE-NA0003525 with NTESS,
       9             : // the U.S. Government retains certain rights in this software.
      10             : //
      11             : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
      12             : // See https://kokkos.org/LICENSE for license information.
      13             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      14             : //
      15             : //@HEADER
      16             : 
      17             : /// \file Kokkos_Parallel.hpp
      18             : /// \brief Declaration of parallel operators
      19             : 
      20             : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
      21             : #include <Kokkos_Macros.hpp>
      22             : static_assert(false,
      23             :               "Including non-public Kokkos header files is not allowed.");
      24             : #endif
      25             : #ifndef KOKKOS_PARALLEL_HPP
      26             : #define KOKKOS_PARALLEL_HPP
      27             : 
      28             : #include <Kokkos_Core_fwd.hpp>
      29             : #include <Kokkos_DetectionIdiom.hpp>
      30             : #include <Kokkos_ExecPolicy.hpp>
      31             : #include <Kokkos_View.hpp>
      32             : 
      33             : #include <impl/Kokkos_Tools.hpp>
      34             : #include <impl/Kokkos_Tools_Generic.hpp>
      35             : 
      36             : #include <impl/Kokkos_Traits.hpp>
      37             : #include <impl/Kokkos_FunctorAnalysis.hpp>
      38             : 
      39             : #include <cstddef>
      40             : #include <type_traits>
      41             : #include <typeinfo>
      42             : 
      43             : //----------------------------------------------------------------------------
      44             : //----------------------------------------------------------------------------
      45             : 
      46             : namespace Kokkos {
      47             : namespace Impl {
      48             : 
      49             : template <class T>
      50             : using execution_space_t = typename T::execution_space;  // alias for T::execution_space; used as the probe in detected_t/is_detected below
      51             : 
      52             : template <class T>
      53             : using device_type_t = typename T::device_type;  // alias for T::device_type; used as the probe in detected_t/is_detected below
      54             : 
      55             : //----------------------------------------------------------------------------
      56             : /** \brief  Given a Functor and Execution Policy query an execution space.
      57             :  *
      58             :  *  if       the Policy has an execution space use that
      59             :  *  else if  the Functor has an execution_space use that
      60             :  *  else if  the Functor has a device_type use that for backward compatibility
      61             :  *  else     use the default
      62             :  */
      63             : 
      64             : template <class Functor, class Policy>
      65             : struct FunctorPolicyExecutionSpace {  // selects the execution space for a (Functor, Policy) pair
      66             :   using policy_execution_space  = detected_t<execution_space_t, Policy>;  // probe for Policy::execution_space
      67             :   using functor_execution_space = detected_t<execution_space_t, Functor>;  // probe for Functor::execution_space
      68             :   using functor_device_type     = detected_t<device_type_t, Functor>;  // probe for Functor::device_type
      69             :   using functor_device_type_execution_space =
      70             :       detected_t<execution_space_t, functor_device_type>;  // probe for Functor::device_type::execution_space
      71             : 
      72             :   static_assert(  // if Policy and Functor both name an execution_space, the two must be the same type
      73             :       !is_detected<execution_space_t, Policy>::value ||
      74             :           !is_detected<execution_space_t, Functor>::value ||
      75             :           std::is_same_v<policy_execution_space, functor_execution_space>,
      76             :       "A policy with an execution space and a functor with an execution space "
      77             :       "are given but the execution space types do not match!");
      78             :   static_assert(!is_detected<execution_space_t, Policy>::value ||  // Policy's space must match the space of Functor::device_type
      79             :                     !is_detected<device_type_t, Functor>::value ||
      80             :                     std::is_same_v<policy_execution_space,
      81             :                                    functor_device_type_execution_space>,
      82             :                 "A policy with an execution space and a functor with a device "
      83             :                 "type are given but the execution space types do not match!");
      84             :   static_assert(!is_detected<device_type_t, Functor>::value ||  // Functor's own execution_space and device_type must agree
      85             :                     !is_detected<execution_space_t, Functor>::value ||
      86             :                     std::is_same_v<functor_device_type_execution_space,
      87             :                                    functor_execution_space>,
      88             :                 "A functor with both an execution space and device type is "
      89             :                 "given but their execution space types do not match!");
      90             : 
      91             :   using execution_space = detected_or_t<  // first choice: Policy::execution_space when it exists
      92             :       detected_or_t<  // second choice: Functor::execution_space when it exists
      93             :           std::conditional_t<  // otherwise Functor::device_type's space, else the default space
      94             :               is_detected<device_type_t, Functor>::value,
      95             :               detected_t<execution_space_t, detected_t<device_type_t, Functor>>,
      96             :               Kokkos::DefaultExecutionSpace>,
      97             :           execution_space_t, Functor>,
      98             :       execution_space_t, Policy>;
      99             : };
     100             : 
     101             : }  // namespace Impl
     102             : }  // namespace Kokkos
     103             : 
     104             : //----------------------------------------------------------------------------
     105             : //----------------------------------------------------------------------------
     106             : 
     107             : namespace Kokkos {
     108             : 
     109             : /** \brief Execute \c functor in parallel according to the execution \c policy.
     110             :  *
     111             :  * A "functor" is a class containing the function to execute in parallel,
     112             :  * data needed for that execution, and an optional \c execution_space
     113             :  * alias.  Here is an example functor for parallel_for:
     114             :  *
     115             :  * \code
     116             :  *  class FunctorType {
     117             :  *  public:
     118             :  *    using execution_space = ...;
     119             :  *    void operator() ( WorkType iwork ) const ;
     120             :  *  };
     121             :  * \endcode
     122             :  *
     123             :  * In the above example, \c WorkType is any integer type for which a
     124             :  * valid conversion from \c size_t to \c WorkType exists.  The functor's
     125             :  * <tt>operator()</tt> method defines the operation to parallelize,
     126             :  * over the range of integer indices <tt>iwork=[0,work_count-1]</tt>.
     127             :  * This compares to a single iteration \c iwork of a \c for loop.
     128             :  * If \c execution_space is not defined DefaultExecutionSpace will be used.
     129             :  */
     130             : template <  // labeled parallel_for taking an execution policy
     131             :     class ExecPolicy, class FunctorType,
     132             :     class Enable = std::enable_if_t<is_execution_policy<ExecPolicy>::value>>  // enabled only when ExecPolicy is an execution policy
     133        1236 : inline void parallel_for(const std::string& str, const ExecPolicy& policy,
     134             :                          const FunctorType& functor) {
     135        1236 :   uint64_t kpID = 0;  // passed to both begin_ and end_parallel_for below
     136             : 
     137             :   /** Request a tuned policy from the tools subsystem */
     138        1236 :   const auto& response =
     139             :       Kokkos::Tools::Impl::begin_parallel_for(policy, functor, str, kpID);
     140        1236 :   const auto& inner_policy = response.policy;  // the closure is built from this policy, not from the caller's `policy`
     141             : 
     142        1236 :   auto closure =
     143             :       Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
     144             :           Impl::ParallelFor<FunctorType, ExecPolicy>>(functor, inner_policy);  // Impl::ParallelFor built from (functor, inner_policy)
     145             : 
     146        1236 :   closure.execute();
     147             : 
     148        1236 :   Kokkos::Tools::Impl::end_parallel_for(inner_policy, functor, str, kpID);  // closing tools hook, paired with begin_parallel_for above
     149        1236 : }
     150             : 
     151             : template <class ExecPolicy, class FunctorType>
     152        1236 : inline void parallel_for(  // unlabeled overload: forwards with an empty label string
     153             :     const ExecPolicy& policy, const FunctorType& functor,
     154             :     std::enable_if_t<is_execution_policy<ExecPolicy>::value>* = nullptr) {  // defaulted dummy parameter restricts this overload to execution policies
     155        1236 :   Kokkos::parallel_for("", policy, functor);
     156        1236 : }
     157             : 
     158             : template <class FunctorType>
     159             : inline void parallel_for(const std::string& str, const size_t work_count,  // labeled overload taking an iteration count instead of a policy
     160             :                          const FunctorType& functor) {
     161             :   using execution_space =
     162             :       typename Impl::FunctorPolicyExecutionSpace<FunctorType,
     163             :                                                  void>::execution_space;  // Policy = void, so the space comes from the functor or the default
     164             :   using policy = RangePolicy<execution_space>;
     165             : 
     166             :   policy execution_policy = policy(0, work_count);  // range policy constructed from the bounds (0, work_count)
     167             :   ::Kokkos::parallel_for(str, execution_policy, functor);  // delegate to the policy-based overload
     168             : }
     169             : 
     170             : template <class FunctorType>
     171             : inline void parallel_for(const size_t work_count, const FunctorType& functor) {  // unlabeled count-based overload
     172             :   ::Kokkos::parallel_for("", work_count, functor);  // forwards with an empty label string
     173             : }
     174             : 
     175             : }  // namespace Kokkos
     176             : 
     177             : #include <Kokkos_Parallel_Reduce.hpp>
     178             : //----------------------------------------------------------------------------
     179             : //----------------------------------------------------------------------------
     180             : 
     181             : namespace Kokkos {
     182             : 
     183             : /// \fn parallel_scan
     184             : /// \tparam ExecutionPolicy The execution policy type.
     185             : /// \tparam FunctorType     The scan functor type.
     186             : ///
     187             : /// \param policy  [in] The execution policy.
     188             : /// \param functor [in] The scan functor.
     189             : ///
     190             : /// This function implements a parallel scan pattern.  The scan can
     191             : /// be either inclusive or exclusive, depending on how you implement
     192             : /// the scan functor.
     193             : ///
     194             : /// A scan functor looks almost exactly like a reduce functor, except
     195             : /// that its operator() takes a third \c bool argument, \c final_pass,
     196             : /// which indicates whether this is the last pass of the scan
     197             : /// operation.  We will show below how to use the \c final_pass
     198             : /// argument to control whether the scan is inclusive or exclusive.
     199             : ///
     200             : /// Here is the minimum required interface of a scan functor for a POD
     201             : /// (plain old data) value type \c PodType.  That is, the result is a
     202             : /// View of zero or more PodType.  It is also possible for the result
     203             : /// to be an array of (same-sized) arrays of PodType, but we do not
     204             : /// show the required interface for that here.
     205             : /// \code
     206             : /// template< class ExecPolicy , class FunctorType >
     207             : /// class ScanFunctor {
     208             : /// public:
     209             : ///   // The Kokkos execution space
     210             : ///   using execution_space = ...;
     211             : ///   // Type of an entry of the array containing the result;
     212             : ///   // also the type of each of the entries combined using
     213             : ///   // operator() or join().
     214             : ///   using value_type = PodType;
     215             : ///
     216             : ///   void operator () (const ExecPolicy::member_type & i,
     217             : ///                     value_type& update,
     218             : ///                     const bool final_pass) const;
     219             : ///   void init (value_type& update) const;
     220             : ///   void join (value_type& update,
     221             : ///              const value_type& input) const;
     222             : /// };
     223             : /// \endcode
     224             : ///
     225             : /// Here is an example of a functor which computes an inclusive plus-scan
     226             : /// of an array of \c int, in place.  If given an array [1, 2, 3, 4], this
     227             : /// scan will overwrite that array with [1, 3, 6, 10].
     228             : ///
     229             : /// \code
     230             : /// template<class SpaceType>
     231             : /// class InclScanFunctor {
     232             : /// public:
     233             : ///   using execution_space = SpaceType;
     234             : ///   using value_type = int;
     235             : ///   using size_type = typename SpaceType::size_type;
     236             : ///
     237             : ///   InclScanFunctor( Kokkos::View<value_type*, execution_space> x
     238             : ///                  , Kokkos::View<value_type*, execution_space> y ) : m_x(x),
     239             : ///                  m_y(y) {}
     240             : ///
     241             : ///   void operator () (const size_type i, value_type& update, const bool
     242             : ///   final_pass) const {
     243             : ///     update += m_x(i);
     244             : ///     if (final_pass) {
     245             : ///       m_y(i) = update;
     246             : ///     }
     247             : ///   }
     248             : ///   void init (value_type& update) const {
     249             : ///     update = 0;
     250             : ///   }
     251             : ///   void join (value_type& update, const value_type& input)
     252             : ///   const {
     253             : ///     update += input;
     254             : ///   }
     255             : ///
     256             : /// private:
     257             : ///   Kokkos::View<value_type*, execution_space> m_x;
     258             : ///   Kokkos::View<value_type*, execution_space> m_y;
     259             : /// };
     260             : /// \endcode
     261             : ///
     262             : /// Here is an example of a functor which computes an <i>exclusive</i>
     263             : /// scan of an array of \c int, in place.  In operator(), note both
     264             : /// that the final_pass test and the update have switched places, and
     265             : /// the use of a temporary.  If given an array [1, 2, 3, 4], this scan
     266             : /// will overwrite that array with [0, 1, 3, 6].
     267             : ///
     268             : /// \code
     269             : /// template<class SpaceType>
     270             : /// class ExclScanFunctor {
     271             : /// public:
     272             : ///   using execution_space = SpaceType;
     273             : ///   using value_type = int;
     274             : ///   using size_type = typename SpaceType::size_type;
     275             : ///
     276             : ///   ExclScanFunctor (Kokkos::View<value_type*, execution_space> x) : x_ (x) {}
     277             : ///
     278             : ///   void operator () (const size_type i, value_type& update, const bool
     279             : ///   final_pass) const {
     280             : ///     const value_type x_i = x_(i);
     281             : ///     if (final_pass) {
     282             : ///       x_(i) = update;
     283             : ///     }
     284             : ///     update += x_i;
     285             : ///   }
     286             : ///   void init (value_type& update) const {
     287             : ///     update = 0;
     288             : ///   }
     289             : ///   void join (value_type& update, const value_type& input)
     290             : ///   const {
     291             : ///     update += input;
     292             : ///   }
     293             : ///
     294             : /// private:
     295             : ///   Kokkos::View<value_type*, execution_space> x_;
     296             : /// };
     297             : /// \endcode
     298             : ///
     299             : /// Here is an example of a functor which builds on the above
     300             : /// exclusive scan example, to compute an offsets array from a
     301             : /// population count array, in place.  We assume that the pop count
     302             : /// array has an extra entry at the end to store the final count.  If
     303             : /// given an array [1, 2, 3, 4, 0], this scan will overwrite that
     304             : /// array with [0, 1, 3, 6, 10].
     305             : ///
     306             : /// \code
     307             : /// template<class SpaceType>
     308             : /// class OffsetScanFunctor {
     309             : /// public:
     310             : ///   using execution_space = SpaceType;
     311             : ///   using value_type = int;
     312             : ///   using size_type = typename SpaceType::size_type;
     313             : ///
     314             : ///   // last_index_ is the last valid index (zero-based) of x.
     315             : ///   // If x has length zero, then last_index_ won't be used anyway.
     316             : ///   OffsetScanFunctor( Kokkos::View<value_type*, execution_space> x
     317             : ///                    , Kokkos::View<value_type*, execution_space> y )
     318             : ///      : m_x(x), m_y(y), last_index_ (x.extent(0) == 0 ? 0 :
     319             : ///      x.extent(0) - 1)
     320             : ///   {}
     321             : ///
     322             : ///   void operator () (const size_type i, int& update, const bool final_pass)
     323             : ///   const {
     324             : ///     if (final_pass) {
     325             : ///       m_y(i) = update;
     326             : ///     }
     327             : ///     update += m_x(i);
     328             : ///     // The last entry of m_y gets the final sum.
     329             : ///     if (final_pass && i == last_index_) {
     330             : ///       m_y(i+1) = update;
     331             : ///     }
     332             : ///   }
     333             : ///   void init (value_type& update) const {
     334             : ///     update = 0;
     335             : ///   }
     336             : ///   void join (value_type& update, const value_type& input)
     337             : ///   const {
     338             : ///     update += input;
     339             : ///   }
     340             : ///
     341             : /// private:
     342             : ///   Kokkos::View<value_type*, execution_space> m_x;
     343             : ///   Kokkos::View<value_type*, execution_space> m_y;
     344             : ///   const size_type last_index_;
     345             : /// };
     346             : /// \endcode
     347             : ///
     348             : template <class ExecutionPolicy, class FunctorType,  // labeled parallel_scan taking an execution policy
     349             :           class Enable =
     350             :               std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>  // enabled only when ExecutionPolicy is an execution policy
     351             : inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
     352             :                           const FunctorType& functor) {
     353             :   uint64_t kpID = 0;  // passed to both begin_ and end_parallel_scan below
     354             :   /** Request a tuned policy from the tools subsystem */
     355             :   const auto& response =
     356             :       Kokkos::Tools::Impl::begin_parallel_scan(policy, functor, str, kpID);
     357             :   const auto& inner_policy = response.policy;  // the closure is built from this policy, not from the caller's `policy`
     358             : 
     359             :   auto closure =
     360             :       Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
     361             :           Impl::ParallelScan<FunctorType, ExecutionPolicy>>(functor,
     362             :                                                             inner_policy);  // Impl::ParallelScan built from (functor, inner_policy)
     363             : 
     364             :   closure.execute();
     365             : 
     366             :   Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);  // closing tools hook, paired with begin_parallel_scan above
     367             : }
     368             : 
     369             : template <class ExecutionPolicy, class FunctorType>
     370             : inline void parallel_scan(  // unlabeled overload: forwards with an empty label string
     371             :     const ExecutionPolicy& policy, const FunctorType& functor,
     372             :     std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) {  // defaulted dummy parameter restricts this overload to execution policies
     373             :   ::Kokkos::parallel_scan("", policy, functor);
     374             : }
     375             : 
     376             : template <class FunctorType>
     377             : inline void parallel_scan(const std::string& str, const size_t work_count,  // labeled overload taking an iteration count instead of a policy
     378             :                           const FunctorType& functor) {
     379             :   using execution_space =
     380             :       typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
     381             :                                                          void>::execution_space;  // Policy = void, so the space comes from the functor or the default
     382             : 
     383             :   using policy = Kokkos::RangePolicy<execution_space>;
     384             : 
     385             :   policy execution_policy(0, work_count);  // range policy constructed from the bounds (0, work_count)
     386             :   parallel_scan(str, execution_policy, functor);  // delegate to the policy-based overload
     387             : }
     388             : 
     389             : template <class FunctorType>
     390             : inline void parallel_scan(const size_t work_count, const FunctorType& functor) {  // unlabeled count-based overload
     391             :   ::Kokkos::parallel_scan("", work_count, functor);  // forwards with an empty label string
     392             : }
     393             : 
     394             : template <class ExecutionPolicy, class FunctorType, class ReturnType,  // labeled parallel_scan that also delivers a result through return_value
     395             :           class Enable =
     396             :               std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>>  // enabled only when ExecutionPolicy is an execution policy
     397             : inline void parallel_scan(const std::string& str, const ExecutionPolicy& policy,
     398             :                           const FunctorType& functor,
     399             :                           ReturnType& return_value) {
     400             :   uint64_t kpID                = 0;  // passed to both begin_ and end_parallel_scan below
     401             :   ExecutionPolicy inner_policy = policy;  // local copy of the caller's policy
     402             :   Kokkos::Tools::Impl::begin_parallel_scan(inner_policy, functor, str, kpID);  // NOTE(review): return value is discarded here, unlike the overload without return_value, which uses response.policy - confirm intended
     403             : 
     404             :   if constexpr (Kokkos::is_view<ReturnType>::value) {  // ReturnType is a View: pass return_value to the closure as-is
     405             :     auto closure =
     406             :         Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
     407             :             Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
     408             :                                         typename ReturnType::value_type>>(
     409             :             functor, inner_policy, return_value);
     410             :     closure.execute();
     411             :   } else {  // ReturnType is not a View
     412             :     Kokkos::View<ReturnType, Kokkos::HostSpace> view(&return_value);  // HostSpace View constructed from the address of return_value
     413             :     auto closure =
     414             :         Kokkos::Impl::construct_with_shared_allocation_tracking_disabled<
     415             :             Impl::ParallelScanWithTotal<FunctorType, ExecutionPolicy,
     416             :                                         ReturnType>>(functor, inner_policy,
     417             :                                                      view);
     418             :     closure.execute();
     419             :   }
     420             : 
     421             :   Kokkos::Tools::Impl::end_parallel_scan(inner_policy, functor, str, kpID);  // closing tools hook, paired with begin_parallel_scan above
     422             : 
     423             :   if (!Kokkos::is_view<ReturnType>::value)  // non-View result: fence the policy's execution space (reason is in the message below)
     424             :     policy.space().fence(
     425             :         "Kokkos::parallel_scan: fence due to result being a value, not a view");
     426             : }
     427             : 
     428             : template <class ExecutionPolicy, class FunctorType, class ReturnType>
     429             : inline void parallel_scan(  // unlabeled overload with a result: forwards with an empty label string
     430             :     const ExecutionPolicy& policy, const FunctorType& functor,
     431             :     ReturnType& return_value,
     432             :     std::enable_if_t<is_execution_policy<ExecutionPolicy>::value>* = nullptr) {  // defaulted dummy parameter restricts this overload to execution policies
     433             :   ::Kokkos::parallel_scan("", policy, functor, return_value);
     434             : }
     435             : 
     436             : template <class FunctorType, class ReturnType>
     437             : inline void parallel_scan(const std::string& str, const size_t work_count,  // labeled count-based overload with a result
     438             :                           const FunctorType& functor,
     439             :                           ReturnType& return_value) {
     440             :   using execution_space =
     441             :       typename Kokkos::Impl::FunctorPolicyExecutionSpace<FunctorType,
     442             :                                                          void>::execution_space;  // Policy = void, so the space comes from the functor or the default
     443             : 
     444             :   using policy = Kokkos::RangePolicy<execution_space>;
     445             : 
     446             :   policy execution_policy(0, work_count);  // range policy constructed from the bounds (0, work_count)
     447             :   parallel_scan(str, execution_policy, functor, return_value);  // delegate to the policy-based overload
     448             : }
     449             : 
     450             : template <class FunctorType, class ReturnType>
     451             : inline void parallel_scan(const size_t work_count, const FunctorType& functor,  // unlabeled count-based overload with a result
     452             :                           ReturnType& return_value) {
     453             :   ::Kokkos::parallel_scan("", work_count, functor, return_value);  // forwards with an empty label string
     454             : }
     455             : 
     456             : }  // namespace Kokkos
     457             : 
     458             : //----------------------------------------------------------------------------
     459             : //----------------------------------------------------------------------------
     460             : 
     461             : namespace Kokkos {
     462             : namespace Impl {
     463             : 
     464             : template <class FunctorType,
     465             :           bool HasTeamShmemSize =
     466             :               has_member_team_shmem_size<FunctorType>::value,
     467             :           bool HasShmemSize = has_member_shmem_size<FunctorType>::value>
     468             : struct FunctorTeamShmemSize {  // primary template; the other three flag combinations are specialized below, leaving <false, false> here
     469             :   KOKKOS_INLINE_FUNCTION static size_t value(const FunctorType&, int) {  // NOTE(review): only this variant is marked KOKKOS_INLINE_FUNCTION; the specializations use plain `static inline` - confirm intended
     470             :     return 0;  // both arguments are ignored
     471             :   }
     472             : };
     473             : 
     474             : template <class FunctorType>
     475             : struct FunctorTeamShmemSize<FunctorType, true, false> {  // HasTeamShmemSize only
     476             :   static inline size_t value(const FunctorType& f, int team_size) {
     477             :     return f.team_shmem_size(team_size);  // forward to the functor's team_shmem_size member
     478             :   }
     479             : };
     480             : 
     481             : template <class FunctorType>
     482             : struct FunctorTeamShmemSize<FunctorType, false, true> {  // HasShmemSize only
     483             :   static inline size_t value(const FunctorType& f, int team_size) {
     484             :     return f.shmem_size(team_size);  // forward to the functor's shmem_size member
     485             :   }
     486             : };
     487             : template <class FunctorType>
     488             : struct FunctorTeamShmemSize<FunctorType, true, true> {  // both members present: rejected when value() is called
     489             :   static inline size_t value(const FunctorType& /*f*/, int /*team_size*/) {
     490             :     Kokkos::abort(
     491             :         "Functor with both team_shmem_size and shmem_size defined is "
     492             :         "not allowed");
     493             :     return 0;  // NOTE(review): presumably unreachable after Kokkos::abort; kept so the function returns a value - confirm
     494             :   }
     495             : };
     496             : 
     497             : }  // namespace Impl
     498             : }  // namespace Kokkos
     499             : 
     500             : //----------------------------------------------------------------------------
     501             : //----------------------------------------------------------------------------
     502             : 
     503             : #endif /* KOKKOS_PARALLEL_HPP */

Generated by: LCOV version 1.14