LCOV - code coverage report
Current view: top level - build/_deps/kokkos-src/core/src - Kokkos_Tuners.hpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 0 2 0.0 %
Date: 2026-02-16 14:39:39 Functions: 0 0 -

          Line data    Source code
       1             : //@HEADER
       2             : // ************************************************************************
       3             : //
       4             : //                        Kokkos v. 4.0
       5             : //       Copyright (2022) National Technology & Engineering
       6             : //               Solutions of Sandia, LLC (NTESS).
       7             : //
       8             : // Under the terms of Contract DE-NA0003525 with NTESS,
       9             : // the U.S. Government retains certain rights in this software.
      10             : //
      11             : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
      12             : // See https://kokkos.org/LICENSE for license information.
      13             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      14             : //
      15             : //@HEADER
      16             : 
      17             : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
      18             : #include <Kokkos_Macros.hpp>
      19             : static_assert(false,
      20             :               "Including non-public Kokkos header files is not allowed.");
      21             : #endif
      22             : #ifndef KOKKOS_KOKKOS_TUNERS_HPP
      23             : #define KOKKOS_KOKKOS_TUNERS_HPP
      24             : 
      25             : #include <Kokkos_Macros.hpp>
      26             : #include <Kokkos_Core_fwd.hpp>
      27             : #include <Kokkos_ExecPolicy.hpp>
      28             : #include <KokkosExp_MDRangePolicy.hpp>
      29             : #include <impl/Kokkos_Profiling_Interface.hpp>
      30             : 
      31             : #include <array>
      32             : #include <utility>
      33             : #include <tuple>
      34             : #include <string>
      35             : #include <vector>
      36             : #include <map>
      37             : #include <cassert>
      38             : 
      39             : namespace Kokkos {
      40             : namespace Tools {
      41             : 
      42             : namespace Experimental {
      43             : 
      44             : // forward declarations
      45             : SetOrRange make_candidate_set(size_t size, int64_t* data);
      46             : bool have_tuning_tool();
      47             : size_t declare_output_type(const std::string&,
      48             :                            Kokkos::Tools::Experimental::VariableInfo);
      49             : void request_output_values(size_t, size_t,
      50             :                            Kokkos::Tools::Experimental::VariableValue*);
      51             : VariableValue make_variable_value(size_t, int64_t);
      52             : VariableValue make_variable_value(size_t, double);
      53             : SetOrRange make_candidate_range(double lower, double upper, double step,
      54             :                                 bool openLower, bool openUpper);
      55             : SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
      56             :                                 bool openLower, bool openUpper);
      57             : size_t get_new_context_id();
      58             : void begin_context(size_t context_id);
      59             : void end_context(size_t context_id);
      60             : namespace Impl {
      61             : 
      62             : /** We're going to take in search space descriptions
      63             :  * as nested maps, which aren't efficient to
      64             :  * iterate across by index. These are very similar
      65             :  * to nested maps, but better for index-based lookup
      66             :  */
      67             : template <typename ValueType, typename ContainedType>
      68             : struct ValueHierarchyNode;
      69             : 
      70             : template <typename ValueType, typename ContainedType>
      71             : struct ValueHierarchyNode {
      72             :   std::vector<ValueType> root_values;
      73             :   std::vector<ContainedType> sub_values;
      74             :   void add_root_value(const ValueType& in) noexcept {
      75             :     root_values.push_back(in);
      76             :   }
      77             :   void add_sub_container(const ContainedType& in) { sub_values.push_back(in); }
      78             :   const ValueType& get_root_value(const size_t index) const {
      79             :     return root_values[index];
      80             :   }
      81             :   const ContainedType& get_sub_value(const size_t index) const {
      82             :     return sub_values[index];
      83             :   }
      84             : };
      85             : 
      86             : template <typename ValueType>
      87           0 : struct ValueHierarchyNode<ValueType, void> {
      88             :   std::vector<ValueType> root_values;
      89             :   explicit ValueHierarchyNode(std::vector<ValueType> rv)
      90             :       : root_values(std::move(rv)) {}
      91             :   void add_root_value(const ValueType& in) noexcept {
      92             :     root_values.push_back(in);
      93             :   }
      94             :   const ValueType& get_root_value(const size_t index) const {
      95             :     return root_values[index];
      96             :   }
      97             : };
      98             : 
      99             : /** For a given nested map type, we need a way to
     100             :  * declare the equivalent ValueHierarchyNode
     101             :  * structure
     102             :  */
     103             : 
     104             : template <class NestedMap>
     105             : struct MapTypeConverter;
     106             : 
     107             : // Vectors are our lowest-level, no nested values
     108             : template <class T>
     109             : struct MapTypeConverter<std::vector<T>> {
     110             :   using type = ValueHierarchyNode<T, void>;
     111             : };
     112             : 
     113             : // Maps contain both the "root" types and sub-vectors
     114             : template <class K, class V>
     115             : struct MapTypeConverter<std::map<K, V>> {
     116             :   using type = ValueHierarchyNode<K, typename MapTypeConverter<V>::type>;
     117             : };
     118             : 
     119             : /**
     120             :  * We also need to be able to construct a ValueHierarchyNode set from a
     121             :  * map
     122             :  */
     123             : 
     124             : template <class NestedMap>
     125             : struct ValueHierarchyConstructor;
     126             : 
     127             : // Vectors are our lowest-level, no nested values. Just fill in the fundamental
     128             : // values
     129             : template <class T>
     130             : struct ValueHierarchyConstructor<std::vector<T>> {
     131             :   using return_type = typename MapTypeConverter<std::vector<T>>::type;
     132             :   static return_type build(const std::vector<T>& in) { return return_type{in}; }
     133             : };
     134             : 
     135             : // For maps, we need to fill in the fundamental values, and construct child
     136             : // nodes
     137             : template <class K, class V>
     138             : struct ValueHierarchyConstructor<std::map<K, V>> {
     139             :   using return_type = typename MapTypeConverter<std::map<K, V>>::type;
     140             :   static return_type build(const std::map<K, V>& in) {
     141             :     return_type node_to_build;
     142             :     for (auto& entry : in) {
     143             :       node_to_build.add_root_value(entry.first);
     144             :       node_to_build.add_sub_container(
     145             :           ValueHierarchyConstructor<V>::build(entry.second));
     146             :     }
     147             :     return node_to_build;
     148             :   }
     149             : };
     150             : 
     151             : /**
     152             :  * We're going to be declaring a sparse multidimensional
     153             :  * tuning space as a set of nested maps. The innermost level
     154             :  * will be a vector. The dimensionality of such a space is the number of
     155             :  * maps + 1.
     156             :  *
     157             :  * The following templates implement such logic recursively
     158             :  */
     159             : template <class InspectForDepth>
     160             : struct get_space_dimensionality;
     161             : 
     162             : // The dimensionality of a vector is 1
     163             : template <class T>
     164             : struct get_space_dimensionality<std::vector<T>> {
     165             :   static constexpr int value = 1;
     166             : };
     167             : 
     168             : // The dimensionality of a map is 1 (the map) plus the dimensionality
     169             : // of the map's value type
     170             : template <class K, class V>
     171             : struct get_space_dimensionality<std::map<K, V>> {
     172             :   static constexpr int value = 1 + get_space_dimensionality<V>::value;
     173             : };
     174             : 
     175             : template <class T, int N>
     176             : struct n_dimensional_sparse_structure;
     177             : 
     178             : template <class T>
     179             : struct n_dimensional_sparse_structure<T, 1> {
     180             :   using type = std::vector<T>;
     181             : };
     182             : 
     183             : template <class T, int N>
     184             : struct n_dimensional_sparse_structure {
     185             :   using type =
     186             :       std::map<T, typename n_dimensional_sparse_structure<T, N - 1>::type>;
     187             : };
     188             : 
     189             : /**
     190             :  * This is the ugly part of this implementation: mapping a set of doubles in
     191             :  * [0.0,1.0) into a point in this multidimensional space. We're going to
     192             :  * implement this concept recursively, building up a tuple at each level.
     193             :  */
     194             : 
     195             : // First, a helper to get the value in one dimension
     196             : template <class Container>
     197             : struct DimensionValueExtractor;
     198             : 
     199             : // At any given level, just return your value at that level
     200             : template <class RootType, class Subtype>
     201             : struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
     202             :   static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
     203             :                       double fraction_to_traverse) {
     204             :     size_t index = dimension.root_values.size() * fraction_to_traverse;
     205             :     return dimension.get_root_value(index);
     206             :   }
     207             : };
     208             : 
     209             : /** Now we're going to do the full "get a point in the space".
     210             :  * At a root level, we'll take in a ValueHierarchyNode and a set of doubles
     211             :  * representing the value in [0.0,1.0) we want to pick
     212             :  */
     213             : 
     214             : // At the bottom level, we have one double and a base-level ValueHierarchyNode
     215             : 
     216             : template <class HierarchyNode, class... InterpolationIndices>
     217             : struct GetMultidimensionalPoint;
     218             : 
     219             : template <class ValueType>
     220             : struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
     221             :   using node_type   = ValueHierarchyNode<ValueType, void>;
     222             :   using return_type = std::tuple<ValueType>;
     223             :   static return_type build(const node_type& in, double index) {
     224             :     return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
     225             :   }
     226             : };
     227             : 
     228             : // At levels above the bottom, we tuple_cat the result of our child on the end
     229             : // of our own tuple
     230             : template <class ValueType, class Subtype, class... Indices>
     231             : struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
     232             :                                 Indices...> {
     233             :   using node_type = ValueHierarchyNode<ValueType, Subtype>;
     234             :   using sub_tuple =
     235             :       typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
     236             :   using return_type = decltype(std::tuple_cat(
     237             :       std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
     238             :   static return_type build(const node_type& in, double fraction_to_traverse,
     239             :                            Indices... indices) {
     240             :     size_t index         = in.sub_values.size() * fraction_to_traverse;
     241             :     auto dimension_value = std::make_tuple(
     242             :         DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
     243             :     return std::tuple_cat(dimension_value,
     244             :                           GetMultidimensionalPoint<Subtype, Indices...>::build(
     245             :                               in.get_sub_value(index), indices...));
     246             :   }
     247             : };
     248             : 
     249             : template <typename PointType, class ArrayType, size_t... Is>
     250             : auto get_point_helper(const PointType& in, const ArrayType& indices,
     251             :                       std::index_sequence<Is...>) {
     252             :   using helper = GetMultidimensionalPoint<
     253             :       PointType,
     254             :       decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
     255             :   return helper::build(in, std::get<Is>(indices).value.double_value...);
     256             : }
     257             : 
     258             : template <typename PointType, typename ArrayType>
     259             : struct GetPoint;
     260             : 
     261             : template <typename PointType, size_t ArraySize>
     262             : struct GetPoint<
     263             :     PointType,
     264             :     std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>> {
     265             :   using index_set_type =
     266             :       std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>;
     267             :   static auto build(const PointType& in, const index_set_type& indices) {
     268             :     return get_point_helper(in, indices, std::make_index_sequence<ArraySize>{});
     269             :   }
     270             : };
     271             : 
     272             : template <typename PointType, typename ArrayType>
     273             : auto get_point(const PointType& point, const ArrayType& indices) {
     274             :   return GetPoint<PointType, ArrayType>::build(point, indices);
     275             : }
     276             : 
     277             : }  // namespace Impl
     278             : 
     279             : template <template <class...> class Container, size_t MaxDimensionSize = 100,
     280             :           class... TemplateArguments>
     281             : class MultidimensionalSparseTuningProblem {
     282             :  public:
     283             :   using ProblemSpaceInput = Container<TemplateArguments...>;
     284             :   static constexpr int space_dimensionality =
     285             :       Impl::get_space_dimensionality<ProblemSpaceInput>::value;
     286             :   static constexpr size_t max_space_dimension_size = MaxDimensionSize;
     287             :   static constexpr double tuning_min               = 0.0;
     288             :   static constexpr double tuning_max               = 0.999;
     289             : 
     290             :   // Not declared as static constexpr to work around the following compiler bug
     291             :   // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96862
     292             :   // where a floating-point expression cannot be constexpr under -frounding-math
     293             :   double tuning_step = tuning_max / max_space_dimension_size;
     294             : 
     295             :   using StoredProblemSpace =
     296             :       typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
     297             :   using HierarchyConstructor =
     298             :       typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
     299             : 
     300             :   using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
     301             :                                 space_dimensionality>;
     302             :   template <class Key, class Value>
     303             :   using extended_map = std::map<Key, Value>;
     304             :   template <typename Key>
     305             :   using extended_problem =
     306             :       MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
     307             :                                           ProblemSpaceInput>;
     308             :   template <typename Key, typename Value>
     309             :   using ExtendedProblemSpace =
     310             :       typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
     311             : 
     312             :   template <typename Key>
     313             :   auto extend(const std::string& axis_name,
     314             :               const std::vector<Key>& new_tuning_axis) const
     315             :       -> extended_problem<Key> {
     316             :     ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
     317             :     for (auto& key : new_tuning_axis) {
     318             :       extended_space.add_root_value(key);
     319             :       extended_space.add_sub_container(m_space);
     320             :     }
     321             :     std::vector<std::string> extended_names;
     322             :     extended_names.reserve(m_variable_names.size() + 1);
     323             :     extended_names.push_back(axis_name);
     324             :     extended_names.insert(extended_names.end(), m_variable_names.begin(),
     325             :                           m_variable_names.end());
     326             :     return extended_problem<Key>(extended_space, extended_names);
     327             :   }
     328             : 
     329             :  private:
     330             :   StoredProblemSpace m_space;
     331             :   std::array<size_t, space_dimensionality> variable_ids;
     332             :   std::vector<std::string> m_variable_names;
     333             :   size_t context;
     334             : 
     335             :  public:
     336             :   MultidimensionalSparseTuningProblem() = default;
     337             : 
     338             :   MultidimensionalSparseTuningProblem(StoredProblemSpace space,
     339             :                                       const std::vector<std::string>& names)
     340             :       : m_space(std::move(space)), m_variable_names(names) {
     341             :     assert(names.size() == space_dimensionality);
     342             :     for (unsigned long x = 0; x < names.size(); ++x) {
     343             :       VariableInfo info;
     344             :       info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
     345             :       info.category = Kokkos::Tools::Experimental::StatisticalCategory::
     346             :           kokkos_value_interval;
     347             :       info.valueQuantity =
     348             :           Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
     349             :       info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
     350             :           tuning_min, tuning_max, tuning_step, true, true);
     351             :       variable_ids[x] = declare_output_type(names[x], info);
     352             :     }
     353             :   }
     354             : 
     355             :   MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
     356             :                                       const std::vector<std::string>& names)
     357             :       : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
     358             :                                             names) {}
     359             : 
     360             :   template <typename... Coordinates>
     361             :   auto get_point(Coordinates... coordinates) {
     362             :     using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
     363             :                                  sizeof...(coordinates)>;
     364             :     return Impl::get_point(
     365             :         m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
     366             :                      0, static_cast<double>(coordinates))...}));
     367             :   }
     368             : 
     369             :   auto begin() {
     370             :     context = Kokkos::Tools::Experimental::get_new_context_id();
     371             :     ValueArray values;
     372             :     for (int x = 0; x < space_dimensionality; ++x) {
     373             :       values[x] = Kokkos::Tools::Experimental::make_variable_value(
     374             :           variable_ids[x], 0.0);
     375             :     }
     376             :     begin_context(context);
     377             :     request_output_values(context, space_dimensionality, values.data());
     378             :     return Impl::get_point(m_space, values);
     379             :   }
     380             : 
     381             :   auto end() { end_context(context); }
     382             : };
     383             : 
     384             : template <typename Tuner>
     385             : struct ExtendableTunerMixin {
     386             :   template <typename Key>
     387             :   auto combine(const std::string& axis_name,
     388             :                const std::vector<Key>& new_axis) const {
     389             :     const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
     390             :     return sub_tuner.extend(axis_name, new_axis);
     391             :   }
     392             : 
     393             :   template <typename... Coordinates>
     394             :   auto get_point(Coordinates... coordinates) {
     395             :     const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
     396             :     return sub_tuner.get_point(coordinates...);
     397             :   }
     398             : 
     399             :  private:
     400             :   ExtendableTunerMixin() = default;
     401             :   friend Tuner;
     402             : };
     403             : 
     404             : template <size_t MaxDimensionSize = 100, template <class...> class Container,
     405             :           class... TemplateArguments>
     406             : auto make_multidimensional_sparse_tuning_problem(
     407             :     const Container<TemplateArguments...>& in, std::vector<std::string> names) {
     408             :   return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
     409             :                                              TemplateArguments...>(in, names);
     410             : }
     411             : 
     412           0 : class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
     413             :  private:
     414             :   using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
     415             :   using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
     416             :       std::declval<SpaceDescription>(),
     417             :       std::declval<std::vector<std::string>>()));
     418             :   TunerType tuner;
     419             : 
     420             :  public:
     421             :   TeamSizeTuner()                                      = default;
     422             :   TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
     423             :   TeamSizeTuner(const TeamSizeTuner& other)            = default;
     424             :   TeamSizeTuner& operator=(TeamSizeTuner&& other)      = default;
     425             :   TeamSizeTuner(TeamSizeTuner&& other)                 = default;
     426             :   template <typename ViableConfigurationCalculator, typename Functor,
     427             :             typename TagType, typename... Properties>
     428             :   TeamSizeTuner(const std::string& name,
     429             :                 const Kokkos::TeamPolicy<Properties...>& policy_in,
     430             :                 const Functor& functor, const TagType& tag,
     431             :                 ViableConfigurationCalculator calc) {
     432             :     using PolicyType = Kokkos::TeamPolicy<Properties...>;
     433             :     PolicyType policy(policy_in);
     434             :     auto initial_vector_length = policy.impl_vector_length();
     435             :     if (initial_vector_length < 1) {
     436             :       policy.impl_set_vector_length(1);
     437             :     }
     438             :     /**
     439             :      * Here we attempt to enumerate all of the possible configurations
     440             :      * to expose to an autotuner. There are three possibilities
     441             :      *
     442             :      * 1) We're tuning both vector length and team size
     443             :      * 2) We're tuning vector length but not team size
     444             :      * 3) We're tuning team size but not vector length
     445             :      *
     446             :      * (In the fourth case where nothing is tuned
     447             :      * this function won't be called)
     448             :      *
     449             :      * The set of valid team sizes is dependent on
     450             :      * a vector length, so this leads to three
     451             :      * algorithms
     452             :      *
     453             :      * 1) Loop over vector lengths to get the set
     454             :      *    of team sizes for each vector length,
     455             :      *    add it all to the set
     456             :      * 2) Loop over vector lengths to see if the
     457             :      *    provided team size is valid for that
     458             :      *    vector length. If so, add it
     459             :      * 3) A special case of (1) in which we only
     460             :      *    have one vector length
     461             :      *
     462             :      */
     463             :     SpaceDescription space_description;
     464             : 
     465             :     auto max_vector_length = PolicyType::vector_length_max();
     466             :     std::vector<int64_t> allowed_vector_lengths;
     467             : 
     468             :     if (policy.impl_auto_vector_length()) {  // case 1 or 2
     469             :       for (int vector_length = max_vector_length; vector_length >= 1;
     470             :            vector_length /= 2) {
     471             :         policy.impl_set_vector_length(vector_length);
     472             :         /**
     473             :          * Figuring out whether a vector length is valid depends
     474             :          * on whether we're in case 1 (tune everything) or 2 (just tune vector
     475             :          * length)
     476             :          *
     477             :          * If we're tuning everything, all legal vector lengths are valid.
     478             :          * If we're just tuning vector length, we need to check that if we
     479             :          * set this vector length, the team size provided will be valid.
     480             :          *
     481             :          * These are the left and right hand sides of the "or" in this
     482             :          * conditional, respectively.
     483             :          */
     484             :         auto max_team_size = calc.get_max_team_size(policy, functor, tag);
     485             :         if ((policy.impl_auto_team_size()) ||
     486             :             (policy.team_size() <= max_team_size)) {
     487             :           allowed_vector_lengths.push_back(vector_length);
     488             :         }
     489             :       }
     490             :     } else {  // case 3, there's only one vector length to care about
     491             :       allowed_vector_lengths.push_back(policy.impl_vector_length());
     492             :     }
     493             : 
     494             :     for (const auto vector_length : allowed_vector_lengths) {
     495             :       std::vector<int64_t> allowed_team_sizes;
     496             :       policy.impl_set_vector_length(vector_length);
     497             :       auto max_team_size = calc.get_max_team_size(policy, functor, tag);
     498             :       if (policy.impl_auto_team_size()) {  // case 1 or 3, try all legal team
     499             :                                            // sizes
     500             :         for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
     501             :           allowed_team_sizes.push_back(team_size);
     502             :         }
     503             :       } else {  // case 2, just try the provided team size
     504             :         allowed_team_sizes.push_back(policy.team_size());
     505             :       }
     506             :       space_description[vector_length] = allowed_team_sizes;
     507             :     }
     508             :     tuner = make_multidimensional_sparse_tuning_problem<20>(
     509             :         space_description, {std::string(name + "_vector_length"),
     510             :                             std::string(name + "_team_size")});
     511             :     policy.impl_set_vector_length(initial_vector_length);
     512             :   }
     513             : 
     514             :   template <typename... Properties>
     515             :   auto tune(const Kokkos::TeamPolicy<Properties...>& policy_in) {
     516             :     Kokkos::TeamPolicy<Properties...> policy(policy_in);
     517             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     518             :       auto configuration = tuner.begin();
     519             :       auto team_size     = std::get<1>(configuration);
     520             :       auto vector_length = std::get<0>(configuration);
     521             :       if (vector_length > 0) {
     522             :         policy.impl_set_team_size(team_size);
     523             :         policy.impl_set_vector_length(vector_length);
     524             :       }
     525             :     }
     526             :     return policy;
     527             :   }
     528             :   void end() {
     529             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     530             :       tuner.end();
     531             :     }
     532             :   }
     533             : 
     534             :   TunerType get_tuner() const { return tuner; }
     535             : };
     536             : namespace Impl {
     537             : template <class T>
     538             : struct tuning_type_for;
     539             : 
     540             : template <>
     541             : struct tuning_type_for<double> {
     542             :   static constexpr Kokkos::Tools::Experimental::ValueType value =
     543             :       Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
     544             :   static double get(
     545             :       const Kokkos::Tools::Experimental::VariableValue& value_struct) {
     546             :     return value_struct.value.double_value;
     547             :   }
     548             : };
     549             : template <>
     550             : struct tuning_type_for<int64_t> {
     551             :   static constexpr Kokkos::Tools::Experimental::ValueType value =
     552             :       Kokkos::Tools::Experimental::ValueType::kokkos_value_int64;
     553             :   static int64_t get(
     554             :       const Kokkos::Tools::Experimental::VariableValue& value_struct) {
     555             :     return value_struct.value.int_value;
     556             :   }
     557             : };
     558             : }  // namespace Impl
     559             : template <class Bound>
     560             : class SingleDimensionalRangeTuner {
     561             :   size_t id;
     562             :   size_t context;
     563             :   using tuning_util = Impl::tuning_type_for<Bound>;
     564             : 
     565             :   Bound default_value;
     566             : 
     567             :  public:
     568             :   SingleDimensionalRangeTuner() = default;
     569             :   SingleDimensionalRangeTuner(
     570             :       const std::string& name,
     571             :       Kokkos::Tools::Experimental::StatisticalCategory category,
     572             :       Bound default_val, Bound lower, Bound upper, Bound step = (Bound)0) {
     573             :     default_value = default_val;
     574             :     Kokkos::Tools::Experimental::VariableInfo info;
     575             :     info.category   = category;
     576             :     info.candidates = make_candidate_range(
     577             :         static_cast<Bound>(lower), static_cast<Bound>(upper),
     578             :         static_cast<Bound>(step), false, false);
     579             :     info.valueQuantity =
     580             :         Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
     581             :     info.type = tuning_util::value;
     582             :     id        = Kokkos::Tools::Experimental::declare_output_type(name, info);
     583             :   }
     584             : 
     585             :   Bound begin() {
     586             :     context = Kokkos::Tools::Experimental::get_new_context_id();
     587             :     Kokkos::Tools::Experimental::begin_context(context);
     588             :     auto tuned_value =
     589             :         Kokkos::Tools::Experimental::make_variable_value(id, default_value);
     590             :     Kokkos::Tools::Experimental::request_output_values(context, 1,
     591             :                                                        &tuned_value);
     592             :     return tuning_util::get(tuned_value);
     593             :   }
     594             : 
     595             :   void end() { Kokkos::Tools::Experimental::end_context(context); }
     596             : 
     597             :   template <typename Functor>
     598             :   void with_tuned_value(Functor& func) {
     599             :     func(begin());
     600             :     end();
     601             :   }
     602             : };
     603             : 
     604             : class RangePolicyOccupancyTuner {
     605             :  private:
     606             :   using TunerType = SingleDimensionalRangeTuner<int64_t>;
     607             :   TunerType tuner;
     608             : 
     609             :  public:
     610             :   RangePolicyOccupancyTuner() = default;
     611             :   template <typename ViableConfigurationCalculator, typename Functor,
     612             :             typename TagType, typename... Properties>
     613             :   RangePolicyOccupancyTuner(const std::string& name,
     614             :                             const Kokkos::RangePolicy<Properties...>&,
     615             :                             const Functor&, const TagType&,
     616             :                             ViableConfigurationCalculator)
     617             :       : tuner(TunerType(name,
     618             :                         Kokkos::Tools::Experimental::StatisticalCategory::
     619             :                             kokkos_value_ratio,
     620             :                         100, 5, 100, 5)) {}
     621             : 
     622             :   template <typename... Properties>
     623             :   auto tune(const Kokkos::RangePolicy<Properties...>& policy_in) {
     624             :     Kokkos::RangePolicy<Properties...> policy(policy_in);
     625             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     626             :       auto occupancy = tuner.begin();
     627             :       policy.impl_set_desired_occupancy(
     628             :           Kokkos::Experimental::DesiredOccupancy{static_cast<int>(occupancy)});
     629             :     }
     630             :     return policy;
     631             :   }
     632             :   void end() {
     633             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     634             :       tuner.end();
     635             :     }
     636             :   }
     637             : 
     638             :   TunerType get_tuner() const { return tuner; }
     639             : };
     640             : 
     641             : namespace Impl {
     642             : 
     643             : template <typename T>
     644             : void fill_tile(std::vector<T>& cont, int tile_size) {
     645             :   for (int x = 1; x < tile_size; x *= 2) {
     646             :     cont.push_back(x);
     647             :   }
     648             : }
     649             : template <typename T, typename Mapped>
     650             : void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
     651             :   for (int x = 1; x < tile_size; x *= 2) {
     652             :     fill_tile(cont[x], tile_size / x);
     653             :   }
     654             : }
     655             : }  // namespace Impl
     656             : 
     657             : template <int MDRangeRank>
     658             : struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
     659             :  private:
     660             :   static constexpr int rank       = MDRangeRank;
     661             :   static constexpr int max_slices = 15;
     662             :   using SpaceDescription =
     663             :       typename Impl::n_dimensional_sparse_structure<int, rank>::type;
     664             :   using TunerType =
     665             :       decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
     666             :           std::declval<SpaceDescription>(),
     667             :           std::declval<std::vector<std::string>>()));
     668             :   TunerType tuner;
     669             : 
     670             :  public:
     671             :   MDRangeTuner() = default;
     672             :   template <typename Functor, typename TagType, typename Calculator,
     673             :             typename... Properties>
     674             :   MDRangeTuner(const std::string& name,
     675             :                const Kokkos::MDRangePolicy<Properties...>& policy,
     676             :                const Functor& functor, const TagType& tag, Calculator calc) {
     677             :     SpaceDescription desc;
     678             :     int max_tile_size =
     679             :         calc.get_mdrange_max_tile_size_product(policy, functor, tag);
     680             :     Impl::fill_tile(desc, max_tile_size);
     681             :     std::vector<std::string> feature_names;
     682             :     for (int x = 0; x < rank; ++x) {
     683             :       feature_names.push_back(name + "_tile_size_" + std::to_string(x));
     684             :     }
     685             :     tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
     686             :         desc, feature_names);
     687             :   }
     688             :   template <typename Policy, typename Tuple, size_t... Indices>
     689             :   void set_policy_tile(Policy& policy, const Tuple& tuple,
     690             :                        const std::index_sequence<Indices...>&) {
     691             :     policy.impl_change_tile_size({std::get<Indices>(tuple)...});
     692             :   }
     693             :   template <typename... Properties>
     694             :   auto tune(const Kokkos::MDRangePolicy<Properties...>& policy_in) {
     695             :     Kokkos::MDRangePolicy<Properties...> policy(policy_in);
     696             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     697             :       auto configuration = tuner.begin();
     698             :       set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
     699             :     }
     700             :     return policy;
     701             :   }
     702             :   void end() {
     703             :     if (Kokkos::Tools::Experimental::have_tuning_tool()) {
     704             :       tuner.end();
     705             :     }
     706             :   }
     707             : 
     708             :   TunerType get_tuner() const { return tuner; }
     709             : };
     710             : 
     711             : template <class Choice>
     712             : struct CategoricalTuner {
     713             :   using choice_list = std::vector<Choice>;
     714             :   choice_list choices;
     715             :   size_t context;
     716             :   size_t tuning_variable_id;
     717             :   CategoricalTuner(std::string name, choice_list m_choices)
     718             :       : choices(m_choices) {
     719             :     std::vector<int64_t> indices;
     720             :     for (typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
     721             :       indices.push_back(x);
     722             :     }
     723             :     VariableInfo info;
     724             :     info.category      = StatisticalCategory::kokkos_value_categorical;
     725             :     info.valueQuantity = CandidateValueType::kokkos_value_set;
     726             :     info.type          = ValueType::kokkos_value_int64;
     727             :     info.candidates    = make_candidate_set(indices.size(), indices.data());
     728             :     tuning_variable_id = declare_output_type(name, info);
     729             :   }
     730             :   const Choice& begin() {
     731             :     context = get_new_context_id();
     732             :     begin_context(context);
     733             :     VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
     734             :     request_output_values(context, 1, &value);
     735             :     return choices[value.value.int_value];
     736             :   }
     737             :   void end() { end_context(context); }
     738             : };
     739             : 
     740             : template <typename Choice>
     741             : auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
     742             :     -> CategoricalTuner<Choice> {
     743             :   return CategoricalTuner<Choice>(name, choices);
     744             : }
     745             : 
     746             : }  // namespace Experimental
     747             : }  // namespace Tools
     748             : }  // namespace Kokkos
     749             : 
     750             : #endif

Generated by: LCOV version 1.14