Line data Source code
1 : //@HEADER
2 : // ************************************************************************
3 : //
4 : // Kokkos v. 4.0
5 : // Copyright (2022) National Technology & Engineering
6 : // Solutions of Sandia, LLC (NTESS).
7 : //
8 : // Under the terms of Contract DE-NA0003525 with NTESS,
9 : // the U.S. Government retains certain rights in this software.
10 : //
11 : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 : // See https://kokkos.org/LICENSE for license information.
13 : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 : //
15 : //@HEADER
16 :
17 : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
18 : #include <Kokkos_Macros.hpp>
19 : static_assert(false,
20 : "Including non-public Kokkos header files is not allowed.");
21 : #endif
22 : #ifndef KOKKOS_KOKKOS_TUNERS_HPP
23 : #define KOKKOS_KOKKOS_TUNERS_HPP
24 :
25 : #include <Kokkos_Macros.hpp>
26 : #include <Kokkos_Core_fwd.hpp>
27 : #include <Kokkos_ExecPolicy.hpp>
28 : #include <KokkosExp_MDRangePolicy.hpp>
29 : #include <impl/Kokkos_Profiling_Interface.hpp>
30 :
31 : #include <array>
32 : #include <utility>
33 : #include <tuple>
34 : #include <string>
35 : #include <vector>
36 : #include <map>
37 : #include <cassert>
38 :
39 : namespace Kokkos {
40 : namespace Tools {
41 :
42 : namespace Experimental {
43 :
44 : // forward declarations
45 : SetOrRange make_candidate_set(size_t size, int64_t* data);
46 : bool have_tuning_tool();
47 : size_t declare_output_type(const std::string&,
48 : Kokkos::Tools::Experimental::VariableInfo);
49 : void request_output_values(size_t, size_t,
50 : Kokkos::Tools::Experimental::VariableValue*);
51 : VariableValue make_variable_value(size_t, int64_t);
52 : VariableValue make_variable_value(size_t, double);
53 : SetOrRange make_candidate_range(double lower, double upper, double step,
54 : bool openLower, bool openUpper);
55 : SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
56 : bool openLower, bool openUpper);
57 : size_t get_new_context_id();
58 : void begin_context(size_t context_id);
59 : void end_context(size_t context_id);
60 : namespace Impl {
61 :
62 : /** We're going to take in search space descriptions
63 : * as nested maps, which aren't efficient to
64 : * iterate across by index. These are very similar
65 : * to nested maps, but better for index-based lookup
66 : */
/**
 * One level of a search space stored for index-based lookup.
 *
 * ValueType     - type of the values selectable at this level
 * ContainedType - node type of the next level down, or void at the
 *                 innermost level (see the partial specialization below)
 */
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode;

// Interior level: holds the values of this level and one child node per
// appended sub container.
template <typename ValueType, typename ContainedType>
struct ValueHierarchyNode {
  std::vector<ValueType> root_values;
  std::vector<ContainedType> sub_values;

  // Append a value of this level. Declared noexcept, so a failing allocation
  // terminates the program instead of propagating an exception.
  void add_root_value(const ValueType& in) noexcept {
    root_values.emplace_back(in);
  }

  // Append a child node.
  void add_sub_container(const ContainedType& in) {
    sub_values.emplace_back(in);
  }

  // Unchecked access to the index-th value of this level.
  const ValueType& get_root_value(const size_t index) const {
    const auto& picked = root_values[index];
    return picked;
  }

  // Unchecked access to the index-th child node.
  const ContainedType& get_sub_value(const size_t index) const {
    const auto& picked = sub_values[index];
    return picked;
  }
};

// Innermost level: only values, no children.
template <typename ValueType>
struct ValueHierarchyNode<ValueType, void> {
  std::vector<ValueType> root_values;

  // Takes ownership of the given list of values.
  explicit ValueHierarchyNode(std::vector<ValueType> rv)
      : root_values(std::move(rv)) {}

  // Append a value. Declared noexcept, so a failing allocation terminates
  // the program instead of propagating an exception.
  void add_root_value(const ValueType& in) noexcept {
    root_values.emplace_back(in);
  }

  // Unchecked access to the index-th value.
  const ValueType& get_root_value(const size_t index) const {
    const auto& picked = root_values[index];
    return picked;
  }
};
98 :
99 : /** For a given nested map type, we need a way to
100 : * declare the equivalent ValueHierarchyNode
101 : * structure
102 : */
103 :
104 : template <class NestedMap>
105 : struct MapTypeConverter;
106 :
107 : // Vectors are our lowest-level, no nested values
108 : template <class T>
109 : struct MapTypeConverter<std::vector<T>> {
110 : using type = ValueHierarchyNode<T, void>;
111 : };
112 :
113 : // Maps contain both the "root" types and sub-vectors
114 : template <class K, class V>
115 : struct MapTypeConverter<std::map<K, V>> {
116 : using type = ValueHierarchyNode<K, typename MapTypeConverter<V>::type>;
117 : };
118 :
119 : /**
120 : * We also need to be able to construct a ValueHierarchyNode set from a
121 : * map
122 : */
123 :
124 : template <class NestedMap>
125 : struct ValueHierarchyConstructor;
126 :
127 : // Vectors are our lowest-level, no nested values. Just fill in the fundamental
128 : // values
129 : template <class T>
130 : struct ValueHierarchyConstructor<std::vector<T>> {
131 : using return_type = typename MapTypeConverter<std::vector<T>>::type;
132 : static return_type build(const std::vector<T>& in) { return return_type{in}; }
133 : };
134 :
135 : // For maps, we need to fill in the fundamental values, and construct child
136 : // nodes
137 : template <class K, class V>
138 : struct ValueHierarchyConstructor<std::map<K, V>> {
139 : using return_type = typename MapTypeConverter<std::map<K, V>>::type;
140 : static return_type build(const std::map<K, V>& in) {
141 : return_type node_to_build;
142 : for (auto& entry : in) {
143 : node_to_build.add_root_value(entry.first);
144 : node_to_build.add_sub_container(
145 : ValueHierarchyConstructor<V>::build(entry.second));
146 : }
147 : return node_to_build;
148 : }
149 : };
150 :
151 : /**
152 : * We're going to be declaring a sparse multidimensional
153 : * tuning space as a set of nested maps. The innermost level
154 : * will be a vector. The dimensionality of such a space is the number of
155 : * maps + 1.
156 : *
157 : * The following templates implement such logic recursively
158 : */
// Compile-time count of the axes of a nested-map search space; the result is
// exposed as the static member `value`.
template <class InspectForDepth>
struct get_space_dimensionality;

// A vector is the innermost axis and counts as one dimension.
template <class Element>
struct get_space_dimensionality<std::vector<Element>> {
  static constexpr int value = 1;
};

// A map adds one axis on top of whatever its mapped type contributes.
template <class Key, class Nested>
struct get_space_dimensionality<std::map<Key, Nested>> {
  static constexpr int value = get_space_dimensionality<Nested>::value + 1;
};
174 :
// Produces the nested-map type describing an N-axis sparse space whose values
// are all of type T: N - 1 levels of std::map<T, ...> around a std::vector<T>.
template <class T, int N>
struct n_dimensional_sparse_structure {
  using type =
      std::map<T, typename n_dimensional_sparse_structure<T, N - 1>::type>;
};

// Recursion anchor: a single axis is a plain vector.
template <class T>
struct n_dimensional_sparse_structure<T, 1> {
  using type = std::vector<T>;
};
188 :
189 : /**
190 : * This is the ugly part of this implementation: mapping a set of doubles in
191 : * [0.0,1.0) into a point in this multidimensional space. We're going to
192 : * implement this concept recursively, building up a tuple at each level.
193 : */
194 :
195 : // First, a helper to get the value in one dimension
196 : template <class Container>
197 : struct DimensionValueExtractor;
198 :
199 : // At any given level, just return your value at that level
200 : template <class RootType, class Subtype>
201 : struct DimensionValueExtractor<ValueHierarchyNode<RootType, Subtype>> {
202 : static RootType get(const ValueHierarchyNode<RootType, Subtype>& dimension,
203 : double fraction_to_traverse) {
204 : size_t index = dimension.root_values.size() * fraction_to_traverse;
205 : return dimension.get_root_value(index);
206 : }
207 : };
208 :
209 : /** Now we're going to do the full "get a point in the space".
210 : * At a root level, we'll take in a ValueHierarchyNode and a set of doubles
211 : * representing the value in [0.0,1.0) we want to pick
212 : */
213 :
214 : // At the bottom level, we have one double and a base-level ValueHierarchyNode
215 :
216 : template <class HierarchyNode, class... InterpolationIndices>
217 : struct GetMultidimensionalPoint;
218 :
219 : template <class ValueType>
220 : struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, void>, double> {
221 : using node_type = ValueHierarchyNode<ValueType, void>;
222 : using return_type = std::tuple<ValueType>;
223 : static return_type build(const node_type& in, double index) {
224 : return std::make_tuple(DimensionValueExtractor<node_type>::get(in, index));
225 : }
226 : };
227 :
228 : // At levels above the bottom, we tuple_cat the result of our child on the end
229 : // of our own tuple
230 : template <class ValueType, class Subtype, class... Indices>
231 : struct GetMultidimensionalPoint<ValueHierarchyNode<ValueType, Subtype>, double,
232 : Indices...> {
233 : using node_type = ValueHierarchyNode<ValueType, Subtype>;
234 : using sub_tuple =
235 : typename GetMultidimensionalPoint<Subtype, Indices...>::return_type;
236 : using return_type = decltype(std::tuple_cat(
237 : std::declval<std::tuple<ValueType>>(), std::declval<sub_tuple>()));
238 : static return_type build(const node_type& in, double fraction_to_traverse,
239 : Indices... indices) {
240 : size_t index = in.sub_values.size() * fraction_to_traverse;
241 : auto dimension_value = std::make_tuple(
242 : DimensionValueExtractor<node_type>::get(in, fraction_to_traverse));
243 : return std::tuple_cat(dimension_value,
244 : GetMultidimensionalPoint<Subtype, Indices...>::build(
245 : in.get_sub_value(index), indices...));
246 : }
247 : };
248 :
249 : template <typename PointType, class ArrayType, size_t... Is>
250 : auto get_point_helper(const PointType& in, const ArrayType& indices,
251 : std::index_sequence<Is...>) {
252 : using helper = GetMultidimensionalPoint<
253 : PointType,
254 : decltype(std::get<Is>(std::declval<ArrayType>()).value.double_value)...>;
255 : return helper::build(in, std::get<Is>(indices).value.double_value...);
256 : }
257 :
258 : template <typename PointType, typename ArrayType>
259 : struct GetPoint;
260 :
261 : template <typename PointType, size_t ArraySize>
262 : struct GetPoint<
263 : PointType,
264 : std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>> {
265 : using index_set_type =
266 : std::array<Kokkos::Tools::Experimental::VariableValue, ArraySize>;
267 : static auto build(const PointType& in, const index_set_type& indices) {
268 : return get_point_helper(in, indices, std::make_index_sequence<ArraySize>{});
269 : }
270 : };
271 :
272 : template <typename PointType, typename ArrayType>
273 : auto get_point(const PointType& point, const ArrayType& indices) {
274 : return GetPoint<PointType, ArrayType>::build(point, indices);
275 : }
276 :
277 : } // namespace Impl
278 :
279 : template <template <class...> class Container, size_t MaxDimensionSize = 100,
280 : class... TemplateArguments>
281 : class MultidimensionalSparseTuningProblem {
282 : public:
283 : using ProblemSpaceInput = Container<TemplateArguments...>;
284 : static constexpr int space_dimensionality =
285 : Impl::get_space_dimensionality<ProblemSpaceInput>::value;
286 : static constexpr size_t max_space_dimension_size = MaxDimensionSize;
287 : static constexpr double tuning_min = 0.0;
288 : static constexpr double tuning_max = 0.999;
289 :
290 : // Not declared as static constexpr to work around the following compiler bug
291 : // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96862
292 : // where a floating-point expression cannot be constexpr under -frounding-math
293 : double tuning_step = tuning_max / max_space_dimension_size;
294 :
295 : using StoredProblemSpace =
296 : typename Impl::MapTypeConverter<ProblemSpaceInput>::type;
297 : using HierarchyConstructor =
298 : typename Impl::ValueHierarchyConstructor<Container<TemplateArguments...>>;
299 :
300 : using ValueArray = std::array<Kokkos::Tools::Experimental::VariableValue,
301 : space_dimensionality>;
302 : template <class Key, class Value>
303 : using extended_map = std::map<Key, Value>;
304 : template <typename Key>
305 : using extended_problem =
306 : MultidimensionalSparseTuningProblem<extended_map, MaxDimensionSize, Key,
307 : ProblemSpaceInput>;
308 : template <typename Key, typename Value>
309 : using ExtendedProblemSpace =
310 : typename Impl::MapTypeConverter<extended_map<Key, Value>>::type;
311 :
312 : template <typename Key>
313 : auto extend(const std::string& axis_name,
314 : const std::vector<Key>& new_tuning_axis) const
315 : -> extended_problem<Key> {
316 : ExtendedProblemSpace<Key, ProblemSpaceInput> extended_space;
317 : for (auto& key : new_tuning_axis) {
318 : extended_space.add_root_value(key);
319 : extended_space.add_sub_container(m_space);
320 : }
321 : std::vector<std::string> extended_names;
322 : extended_names.reserve(m_variable_names.size() + 1);
323 : extended_names.push_back(axis_name);
324 : extended_names.insert(extended_names.end(), m_variable_names.begin(),
325 : m_variable_names.end());
326 : return extended_problem<Key>(extended_space, extended_names);
327 : }
328 :
329 : private:
330 : StoredProblemSpace m_space;
331 : std::array<size_t, space_dimensionality> variable_ids;
332 : std::vector<std::string> m_variable_names;
333 : size_t context;
334 :
335 : public:
336 : MultidimensionalSparseTuningProblem() = default;
337 :
338 : MultidimensionalSparseTuningProblem(StoredProblemSpace space,
339 : const std::vector<std::string>& names)
340 : : m_space(std::move(space)), m_variable_names(names) {
341 : assert(names.size() == space_dimensionality);
342 : for (unsigned long x = 0; x < names.size(); ++x) {
343 : VariableInfo info;
344 : info.type = Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
345 : info.category = Kokkos::Tools::Experimental::StatisticalCategory::
346 : kokkos_value_interval;
347 : info.valueQuantity =
348 : Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
349 : info.candidates = Kokkos::Tools::Experimental::make_candidate_range(
350 : tuning_min, tuning_max, tuning_step, true, true);
351 : variable_ids[x] = declare_output_type(names[x], info);
352 : }
353 : }
354 :
355 : MultidimensionalSparseTuningProblem(ProblemSpaceInput space,
356 : const std::vector<std::string>& names)
357 : : MultidimensionalSparseTuningProblem(HierarchyConstructor::build(space),
358 : names) {}
359 :
360 : template <typename... Coordinates>
361 : auto get_point(Coordinates... coordinates) {
362 : using ArrayType = std::array<Kokkos::Tools::Experimental::VariableValue,
363 : sizeof...(coordinates)>;
364 : return Impl::get_point(
365 : m_space, ArrayType({Kokkos::Tools::Experimental::make_variable_value(
366 : 0, static_cast<double>(coordinates))...}));
367 : }
368 :
369 : auto begin() {
370 : context = Kokkos::Tools::Experimental::get_new_context_id();
371 : ValueArray values;
372 : for (int x = 0; x < space_dimensionality; ++x) {
373 : values[x] = Kokkos::Tools::Experimental::make_variable_value(
374 : variable_ids[x], 0.0);
375 : }
376 : begin_context(context);
377 : request_output_values(context, space_dimensionality, values.data());
378 : return Impl::get_point(m_space, values);
379 : }
380 :
381 : auto end() { end_context(context); }
382 : };
383 :
/**
 * CRTP mixin for tuners that wrap a multidimensional tuning problem.
 *
 * `Tuner` must provide `get_tuner() const` returning the wrapped problem.
 * The constructor is private and `Tuner` is a friend, so only `Tuner`
 * itself can derive from this instantiation.
 */
template <typename Tuner>
struct ExtendableTunerMixin {
  /**
   * Returns the wrapped problem extended by one additional axis named
   * `axis_name` with the values `new_axis` (forwards to the problem's
   * `extend`).
   */
  template <typename Key>
  auto combine(const std::string& axis_name,
               const std::vector<Key>& new_axis) const {
    const auto& sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.extend(axis_name, new_axis);
  }

  /**
   * Forwards the given coordinates to the wrapped problem's `get_point`.
   *
   * The problem is held in a mutable local rather than a const reference,
   * so this also compiles when the problem's `get_point` is not
   * const-qualified.
   */
  template <typename... Coordinates>
  auto get_point(Coordinates... coordinates) {
    auto sub_tuner = static_cast<const Tuner*>(this)->get_tuner();
    return sub_tuner.get_point(coordinates...);
  }

 private:
  ExtendableTunerMixin() = default;
  friend Tuner;
};
403 :
404 : template <size_t MaxDimensionSize = 100, template <class...> class Container,
405 : class... TemplateArguments>
406 : auto make_multidimensional_sparse_tuning_problem(
407 : const Container<TemplateArguments...>& in, std::vector<std::string> names) {
408 : return MultidimensionalSparseTuningProblem<Container, MaxDimensionSize,
409 : TemplateArguments...>(in, names);
410 : }
411 :
412 0 : class TeamSizeTuner : public ExtendableTunerMixin<TeamSizeTuner> {
413 : private:
414 : using SpaceDescription = std::map<int64_t, std::vector<int64_t>>;
415 : using TunerType = decltype(make_multidimensional_sparse_tuning_problem<20>(
416 : std::declval<SpaceDescription>(),
417 : std::declval<std::vector<std::string>>()));
418 : TunerType tuner;
419 :
420 : public:
421 : TeamSizeTuner() = default;
422 : TeamSizeTuner& operator=(const TeamSizeTuner& other) = default;
423 : TeamSizeTuner(const TeamSizeTuner& other) = default;
424 : TeamSizeTuner& operator=(TeamSizeTuner&& other) = default;
425 : TeamSizeTuner(TeamSizeTuner&& other) = default;
426 : template <typename ViableConfigurationCalculator, typename Functor,
427 : typename TagType, typename... Properties>
428 : TeamSizeTuner(const std::string& name,
429 : const Kokkos::TeamPolicy<Properties...>& policy_in,
430 : const Functor& functor, const TagType& tag,
431 : ViableConfigurationCalculator calc) {
432 : using PolicyType = Kokkos::TeamPolicy<Properties...>;
433 : PolicyType policy(policy_in);
434 : auto initial_vector_length = policy.impl_vector_length();
435 : if (initial_vector_length < 1) {
436 : policy.impl_set_vector_length(1);
437 : }
438 : /**
439 : * Here we attempt to enumerate all of the possible configurations
440 : * to expose to an autotuner. There are three possibilities
441 : *
442 : * 1) We're tuning both vector length and team size
443 : * 2) We're tuning vector length but not team size
444 : * 3) We're tuning team size but not vector length
445 : *
446 : * (In the fourth case where nothing is tuned
447 : * this function won't be called)
448 : *
449 : * The set of valid team sizes is dependent on
450 : * a vector length, so this leads to three
451 : * algorithms
452 : *
453 : * 1) Loop over vector lengths to get the set
454 : * of team sizes for each vector length,
455 : * add it all to the set
456 : * 2) Loop over vector lengths to see if the
457 : * provided team size is valid for that
458 : * vector length. If so, add it
459 : * 3) A special case of (1) in which we only
460 : * have one vector length
461 : *
462 : */
463 : SpaceDescription space_description;
464 :
465 : auto max_vector_length = PolicyType::vector_length_max();
466 : std::vector<int64_t> allowed_vector_lengths;
467 :
468 : if (policy.impl_auto_vector_length()) { // case 1 or 2
469 : for (int vector_length = max_vector_length; vector_length >= 1;
470 : vector_length /= 2) {
471 : policy.impl_set_vector_length(vector_length);
472 : /**
473 : * Figuring out whether a vector length is valid depends
474 : * on whether we're in case 1 (tune everything) or 2 (just tune vector
475 : * length)
476 : *
477 : * If we're tuning everything, all legal vector lengths are valid.
478 : * If we're just tuning vector length, we need to check that if we
479 : * set this vector length, the team size provided will be valid.
480 : *
481 : * These are the left and right hand sides of the "or" in this
482 : * conditional, respectively.
483 : */
484 : auto max_team_size = calc.get_max_team_size(policy, functor, tag);
485 : if ((policy.impl_auto_team_size()) ||
486 : (policy.team_size() <= max_team_size)) {
487 : allowed_vector_lengths.push_back(vector_length);
488 : }
489 : }
490 : } else { // case 3, there's only one vector length to care about
491 : allowed_vector_lengths.push_back(policy.impl_vector_length());
492 : }
493 :
494 : for (const auto vector_length : allowed_vector_lengths) {
495 : std::vector<int64_t> allowed_team_sizes;
496 : policy.impl_set_vector_length(vector_length);
497 : auto max_team_size = calc.get_max_team_size(policy, functor, tag);
498 : if (policy.impl_auto_team_size()) { // case 1 or 3, try all legal team
499 : // sizes
500 : for (int team_size = max_team_size; team_size >= 1; team_size /= 2) {
501 : allowed_team_sizes.push_back(team_size);
502 : }
503 : } else { // case 2, just try the provided team size
504 : allowed_team_sizes.push_back(policy.team_size());
505 : }
506 : space_description[vector_length] = allowed_team_sizes;
507 : }
508 : tuner = make_multidimensional_sparse_tuning_problem<20>(
509 : space_description, {std::string(name + "_vector_length"),
510 : std::string(name + "_team_size")});
511 : policy.impl_set_vector_length(initial_vector_length);
512 : }
513 :
514 : template <typename... Properties>
515 : auto tune(const Kokkos::TeamPolicy<Properties...>& policy_in) {
516 : Kokkos::TeamPolicy<Properties...> policy(policy_in);
517 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
518 : auto configuration = tuner.begin();
519 : auto team_size = std::get<1>(configuration);
520 : auto vector_length = std::get<0>(configuration);
521 : if (vector_length > 0) {
522 : policy.impl_set_team_size(team_size);
523 : policy.impl_set_vector_length(vector_length);
524 : }
525 : }
526 : return policy;
527 : }
528 : void end() {
529 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
530 : tuner.end();
531 : }
532 : }
533 :
534 : TunerType get_tuner() const { return tuner; }
535 : };
536 : namespace Impl {
537 : template <class T>
538 : struct tuning_type_for;
539 :
540 : template <>
541 : struct tuning_type_for<double> {
542 : static constexpr Kokkos::Tools::Experimental::ValueType value =
543 : Kokkos::Tools::Experimental::ValueType::kokkos_value_double;
544 : static double get(
545 : const Kokkos::Tools::Experimental::VariableValue& value_struct) {
546 : return value_struct.value.double_value;
547 : }
548 : };
549 : template <>
550 : struct tuning_type_for<int64_t> {
551 : static constexpr Kokkos::Tools::Experimental::ValueType value =
552 : Kokkos::Tools::Experimental::ValueType::kokkos_value_int64;
553 : static int64_t get(
554 : const Kokkos::Tools::Experimental::VariableValue& value_struct) {
555 : return value_struct.value.int_value;
556 : }
557 : };
558 : } // namespace Impl
559 : template <class Bound>
560 : class SingleDimensionalRangeTuner {
561 : size_t id;
562 : size_t context;
563 : using tuning_util = Impl::tuning_type_for<Bound>;
564 :
565 : Bound default_value;
566 :
567 : public:
568 : SingleDimensionalRangeTuner() = default;
569 : SingleDimensionalRangeTuner(
570 : const std::string& name,
571 : Kokkos::Tools::Experimental::StatisticalCategory category,
572 : Bound default_val, Bound lower, Bound upper, Bound step = (Bound)0) {
573 : default_value = default_val;
574 : Kokkos::Tools::Experimental::VariableInfo info;
575 : info.category = category;
576 : info.candidates = make_candidate_range(
577 : static_cast<Bound>(lower), static_cast<Bound>(upper),
578 : static_cast<Bound>(step), false, false);
579 : info.valueQuantity =
580 : Kokkos::Tools::Experimental::CandidateValueType::kokkos_value_range;
581 : info.type = tuning_util::value;
582 : id = Kokkos::Tools::Experimental::declare_output_type(name, info);
583 : }
584 :
585 : Bound begin() {
586 : context = Kokkos::Tools::Experimental::get_new_context_id();
587 : Kokkos::Tools::Experimental::begin_context(context);
588 : auto tuned_value =
589 : Kokkos::Tools::Experimental::make_variable_value(id, default_value);
590 : Kokkos::Tools::Experimental::request_output_values(context, 1,
591 : &tuned_value);
592 : return tuning_util::get(tuned_value);
593 : }
594 :
595 : void end() { Kokkos::Tools::Experimental::end_context(context); }
596 :
597 : template <typename Functor>
598 : void with_tuned_value(Functor& func) {
599 : func(begin());
600 : end();
601 : }
602 : };
603 :
604 : class RangePolicyOccupancyTuner {
605 : private:
606 : using TunerType = SingleDimensionalRangeTuner<int64_t>;
607 : TunerType tuner;
608 :
609 : public:
610 : RangePolicyOccupancyTuner() = default;
611 : template <typename ViableConfigurationCalculator, typename Functor,
612 : typename TagType, typename... Properties>
613 : RangePolicyOccupancyTuner(const std::string& name,
614 : const Kokkos::RangePolicy<Properties...>&,
615 : const Functor&, const TagType&,
616 : ViableConfigurationCalculator)
617 : : tuner(TunerType(name,
618 : Kokkos::Tools::Experimental::StatisticalCategory::
619 : kokkos_value_ratio,
620 : 100, 5, 100, 5)) {}
621 :
622 : template <typename... Properties>
623 : auto tune(const Kokkos::RangePolicy<Properties...>& policy_in) {
624 : Kokkos::RangePolicy<Properties...> policy(policy_in);
625 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
626 : auto occupancy = tuner.begin();
627 : policy.impl_set_desired_occupancy(
628 : Kokkos::Experimental::DesiredOccupancy{static_cast<int>(occupancy)});
629 : }
630 : return policy;
631 : }
632 : void end() {
633 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
634 : tuner.end();
635 : }
636 : }
637 :
638 : TunerType get_tuner() const { return tuner; }
639 : };
640 :
641 : namespace Impl {
642 :
/**
 * Appends the powers of two 1, 2, 4, ... that are strictly smaller than
 * `tile_size` to `cont`. For `tile_size <= 1` nothing is appended.
 *
 * The loop stops before a doubling that could exceed `tile_size`, so the
 * counter never overflows, even for `tile_size` close to INT_MAX.
 */
template <typename T>
void fill_tile(std::vector<T>& cont, int tile_size) {
  for (int x = 1; x < tile_size; x *= 2) {
    cont.push_back(x);
    // 2 * x would be >= tile_size: the loop is finished, and doubling might
    // overflow a signed int.
    if (x > tile_size / 2) break;
  }
}

/**
 * Nested variant: for every power of two `x` strictly smaller than
 * `tile_size`, fills the sub container stored under key `x` using the
 * reduced limit `tile_size / x`. For `tile_size <= 1` nothing is inserted.
 */
template <typename T, typename Mapped>
void fill_tile(std::map<T, Mapped>& cont, int tile_size) {
  for (int x = 1; x < tile_size; x *= 2) {
    fill_tile(cont[x], tile_size / x);
    // Same overflow guard as in the vector overload.
    if (x > tile_size / 2) break;
  }
}
655 : } // namespace Impl
656 :
657 : template <int MDRangeRank>
658 : struct MDRangeTuner : public ExtendableTunerMixin<MDRangeTuner<MDRangeRank>> {
659 : private:
660 : static constexpr int rank = MDRangeRank;
661 : static constexpr int max_slices = 15;
662 : using SpaceDescription =
663 : typename Impl::n_dimensional_sparse_structure<int, rank>::type;
664 : using TunerType =
665 : decltype(make_multidimensional_sparse_tuning_problem<max_slices>(
666 : std::declval<SpaceDescription>(),
667 : std::declval<std::vector<std::string>>()));
668 : TunerType tuner;
669 :
670 : public:
671 : MDRangeTuner() = default;
672 : template <typename Functor, typename TagType, typename Calculator,
673 : typename... Properties>
674 : MDRangeTuner(const std::string& name,
675 : const Kokkos::MDRangePolicy<Properties...>& policy,
676 : const Functor& functor, const TagType& tag, Calculator calc) {
677 : SpaceDescription desc;
678 : int max_tile_size =
679 : calc.get_mdrange_max_tile_size_product(policy, functor, tag);
680 : Impl::fill_tile(desc, max_tile_size);
681 : std::vector<std::string> feature_names;
682 : for (int x = 0; x < rank; ++x) {
683 : feature_names.push_back(name + "_tile_size_" + std::to_string(x));
684 : }
685 : tuner = make_multidimensional_sparse_tuning_problem<max_slices>(
686 : desc, feature_names);
687 : }
688 : template <typename Policy, typename Tuple, size_t... Indices>
689 : void set_policy_tile(Policy& policy, const Tuple& tuple,
690 : const std::index_sequence<Indices...>&) {
691 : policy.impl_change_tile_size({std::get<Indices>(tuple)...});
692 : }
693 : template <typename... Properties>
694 : auto tune(const Kokkos::MDRangePolicy<Properties...>& policy_in) {
695 : Kokkos::MDRangePolicy<Properties...> policy(policy_in);
696 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
697 : auto configuration = tuner.begin();
698 : set_policy_tile(policy, configuration, std::make_index_sequence<rank>{});
699 : }
700 : return policy;
701 : }
702 : void end() {
703 : if (Kokkos::Tools::Experimental::have_tuning_tool()) {
704 : tuner.end();
705 : }
706 : }
707 :
708 : TunerType get_tuner() const { return tuner; }
709 : };
710 :
711 : template <class Choice>
712 : struct CategoricalTuner {
713 : using choice_list = std::vector<Choice>;
714 : choice_list choices;
715 : size_t context;
716 : size_t tuning_variable_id;
717 : CategoricalTuner(std::string name, choice_list m_choices)
718 : : choices(m_choices) {
719 : std::vector<int64_t> indices;
720 : for (typename decltype(choices)::size_type x = 0; x < choices.size(); ++x) {
721 : indices.push_back(x);
722 : }
723 : VariableInfo info;
724 : info.category = StatisticalCategory::kokkos_value_categorical;
725 : info.valueQuantity = CandidateValueType::kokkos_value_set;
726 : info.type = ValueType::kokkos_value_int64;
727 : info.candidates = make_candidate_set(indices.size(), indices.data());
728 : tuning_variable_id = declare_output_type(name, info);
729 : }
730 : const Choice& begin() {
731 : context = get_new_context_id();
732 : begin_context(context);
733 : VariableValue value = make_variable_value(tuning_variable_id, int64_t(0));
734 : request_output_values(context, 1, &value);
735 : return choices[value.value.int_value];
736 : }
737 : void end() { end_context(context); }
738 : };
739 :
740 : template <typename Choice>
741 : auto make_categorical_tuner(std::string name, std::vector<Choice> choices)
742 : -> CategoricalTuner<Choice> {
743 : return CategoricalTuner<Choice>(name, choices);
744 : }
745 :
746 : } // namespace Experimental
747 : } // namespace Tools
748 : } // namespace Kokkos
749 :
750 : #endif
|