Line data Source code
1 : //@HEADER
2 : // ************************************************************************
3 : //
4 : // Kokkos v. 4.0
5 : // Copyright (2022) National Technology & Engineering
6 : // Solutions of Sandia, LLC (NTESS).
7 : //
8 : // Under the terms of Contract DE-NA0003525 with NTESS,
9 : // the U.S. Government retains certain rights in this software.
10 : //
11 : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 : // See https://kokkos.org/LICENSE for license information.
13 : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 : //
15 : //@HEADER
16 :
17 : #ifndef KOKKOS_IMPL_KOKKOS_PROFILING_HPP
18 : #define KOKKOS_IMPL_KOKKOS_PROFILING_HPP
19 :
20 : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
21 : #define KOKKOS_IMPL_PUBLIC_INCLUDE
22 : #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
23 : #endif
24 :
25 : #include <Kokkos_Core_fwd.hpp>
26 : #include <Kokkos_ExecPolicy.hpp>
27 : #include <Kokkos_Macros.hpp>
28 : #include <Kokkos_Tuners.hpp>
29 : #include <impl/Kokkos_Profiling_Interface.hpp>
30 : #include <memory>
31 : #include <iosfwd>
32 : #include <unordered_map>
33 : #include <map>
34 : #include <string>
35 : #include <type_traits>
36 : #include <mutex>
37 : namespace Kokkos {
38 :
// Forward declarations only; both functions are defined outside this header.
// NOTE(review): judging by the names these report whether warnings are shown
// and whether internal tuning is enabled -- confirm against the definitions.
bool show_warnings() noexcept;
bool tune_internals() noexcept;
42 :
43 : namespace Tools {
44 :
/// Options handed to the tools subsystem at start-up.
///
/// Every field defaults to an explicit "unset" state: `help` to the `unset`
/// enumerator, the two strings to the `unset_string_option` sentinel.
struct InitArguments {
  /// Tri-state flag: not specified / explicitly off / explicitly on.
  // NOTE DZP: PossiblyUnsetOption predates the availability of C++17;
  // std::optional is the better long-term choice for this.
  enum PossiblyUnsetOption { unset, off, on };

  /// Sentinel marking a string option as "not specified".
  /// Only declared here; the definition lives outside this header.
  static const std::string unset_string_option;

  PossiblyUnsetOption help = unset;
  // NOTE(review): presumably the tool library to load and the argument string
  // forwarded to it -- confirm against the code that consumes these fields.
  std::string lib  = unset_string_option;
  std::string args = unset_string_option;
};
55 :
56 : namespace Impl {
57 :
/// Outcome of a tools-subsystem initialization step: a result code plus a
/// free-form message.
struct InitializationStatus {
  /// Possible outcomes; enumerator values are the implicit 0, 1, 2, 3.
  enum InitializationResult {
    success,
    failure,
    help_request,
    environment_argument_mismatch
  };

  InitializationResult result;
  // NOTE(review): presumably only meaningful when `result` is not `success`
  // -- confirm against the code that fills this in.
  std::string error_message;
};
68 : InitializationStatus initialize_tools_subsystem(
69 : const Kokkos::Tools::InitArguments& args);
70 :
71 : void parse_command_line_arguments(int& narg, char* arg[],
72 : InitArguments& arguments);
73 : Kokkos::Tools::Impl::InitializationStatus parse_environment_variables(
74 : InitArguments& arguments);
75 :
/// Simple aggregate wrapping an execution policy.
///
/// `Functor` does not appear in the body; it only distinguishes
/// instantiations.
template <class PolicyType, class Functor>
struct ToolResponse {
  PolicyType policy;
};
80 :
81 : } // namespace Impl
82 :
// ---------------------------------------------------------------------------
// Profiling hooks: kernels, regions, sections and events.
// Declarations only; the definitions live outside this header.
// Top-level `const` on by-value parameters is omitted because it is not part
// of the function type; these declare exactly the same functions as before.
// ---------------------------------------------------------------------------

// NOTE(review): presumably true once a tool library is active -- confirm.
bool profileLibraryLoaded();

// Kernel begin/end pairs. Each begin* receives a name prefix, a device ID and
// a pointer through which a kernel ID can be written; the matching end* takes
// a kernel ID by value.
void beginParallelFor(const std::string& kernelPrefix, uint32_t devID,
                      uint64_t* kernelID);
void endParallelFor(uint64_t kernelID);

void beginParallelScan(const std::string& kernelPrefix, uint32_t devID,
                       uint64_t* kernelID);
void endParallelScan(uint64_t kernelID);

void beginParallelReduce(const std::string& kernelPrefix, uint32_t devID,
                         uint64_t* kernelID);
void endParallelReduce(uint64_t kernelID);

// Named regions; popRegion takes no argument.
// NOTE(review): presumably pops the most recently pushed region -- confirm.
void pushRegion(const std::string& kName);
void popRegion();

// Profile sections are addressed by a 32-bit ID; createProfileSection has a
// pointer parameter through which that ID can be written.
void createProfileSection(const std::string& sectionName, uint32_t* secID);
void startSection(uint32_t secID);
void stopSection(uint32_t secID);
void destroyProfileSection(uint32_t secID);

// Single named event.
void markEvent(const std::string& evName);
104 :
105 : void allocateData(const SpaceHandle space, const std::string label,
106 : const void* ptr, const uint64_t size);
107 : void deallocateData(const SpaceHandle space, const std::string label,
108 : const void* ptr, const uint64_t size);
109 :
110 : void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label,
111 : const void* dst_ptr, const SpaceHandle src_space,
112 : const std::string src_label, const void* src_ptr,
113 : const uint64_t size);
114 : void endDeepCopy();
// Fence hooks. beginFence receives a name (by value), a device ID and a
// pointer through which a handle can be written; endFence takes a handle by
// value. Top-level `const` on by-value parameters is omitted because it does
// not affect the function type.
void beginFence(std::string name, uint32_t deviceId, uint64_t* handle);
void endFence(uint64_t handle);
118 :
/**
 * Declares to the tool that a given DualView has been synced.
 *
 * @param label      name of the View within the DualView
 * @param ptr        that View's data pointer
 * @param to_device  true if the data is being synchronized to the device,
 *                   false otherwise
 */
void syncDualView(const std::string& label, const void* ptr, bool to_device);

/**
 * Declares to the tool that a given DualView has been modified.
 *
 * Note: this reports that somebody *called* modify on the DualView; it is not
 * invoked every time somebody touches the data.
 *
 * @param label      name of the View within the DualView
 * @param ptr        that View's data pointer
 * @param on_device  true if the data is being modified on the device,
 *                   false otherwise
 */
void modifyDualView(const std::string& label, const void* ptr, bool on_device);
147 :
148 : void declareMetadata(const std::string& key, const std::string& value);
149 : void initialize(
150 : const std::string& = {}); // should rename to impl_initialize ASAP
151 : void initialize(const Kokkos::Tools::InitArguments&);
152 : void initialize(int argc, char* argv[]);
153 : void finalize();
154 : bool printHelp(const std::string&);
155 : void parseArgs(const std::string&);
156 :
157 : Kokkos_Profiling_SpaceHandle make_space_handle(const char* space_name);
158 :
159 : namespace Experimental {
160 :
161 : namespace Impl {
/// Wrapper around a 32-bit value so that it has its own type; the
/// profile_fence_event overload taking this type adds `value` to the device
/// ID root of the space.
struct DirectFenceIDHandle {
  uint32_t value;
};
165 : //
166 : template <typename Space>
167 : uint32_t idForInstance(const uintptr_t instance) {
168 : static std::mutex instance_mutex;
169 : const std::lock_guard<std::mutex> lock(instance_mutex);
170 : /** Needed to be a ptr due to initialization order problems*/
171 : using map_type = std::map<uintptr_t, uint32_t>;
172 :
173 : static std::shared_ptr<map_type> map;
174 : if (map.get() == nullptr) {
175 : map = std::make_shared<map_type>(map_type());
176 : }
177 :
178 : static uint32_t value = 0;
179 : constexpr const uint32_t offset =
180 : Kokkos::Tools::Experimental::NumReservedDeviceIDs;
181 :
182 : auto find = map->find(instance);
183 : if (find == map->end()) {
184 : auto ret = offset + value++;
185 : (*map)[instance] = ret;
186 : return ret;
187 : }
188 :
189 : return find->second;
190 : }
191 :
192 : template <typename Space, typename FencingFunctor>
193 : void profile_fence_event(const std::string& name, DirectFenceIDHandle devIDTag,
194 : const FencingFunctor& func) {
195 : uint64_t handle = 0;
196 : Kokkos::Tools::beginFence(
197 : name,
198 : Kokkos::Tools::Experimental::device_id_root<Space>() + devIDTag.value,
199 : &handle);
200 : func();
201 : Kokkos::Tools::endFence(handle);
202 : }
203 :
204 : template <typename Space, typename FencingFunctor>
205 : void profile_fence_event(
206 : const std::string& name,
207 : Kokkos::Tools::Experimental::SpecialSynchronizationCases reason,
208 : const FencingFunctor& func) {
209 : uint64_t handle = 0;
210 : Kokkos::Tools::beginFence(
211 : name, device_id_root<Space>() + int_for_synchronization_reason(reason),
212 : &handle); // TODO: correct ID
213 : func();
214 : Kokkos::Tools::endFence(handle);
215 : }
216 : } // namespace Impl
217 : void set_init_callback(initFunction callback);
218 : void set_finalize_callback(finalizeFunction callback);
219 : void set_parse_args_callback(parseArgsFunction callback);
220 : void set_print_help_callback(printHelpFunction callback);
221 : void set_begin_parallel_for_callback(beginFunction callback);
222 : void set_end_parallel_for_callback(endFunction callback);
223 : void set_begin_parallel_reduce_callback(beginFunction callback);
224 : void set_end_parallel_reduce_callback(endFunction callback);
225 : void set_begin_parallel_scan_callback(beginFunction callback);
226 : void set_end_parallel_scan_callback(endFunction callback);
227 : void set_push_region_callback(pushFunction callback);
228 : void set_pop_region_callback(popFunction callback);
229 : void set_allocate_data_callback(allocateDataFunction callback);
230 : void set_deallocate_data_callback(deallocateDataFunction callback);
231 : void set_create_profile_section_callback(createProfileSectionFunction callback);
232 : void set_start_profile_section_callback(startProfileSectionFunction callback);
233 : void set_stop_profile_section_callback(stopProfileSectionFunction callback);
234 : void set_destroy_profile_section_callback(
235 : destroyProfileSectionFunction callback);
236 : void set_profile_event_callback(profileEventFunction callback);
237 : void set_begin_deep_copy_callback(beginDeepCopyFunction callback);
238 : void set_end_deep_copy_callback(endDeepCopyFunction callback);
239 : void set_begin_fence_callback(beginFenceFunction callback);
240 : void set_end_fence_callback(endFenceFunction callback);
241 : void set_dual_view_sync_callback(dualViewSyncFunction callback);
242 : void set_dual_view_modify_callback(dualViewModifyFunction callback);
243 : void set_declare_metadata_callback(declareMetadataFunction callback);
244 : void set_request_tool_settings_callback(requestToolSettingsFunction callback);
245 : void set_provide_tool_programming_interface_callback(
246 : provideToolProgrammingInterfaceFunction callback);
247 : void set_declare_output_type_callback(outputTypeDeclarationFunction callback);
248 : void set_declare_input_type_callback(inputTypeDeclarationFunction callback);
249 : void set_request_output_values_callback(requestValueFunction callback);
250 : void set_declare_optimization_goal_callback(
251 : optimizationGoalDeclarationFunction callback);
252 : void set_end_context_callback(contextEndFunction callback);
253 : void set_begin_context_callback(contextBeginFunction callback);
254 :
255 : void pause_tools();
256 : void resume_tools();
257 :
258 : EventSet get_callbacks();
259 : void set_callbacks(EventSet new_events);
260 : } // namespace Experimental
261 :
namespace Experimental {
// Forward declarations of the context-id accessors; both take no arguments
// and return a size_t. The definitions live outside this header.
size_t get_new_context_id();
size_t get_current_context_id();
}  // namespace Experimental

// Intentionally empty in this header.
namespace Impl {}  // namespace Impl
269 :
270 : } // namespace Tools
271 : namespace Profiling {
272 :
273 : // don't let ClangFormat reorder the using-declarations below
274 : // clang-format off
275 : using Kokkos::Tools::profileLibraryLoaded;
276 :
277 : using Kokkos::Tools::printHelp;
278 : using Kokkos::Tools::parseArgs;
279 :
280 : using Kokkos::Tools::initialize;
281 : using Kokkos::Tools::finalize;
282 :
283 : using Kokkos::Tools::beginParallelFor;
284 : using Kokkos::Tools::beginParallelReduce;
285 : using Kokkos::Tools::beginParallelScan;
286 : using Kokkos::Tools::endParallelFor;
287 : using Kokkos::Tools::endParallelReduce;
288 : using Kokkos::Tools::endParallelScan;
289 :
290 : using Kokkos::Tools::allocateData;
291 : using Kokkos::Tools::deallocateData;
292 :
293 : using Kokkos::Tools::beginDeepCopy;
294 : using Kokkos::Tools::endDeepCopy;
295 :
296 : using Kokkos::Tools::pushRegion;
297 : using Kokkos::Tools::popRegion;
298 :
299 : using Kokkos::Tools::createProfileSection;
300 : using Kokkos::Tools::destroyProfileSection;
301 : using Kokkos::Tools::startSection;
302 : using Kokkos::Tools::stopSection;
303 :
304 : using Kokkos::Tools::markEvent;
305 :
306 : using Kokkos::Tools::make_space_handle;
307 : // clang-format on
308 :
309 : namespace Experimental {
310 : using Kokkos::Tools::Experimental::set_allocate_data_callback;
311 : using Kokkos::Tools::Experimental::set_begin_deep_copy_callback;
312 : using Kokkos::Tools::Experimental::set_begin_parallel_for_callback;
313 : using Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback;
314 : using Kokkos::Tools::Experimental::set_begin_parallel_scan_callback;
315 : using Kokkos::Tools::Experimental::set_create_profile_section_callback;
316 : using Kokkos::Tools::Experimental::set_deallocate_data_callback;
317 : using Kokkos::Tools::Experimental::set_destroy_profile_section_callback;
318 : using Kokkos::Tools::Experimental::set_end_deep_copy_callback;
319 : using Kokkos::Tools::Experimental::set_end_parallel_for_callback;
320 : using Kokkos::Tools::Experimental::set_end_parallel_reduce_callback;
321 : using Kokkos::Tools::Experimental::set_end_parallel_scan_callback;
322 : using Kokkos::Tools::Experimental::set_finalize_callback;
323 : using Kokkos::Tools::Experimental::set_init_callback;
324 : using Kokkos::Tools::Experimental::set_parse_args_callback;
325 : using Kokkos::Tools::Experimental::set_pop_region_callback;
326 : using Kokkos::Tools::Experimental::set_print_help_callback;
327 : using Kokkos::Tools::Experimental::set_profile_event_callback;
328 : using Kokkos::Tools::Experimental::set_push_region_callback;
329 : using Kokkos::Tools::Experimental::set_start_profile_section_callback;
330 : using Kokkos::Tools::Experimental::set_stop_profile_section_callback;
331 :
332 : using Kokkos::Tools::Experimental::EventSet;
333 :
334 : using Kokkos::Tools::Experimental::pause_tools;
335 : using Kokkos::Tools::Experimental::resume_tools;
336 :
337 : using Kokkos::Tools::Experimental::get_callbacks;
338 : using Kokkos::Tools::Experimental::set_callbacks;
339 :
340 : } // namespace Experimental
341 : } // namespace Profiling
342 :
343 : namespace Tools {
344 : namespace Experimental {
345 :
346 : VariableValue make_variable_value(size_t id, int64_t val);
347 : VariableValue make_variable_value(size_t id, double val);
348 : VariableValue make_variable_value(size_t id, const std::string& val);
349 :
350 : SetOrRange make_candidate_set(size_t size, std::string* data);
351 : SetOrRange make_candidate_set(size_t size, int64_t* data);
352 : SetOrRange make_candidate_set(size_t size, double* data);
353 : SetOrRange make_candidate_range(double lower, double upper, double step,
354 : bool openLower, bool openUpper);
355 :
356 : SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
357 : bool openLower, bool openUpper);
358 :
359 : void declare_optimization_goal(const size_t context,
360 : const OptimizationGoal& goal);
361 :
362 : size_t declare_output_type(const std::string& typeName, VariableInfo info);
363 :
364 : size_t declare_input_type(const std::string& typeName, VariableInfo info);
365 :
366 : void set_input_values(size_t contextId, size_t count, VariableValue* values);
367 :
368 : void end_context(size_t contextId);
369 : void begin_context(size_t contextId);
370 :
371 : void request_output_values(size_t contextId, size_t count,
372 : VariableValue* values);
373 :
374 : bool have_tuning_tool();
375 :
376 : size_t get_new_context_id();
377 : size_t get_current_context_id();
378 :
379 : size_t get_new_variable_id();
380 : } // namespace Experimental
381 : } // namespace Tools
382 :
383 : } // namespace Kokkos
384 :
385 : #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
386 : #undef KOKKOS_IMPL_PUBLIC_INCLUDE
387 : #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
388 : #endif
389 :
390 : #endif
|