LCOV - code coverage report
Current view: top level - build/_deps/kokkos-src/core/src/impl - Kokkos_Profiling.hpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 1 1 100.0 %
Date: 2026-02-16 14:39:39 Functions: 0 0 -

          Line data    Source code
       1             : //@HEADER
       2             : // ************************************************************************
       3             : //
       4             : //                        Kokkos v. 4.0
       5             : //       Copyright (2022) National Technology & Engineering
       6             : //               Solutions of Sandia, LLC (NTESS).
       7             : //
       8             : // Under the terms of Contract DE-NA0003525 with NTESS,
       9             : // the U.S. Government retains certain rights in this software.
      10             : //
      11             : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
      12             : // See https://kokkos.org/LICENSE for license information.
      13             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      14             : //
      15             : //@HEADER
      16             : 
      17             : #ifndef KOKKOS_IMPL_KOKKOS_PROFILING_HPP
      18             : #define KOKKOS_IMPL_KOKKOS_PROFILING_HPP
      19             : 
      20             : #ifndef KOKKOS_IMPL_PUBLIC_INCLUDE
      21             : #define KOKKOS_IMPL_PUBLIC_INCLUDE
      22             : #define KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
      23             : #endif
      24             : 
      25             : #include <Kokkos_Core_fwd.hpp>
      26             : #include <Kokkos_ExecPolicy.hpp>
      27             : #include <Kokkos_Macros.hpp>
      28             : #include <Kokkos_Tuners.hpp>
      29             : #include <impl/Kokkos_Profiling_Interface.hpp>
      30             : #include <memory>
      31             : #include <iosfwd>
      32             : #include <unordered_map>
      33             : #include <map>
      34             : #include <string>
      35             : #include <type_traits>
      36             : #include <mutex>
      37             : namespace Kokkos {
      38             : 
      39             : // forward declaration
      40             : bool show_warnings() noexcept;
      41             : bool tune_internals() noexcept;
      42             : 
      43             : namespace Tools {
      44             : 
      45             : struct InitArguments {
      46             :   // NOTE DZP: PossiblyUnsetOption was introduced
      47             :   // before C++17, std::optional is a better choice
      48             :   // for this long-term
      49             :   static const std::string unset_string_option;
      50             :   enum PossiblyUnsetOption { unset, off, on };
      51             :   PossiblyUnsetOption help = unset;
      52             :   std::string lib          = unset_string_option;
      53             :   std::string args         = unset_string_option;
      54             : };
      55             : 
      56             : namespace Impl {
      57             : 
      58             : struct InitializationStatus {
      59             :   enum InitializationResult {
      60             :     success,
      61             :     failure,
      62             :     help_request,
      63             :     environment_argument_mismatch
      64             :   };
      65             :   InitializationResult result;
      66             :   std::string error_message;
      67             : };
      68             : InitializationStatus initialize_tools_subsystem(
      69             :     const Kokkos::Tools::InitArguments& args);
      70             : 
      71             : void parse_command_line_arguments(int& narg, char* arg[],
      72             :                                   InitArguments& arguments);
      73             : Kokkos::Tools::Impl::InitializationStatus parse_environment_variables(
      74             :     InitArguments& arguments);
      75             : 
      76             : template <typename PolicyType, typename Functor>
      77        1236 : struct ToolResponse {
      78             :   PolicyType policy;
      79             : };
      80             : 
      81             : }  // namespace Impl
      82             : 
      83             : bool profileLibraryLoaded();
      84             : 
      85             : void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID,
      86             :                       uint64_t* kernelID);
      87             : void endParallelFor(const uint64_t kernelID);
      88             : void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID,
      89             :                        uint64_t* kernelID);
      90             : void endParallelScan(const uint64_t kernelID);
      91             : void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID,
      92             :                          uint64_t* kernelID);
      93             : void endParallelReduce(const uint64_t kernelID);
      94             : 
      95             : void pushRegion(const std::string& kName);
      96             : void popRegion();
      97             : 
      98             : void createProfileSection(const std::string& sectionName, uint32_t* secID);
      99             : void startSection(const uint32_t secID);
     100             : void stopSection(const uint32_t secID);
     101             : void destroyProfileSection(const uint32_t secID);
     102             : 
     103             : void markEvent(const std::string& evName);
     104             : 
     105             : void allocateData(const SpaceHandle space, const std::string label,
     106             :                   const void* ptr, const uint64_t size);
     107             : void deallocateData(const SpaceHandle space, const std::string label,
     108             :                     const void* ptr, const uint64_t size);
     109             : 
     110             : void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label,
     111             :                    const void* dst_ptr, const SpaceHandle src_space,
     112             :                    const std::string src_label, const void* src_ptr,
     113             :                    const uint64_t size);
     114             : void endDeepCopy();
     115             : void beginFence(const std::string name, const uint32_t deviceId,
     116             :                 uint64_t* handle);
     117             : void endFence(const uint64_t handle);
     118             : 
     119             : /**
     120             :  * syncDualView declares to the tool that a given DualView
     121             :  * has been synced.
     122             :  *
     123             :  * Arguments:
     124             :  *
     125             :  * label:     name of the View within the DualView
     126             :  * ptr:       that View's data ptr
     127             :  * to_device: true if the data is being synchronized to the device
     128             :  *              false otherwise
     129             :  */
     130             : void syncDualView(const std::string& label, const void* const ptr,
     131             :                   bool to_device);
     132             : /**
     133             :  * modifyDualView declares to the tool that a given DualView
     134             :  * has been modified. Note: this means that somebody *called*
     135             :  * modify on the DualView; it is not invoked every time
     136             :  * somebody touches the data.
     137             :  *
     138             :  * Arguments:
     139             :  *
     140             :  * label:     name of the View within the DualView
     141             :  * ptr:       that View's data ptr
     142             :  * on_device: true if the data is being modified on the device
     143             :  *              false otherwise
     144             :  */
     145             : void modifyDualView(const std::string& label, const void* const ptr,
     146             :                     bool on_device);
     147             : 
     148             : void declareMetadata(const std::string& key, const std::string& value);
     149             : void initialize(
     150             :     const std::string& = {});  // should rename to impl_initialize ASAP
     151             : void initialize(const Kokkos::Tools::InitArguments&);
     152             : void initialize(int argc, char* argv[]);
     153             : void finalize();
     154             : bool printHelp(const std::string&);
     155             : void parseArgs(const std::string&);
     156             : 
     157             : Kokkos_Profiling_SpaceHandle make_space_handle(const char* space_name);
     158             : 
     159             : namespace Experimental {
     160             : 
     161             : namespace Impl {
     162             : struct DirectFenceIDHandle {
     163             :   uint32_t value;
     164             : };
     165             : //
     166             : template <typename Space>
     167             : uint32_t idForInstance(const uintptr_t instance) {
     168             :   static std::mutex instance_mutex;
     169             :   const std::lock_guard<std::mutex> lock(instance_mutex);
     170             :   /** Needed to be a ptr due to initialization order problems*/
     171             :   using map_type = std::map<uintptr_t, uint32_t>;
     172             : 
     173             :   static std::shared_ptr<map_type> map;
     174             :   if (map.get() == nullptr) {
     175             :     map = std::make_shared<map_type>(map_type());
     176             :   }
     177             : 
     178             :   static uint32_t value = 0;
     179             :   constexpr const uint32_t offset =
     180             :       Kokkos::Tools::Experimental::NumReservedDeviceIDs;
     181             : 
     182             :   auto find = map->find(instance);
     183             :   if (find == map->end()) {
     184             :     auto ret         = offset + value++;
     185             :     (*map)[instance] = ret;
     186             :     return ret;
     187             :   }
     188             : 
     189             :   return find->second;
     190             : }
     191             : 
     192             : template <typename Space, typename FencingFunctor>
     193             : void profile_fence_event(const std::string& name, DirectFenceIDHandle devIDTag,
     194             :                          const FencingFunctor& func) {
     195             :   uint64_t handle = 0;
     196             :   Kokkos::Tools::beginFence(
     197             :       name,
     198             :       Kokkos::Tools::Experimental::device_id_root<Space>() + devIDTag.value,
     199             :       &handle);
     200             :   func();
     201             :   Kokkos::Tools::endFence(handle);
     202             : }
     203             : 
     204             : template <typename Space, typename FencingFunctor>
     205             : void profile_fence_event(
     206             :     const std::string& name,
     207             :     Kokkos::Tools::Experimental::SpecialSynchronizationCases reason,
     208             :     const FencingFunctor& func) {
     209             :   uint64_t handle = 0;
     210             :   Kokkos::Tools::beginFence(
     211             :       name, device_id_root<Space>() + int_for_synchronization_reason(reason),
     212             :       &handle);  // TODO: correct ID
     213             :   func();
     214             :   Kokkos::Tools::endFence(handle);
     215             : }
     216             : }  // namespace Impl
     217             : void set_init_callback(initFunction callback);
     218             : void set_finalize_callback(finalizeFunction callback);
     219             : void set_parse_args_callback(parseArgsFunction callback);
     220             : void set_print_help_callback(printHelpFunction callback);
     221             : void set_begin_parallel_for_callback(beginFunction callback);
     222             : void set_end_parallel_for_callback(endFunction callback);
     223             : void set_begin_parallel_reduce_callback(beginFunction callback);
     224             : void set_end_parallel_reduce_callback(endFunction callback);
     225             : void set_begin_parallel_scan_callback(beginFunction callback);
     226             : void set_end_parallel_scan_callback(endFunction callback);
     227             : void set_push_region_callback(pushFunction callback);
     228             : void set_pop_region_callback(popFunction callback);
     229             : void set_allocate_data_callback(allocateDataFunction callback);
     230             : void set_deallocate_data_callback(deallocateDataFunction callback);
     231             : void set_create_profile_section_callback(createProfileSectionFunction callback);
     232             : void set_start_profile_section_callback(startProfileSectionFunction callback);
     233             : void set_stop_profile_section_callback(stopProfileSectionFunction callback);
     234             : void set_destroy_profile_section_callback(
     235             :     destroyProfileSectionFunction callback);
     236             : void set_profile_event_callback(profileEventFunction callback);
     237             : void set_begin_deep_copy_callback(beginDeepCopyFunction callback);
     238             : void set_end_deep_copy_callback(endDeepCopyFunction callback);
     239             : void set_begin_fence_callback(beginFenceFunction callback);
     240             : void set_end_fence_callback(endFenceFunction callback);
     241             : void set_dual_view_sync_callback(dualViewSyncFunction callback);
     242             : void set_dual_view_modify_callback(dualViewModifyFunction callback);
     243             : void set_declare_metadata_callback(declareMetadataFunction callback);
     244             : void set_request_tool_settings_callback(requestToolSettingsFunction callback);
     245             : void set_provide_tool_programming_interface_callback(
     246             :     provideToolProgrammingInterfaceFunction callback);
     247             : void set_declare_output_type_callback(outputTypeDeclarationFunction callback);
     248             : void set_declare_input_type_callback(inputTypeDeclarationFunction callback);
     249             : void set_request_output_values_callback(requestValueFunction callback);
     250             : void set_declare_optimization_goal_callback(
     251             :     optimizationGoalDeclarationFunction callback);
     252             : void set_end_context_callback(contextEndFunction callback);
     253             : void set_begin_context_callback(contextBeginFunction callback);
     254             : 
     255             : void pause_tools();
     256             : void resume_tools();
     257             : 
     258             : EventSet get_callbacks();
     259             : void set_callbacks(EventSet new_events);
     260             : }  // namespace Experimental
     261             : 
     262             : namespace Experimental {
     263             : // forward declarations
     264             : size_t get_new_context_id();
     265             : size_t get_current_context_id();
     266             : }  // namespace Experimental
     267             : 
     268             : namespace Impl {}  // namespace Impl
     269             : 
     270             : }  // namespace Tools
     271             : namespace Profiling {
     272             : 
     273             : // don't let ClangFormat reorder the using-declarations below
     274             : // clang-format off
     275             : using Kokkos::Tools::profileLibraryLoaded;
     276             : 
     277             : using Kokkos::Tools::printHelp;
     278             : using Kokkos::Tools::parseArgs;
     279             : 
     280             : using Kokkos::Tools::initialize;
     281             : using Kokkos::Tools::finalize;
     282             : 
     283             : using Kokkos::Tools::beginParallelFor;
     284             : using Kokkos::Tools::beginParallelReduce;
     285             : using Kokkos::Tools::beginParallelScan;
     286             : using Kokkos::Tools::endParallelFor;
     287             : using Kokkos::Tools::endParallelReduce;
     288             : using Kokkos::Tools::endParallelScan;
     289             : 
     290             : using Kokkos::Tools::allocateData;
     291             : using Kokkos::Tools::deallocateData;
     292             : 
     293             : using Kokkos::Tools::beginDeepCopy;
     294             : using Kokkos::Tools::endDeepCopy;
     295             : 
     296             : using Kokkos::Tools::pushRegion;
     297             : using Kokkos::Tools::popRegion;
     298             : 
     299             : using Kokkos::Tools::createProfileSection;
     300             : using Kokkos::Tools::destroyProfileSection;
     301             : using Kokkos::Tools::startSection;
     302             : using Kokkos::Tools::stopSection;
     303             : 
     304             : using Kokkos::Tools::markEvent;
     305             : 
     306             : using Kokkos::Tools::make_space_handle;
     307             : // clang-format on
     308             : 
     309             : namespace Experimental {
     310             : using Kokkos::Tools::Experimental::set_allocate_data_callback;
     311             : using Kokkos::Tools::Experimental::set_begin_deep_copy_callback;
     312             : using Kokkos::Tools::Experimental::set_begin_parallel_for_callback;
     313             : using Kokkos::Tools::Experimental::set_begin_parallel_reduce_callback;
     314             : using Kokkos::Tools::Experimental::set_begin_parallel_scan_callback;
     315             : using Kokkos::Tools::Experimental::set_create_profile_section_callback;
     316             : using Kokkos::Tools::Experimental::set_deallocate_data_callback;
     317             : using Kokkos::Tools::Experimental::set_destroy_profile_section_callback;
     318             : using Kokkos::Tools::Experimental::set_end_deep_copy_callback;
     319             : using Kokkos::Tools::Experimental::set_end_parallel_for_callback;
     320             : using Kokkos::Tools::Experimental::set_end_parallel_reduce_callback;
     321             : using Kokkos::Tools::Experimental::set_end_parallel_scan_callback;
     322             : using Kokkos::Tools::Experimental::set_finalize_callback;
     323             : using Kokkos::Tools::Experimental::set_init_callback;
     324             : using Kokkos::Tools::Experimental::set_parse_args_callback;
     325             : using Kokkos::Tools::Experimental::set_pop_region_callback;
     326             : using Kokkos::Tools::Experimental::set_print_help_callback;
     327             : using Kokkos::Tools::Experimental::set_profile_event_callback;
     328             : using Kokkos::Tools::Experimental::set_push_region_callback;
     329             : using Kokkos::Tools::Experimental::set_start_profile_section_callback;
     330             : using Kokkos::Tools::Experimental::set_stop_profile_section_callback;
     331             : 
     332             : using Kokkos::Tools::Experimental::EventSet;
     333             : 
     334             : using Kokkos::Tools::Experimental::pause_tools;
     335             : using Kokkos::Tools::Experimental::resume_tools;
     336             : 
     337             : using Kokkos::Tools::Experimental::get_callbacks;
     338             : using Kokkos::Tools::Experimental::set_callbacks;
     339             : 
     340             : }  // namespace Experimental
     341             : }  // namespace Profiling
     342             : 
     343             : namespace Tools {
     344             : namespace Experimental {
     345             : 
     346             : VariableValue make_variable_value(size_t id, int64_t val);
     347             : VariableValue make_variable_value(size_t id, double val);
     348             : VariableValue make_variable_value(size_t id, const std::string& val);
     349             : 
     350             : SetOrRange make_candidate_set(size_t size, std::string* data);
     351             : SetOrRange make_candidate_set(size_t size, int64_t* data);
     352             : SetOrRange make_candidate_set(size_t size, double* data);
     353             : SetOrRange make_candidate_range(double lower, double upper, double step,
     354             :                                 bool openLower, bool openUpper);
     355             : 
     356             : SetOrRange make_candidate_range(int64_t lower, int64_t upper, int64_t step,
     357             :                                 bool openLower, bool openUpper);
     358             : 
     359             : void declare_optimization_goal(const size_t context,
     360             :                                const OptimizationGoal& goal);
     361             : 
     362             : size_t declare_output_type(const std::string& typeName, VariableInfo info);
     363             : 
     364             : size_t declare_input_type(const std::string& typeName, VariableInfo info);
     365             : 
     366             : void set_input_values(size_t contextId, size_t count, VariableValue* values);
     367             : 
     368             : void end_context(size_t contextId);
     369             : void begin_context(size_t contextId);
     370             : 
     371             : void request_output_values(size_t contextId, size_t count,
     372             :                            VariableValue* values);
     373             : 
     374             : bool have_tuning_tool();
     375             : 
     376             : size_t get_new_context_id();
     377             : size_t get_current_context_id();
     378             : 
     379             : size_t get_new_variable_id();
     380             : }  // namespace Experimental
     381             : }  // namespace Tools
     382             : 
     383             : }  // namespace Kokkos
     384             : 
     385             : #ifdef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
     386             : #undef KOKKOS_IMPL_PUBLIC_INCLUDE
     387             : #undef KOKKOS_IMPL_PUBLIC_INCLUDE_NOTDEFINED_PROFILING
     388             : #endif
     389             : 
     390             : #endif

Generated by: LCOV version 1.14