LCOV - code coverage report
Current view: top level - build/_deps/kokkos-src/core/src/impl - Kokkos_SharedAlloc.hpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 23 30 76.7 %
Date: 2026-02-16 14:39:39 Functions: 8 14 57.1 %

          Line data    Source code
       1             : //@HEADER
       2             : // ************************************************************************
       3             : //
       4             : //                        Kokkos v. 4.0
       5             : //       Copyright (2022) National Technology & Engineering
       6             : //               Solutions of Sandia, LLC (NTESS).
       7             : //
       8             : // Under the terms of Contract DE-NA0003525 with NTESS,
       9             : // the U.S. Government retains certain rights in this software.
      10             : //
      11             : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
      12             : // See https://kokkos.org/LICENSE for license information.
      13             : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      14             : //
      15             : //@HEADER
      16             : 
      17             : #ifndef KOKKOS_SHARED_ALLOC_HPP
      18             : #define KOKKOS_SHARED_ALLOC_HPP
      19             : 
      20             : #include <Kokkos_Macros.hpp>
      21             : #include <Kokkos_Core_fwd.hpp>
      22             : #include <impl/Kokkos_Error.hpp>  // Impl::throw_runtime_exception
      23             : 
      24             : #include <cstdint>
      25             : #include <string>
      26             : 
      27             : namespace Kokkos {
      28             : namespace Impl {
      29             : 
      30             : template <class MemorySpace = void, class DestroyFunctor = void>
      31             : class SharedAllocationRecord;
      32             : 
      33             : template <class MemorySpace>
      34             : class SharedAllocationRecordCommon;
      35             : 
      36             : class SharedAllocationHeader {  // record pointer + label buffer; precedes the user memory
      37             :  private:
      38             :   using Record = SharedAllocationRecord<void, void>;
      39             : 
      40             : #if defined(KOKKOS_ARCH_AMD_GPU)  // label takes what is left of 256 (else 128) bytes after the pointer
      41             :   static constexpr unsigned maximum_label_length =
      42             :       (1u << 8 /* 256 */) - sizeof(Record*);
      43             : #else
      44             :   static constexpr unsigned maximum_label_length =
      45             :       (1u << 7 /* 128 */) - sizeof(Record*);
      46             : #endif  // KOKKOS_ARCH_AMD_GPU
      47             : 
      48             :   template <class, class>
      49             :   friend class SharedAllocationRecord;
      50             :   template <class>
      51             :   friend class SharedAllocationRecordCommon;
      52             :   template <class>
      53             :   friend class HostInaccessibleSharedAllocationRecordCommon;
      54             :   friend void fill_host_accessible_header_info(
      55             :       SharedAllocationRecord<void, void>*, SharedAllocationHeader&,
      56             :       std::string const&);
      57             : 
      58             :   Record* m_record;
      59             :   char m_label[maximum_label_length];
      60             : 
      61             :  public:
      62             :   /* Given user memory, get a pointer to the header placed directly before it */
      63             :   KOKKOS_INLINE_FUNCTION static const SharedAllocationHeader* get_header(
      64             :       void const* alloc_ptr) {
      65             :     return reinterpret_cast<SharedAllocationHeader const*>(
      66             :         static_cast<char const*>(alloc_ptr) - sizeof(SharedAllocationHeader));
      67             :   }
      68             : 
      69             :   KOKKOS_INLINE_FUNCTION
      70             :   const char* label() const { return m_label; }
      71             : };
      72             : 
      73             : template <>
      74             : class SharedAllocationRecord<void, void> {
      75             :  protected:
      76             : #if defined(KOKKOS_ARCH_AMD_GPU)
      77             :   static_assert(sizeof(SharedAllocationHeader) == (1u << 8 /* 256 */),
      78             :                 "sizeof(SharedAllocationHeader) != 256");
      79             : #else
      80             :   static_assert(sizeof(SharedAllocationHeader) == (1u << 7 /* 128 */),
      81             :                 "sizeof(SharedAllocationHeader) != 128");
      82             : #endif  // KOKKOS_ARCH_AMD_GPU
      83             : 
      84             :   template <class, class>
      85             :   friend class SharedAllocationRecord;
      86             :   template <class>
      87             :   friend class SharedAllocationRecordCommon;
      88             :   template <class>
      89             :   friend class HostInaccessibleSharedAllocationRecordCommon;
      90             : 
      91             :   using function_type = void (*)(SharedAllocationRecord<void, void>*);  // type of m_dealloc
      92             : 
      93             :   SharedAllocationHeader* const m_alloc_ptr;
      94             :   size_t const m_alloc_size;
      95             :   function_type const m_dealloc;
      96             : #ifdef KOKKOS_ENABLE_DEBUG
      97             :   SharedAllocationRecord* const m_root;
      98             :   SharedAllocationRecord* m_prev;
      99             :   SharedAllocationRecord* m_next;
     100             : #endif  // KOKKOS_ENABLE_DEBUG
     101             :   int m_count;
     102             :   std::string m_label;
     103             : 
     104             :   SharedAllocationRecord(SharedAllocationRecord&&)                 = delete;
     105             :   SharedAllocationRecord(const SharedAllocationRecord&)            = delete;
     106             :   SharedAllocationRecord& operator=(SharedAllocationRecord&&)      = delete;
     107             :   SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
     108             : 
     109             :   /**\brief  Construct and insert into 'arg_root' tracking set.
     110             :    *         use_count is zero.
     111             :    */
     112             :   SharedAllocationRecord(
     113             : #ifdef KOKKOS_ENABLE_DEBUG
     114             :       SharedAllocationRecord* arg_root,
     115             : #endif  // KOKKOS_ENABLE_DEBUG
     116             :       SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size,
     117             :       function_type arg_dealloc, const std::string& label);
     118             :  private:
     119             :   static inline thread_local int t_tracking_enabled = 1;  // per-thread switch, starts at 1 (on)
     120             : 
     121             :  public:
     122           0 :   virtual std::string get_label() const { return std::string("Unmanaged"); }
     123             : 
     124             : #if defined(__EDG__)
     125             : #pragma push
     126             : #pragma diag_suppress implicit_return_from_non_void_function
     127             : #endif
     128       36186 :   static KOKKOS_FUNCTION int tracking_enabled() {
     129       36186 :     KOKKOS_IF_ON_HOST(return t_tracking_enabled;)
     130       36186 :     KOKKOS_IF_ON_DEVICE(return 0;)
     131             :   }
     132             : #if defined(__EDG__)
     133             : #pragma pop
     134             : #endif
     135             : 
     136             :   /**\brief Turn shared allocation tracking off for the calling thread
     137             :    *        by clearing the thread_local tracking flag.
     138             :    */
     139        1236 :   static void tracking_disable() { t_tracking_enabled = 0; }
     140             : 
     141             :   /**\brief Turn shared allocation tracking back on for the calling thread
     142             :    *        by setting the thread_local tracking flag.
     143             :    */
     144        1236 :   static void tracking_enable() { t_tracking_enabled = 1; }
     145             : 
     146         868 :   virtual ~SharedAllocationRecord() = default;
     147             : 
     148             :   SharedAllocationRecord()  // empty record: null allocation, zero size, zero use count
     149             :       : m_alloc_ptr(nullptr),
     150             :         m_alloc_size(0),
     151             :         m_dealloc(nullptr),
     152             : #ifdef KOKKOS_ENABLE_DEBUG
     153             :         m_root(this),
     154             :         m_prev(this),
     155             :         m_next(this),
     156             : #endif  // KOKKOS_ENABLE_DEBUG
     157             :         m_count(0) {
     158             :   }
     159             : 
     160             :   static constexpr unsigned maximum_label_length =
     161             :       SharedAllocationHeader::maximum_label_length;
     162             : 
     163             :   KOKKOS_FUNCTION
     164             :   const SharedAllocationHeader* head() const { return m_alloc_ptr; }
     165             : 
     166             :   /* User's memory begins at the end of the header */
     167             :   KOKKOS_FUNCTION
     168             :   void* data() const { return static_cast<void*>(m_alloc_ptr + 1); }
     169             : 
     170             :   /* Size of the user's memory: the allocation size minus the header */
     171             :   size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader); }
     172             : 
     173             :   /* Not 'constexpr': 'm_count' is read through a volatile-qualified pointer */
     174             :   int use_count() const { return *static_cast<const volatile int*>(&m_count); }
     175             : 
     176             :   /* Increment use count */
     177             :   static void increment(SharedAllocationRecord*);
     178             : 
     179             :   /* Decrement use count. If 1->0 then remove from the tracking list and invoke
     180             :    * m_dealloc */
     181             :   static SharedAllocationRecord* decrement(SharedAllocationRecord*);
     182             : 
     183             :   /* Given a root record and data pointer find the record */
     184             :   static SharedAllocationRecord* find(SharedAllocationRecord* const,
     185             :                                       void* const);
     186             : 
     187             :   /*  Sanity check for the whole set of records to which the input record
     188             :    * belongs. Locks the set's insert/erase operations until the sanity check is
     189             :    * complete.
     190             :    */
     191             :   static bool is_sane(SharedAllocationRecord*);
     192             : 
     193             :   /*  Print host-accessible records */
     194             :   static void print_host_accessible_records(
     195             :       std::ostream&, const char* const space_name,
     196             :       const SharedAllocationRecord* const root, const bool detail);
     197             : };
     198             : 
     199             : template <class MemorySpace>  // asks the space for alloc_size plus room for the header
     200             : SharedAllocationHeader* checked_allocation_with_header(MemorySpace const& space,
     201             :                                                        std::string const& label,
     202             :                                                        size_t alloc_size) {
     203             :   return reinterpret_cast<SharedAllocationHeader*>(space.allocate(
     204             :       label.c_str(), alloc_size + sizeof(SharedAllocationHeader), alloc_size));
     205             : }
     206             : 
     207             : template <class ExecutionSpace, class MemorySpace>  // same request, passing exec_space on to allocate()
     208             : SharedAllocationHeader* checked_allocation_with_header(
     209             :     ExecutionSpace const& exec_space, MemorySpace const& space,
     210             :     std::string const& label, size_t alloc_size) {
     211             :   return reinterpret_cast<SharedAllocationHeader*>(
     212             :       space.allocate(exec_space, label.c_str(),
     213             :                      alloc_size + sizeof(SharedAllocationHeader), alloc_size));
     214             : }
     215             : 
     216             : void fill_host_accessible_header_info(SharedAllocationHeader& arg_header,
     217             :                                       std::string const& arg_label);  // NOTE(review): friend decl and callers in this header use a 3-arg (record, header, label) form - confirm
     218             : 
     219             : template <class MemorySpace>
     220             : class SharedAllocationRecordCommon : public SharedAllocationRecord<void, void> {
     221             :  private:
     222             :   using derived_t     = SharedAllocationRecord<MemorySpace, void>;
     223             :   using record_base_t = SharedAllocationRecord<void, void>;
     224             : 
     225             :  protected:
     226             :   using record_base_t::record_base_t;  // inherit the base record constructors
     227             : 
     228             :   MemorySpace m_space;
     229             : 
     230             : #ifdef KOKKOS_ENABLE_DEBUG
     231             :   static record_base_t s_root_record;
     232             : #endif  // KOKKOS_ENABLE_DEBUG
     233             : 
     234             :   static void deallocate(record_base_t* arg_rec);
     235             : 
     236             :  public:
     237             :   ~SharedAllocationRecordCommon();
     238             :   template <class ExecutionSpace>
     239             :   SharedAllocationRecordCommon(
     240             :       ExecutionSpace const& exec, MemorySpace const& space,
     241             :       std::string const& label, std::size_t alloc_size,
     242             :       record_base_t::function_type dealloc = &deallocate)
     243             :       : SharedAllocationRecord<void, void>(
     244             : #ifdef KOKKOS_ENABLE_DEBUG
     245             :             &s_root_record,
     246             : #endif  // KOKKOS_ENABLE_DEBUG
     247             :             checked_allocation_with_header(exec, space, label, alloc_size),
     248             :             sizeof(SharedAllocationHeader) + alloc_size, dealloc, label),
     249             :         m_space(space) {
     250             :     auto& header = *SharedAllocationRecord<void, void>::m_alloc_ptr;  // refers to the header inside the allocation
     251             :     fill_host_accessible_header_info(this, header, label);
     252             :   }
     253             :   SharedAllocationRecordCommon(
     254             :       MemorySpace const& space, std::string const& label, std::size_t size,
     255             :       record_base_t::function_type dealloc = &deallocate);
     256             : 
     257             :   static auto allocate(MemorySpace const& arg_space,
     258             :                        std::string const& arg_label, size_t arg_alloc_size)
     259             :       -> derived_t*;
     260             :   /**\brief  Allocate tracked memory in the space */
     261             :   static void* allocate_tracked(MemorySpace const& arg_space,
     262             :                                 std::string const& arg_alloc_label,
     263             :                                 size_t arg_alloc_size);
     264             :   /**\brief  Deallocate tracked memory in the space */
     265             :   static void deallocate_tracked(void* arg_alloc_ptr);
     266             :   /**\brief  Reallocate tracked memory in the space
     267             :    * \note The ExecutionSpace template parameter is used to force
     268             :    * templatization of the method to delay its definition. Otherwise, the
     269             :    * method would use an execution space which is not complete yet.
     270             :    */
     271             :   template <class ExecutionSpace = typename MemorySpace::execution_space>
     272             :   static void* reallocate_tracked(void* arg_alloc_ptr, size_t arg_alloc_size);
     273             :   static auto get_record(void* alloc_ptr) -> derived_t*;
     274             :   std::string get_label() const override;
     275             :   static void print_records(std::ostream& s, MemorySpace const&,
     276             :                             bool detail = false);
     277             : };
     278             : 
     279             : /** Copy a tracked allocation into a newly allocated record and drop one use of the old record.
     280             :  * \note This method is implemented here to prevent circular dependencies.
     281             :  */
     282             : template <class MemorySpace>
     283             : template <class ExecutionSpace>
     284             : void* SharedAllocationRecordCommon<MemorySpace>::reallocate_tracked(
     285             :     void* arg_alloc_ptr, size_t arg_alloc_size) {
     286             :   derived_t* const r_old = derived_t::get_record(arg_alloc_ptr);
     287             :   derived_t* const r_new =
     288             :       allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
     289             : 
     290             :   Kokkos::Impl::DeepCopy<MemorySpace, MemorySpace>(
     291             :       ExecutionSpace{}, r_new->data(), r_old->data(),
     292             :       std::min(r_old->size(), r_new->size()));  // copy only as many bytes as both records hold
     293             :   Kokkos::fence(std::string("SharedAllocationRecord<") + MemorySpace::name() +
     294             :                 ", void>::reallocate_tracked(): fence after copying data");
     295             : 
     296             :   record_base_t::increment(r_new);
     297             :   record_base_t::decrement(r_old);
     298             : 
     299             :   return r_new->data();
     300             : }
     301             : 
     302             : template <class MemorySpace>
     303             : class HostInaccessibleSharedAllocationRecordCommon
     304             :     : public SharedAllocationRecord<void, void> {
     305             :  private:
     306             :   using derived_t     = SharedAllocationRecord<MemorySpace, void>;
     307             :   using record_base_t = SharedAllocationRecord<void, void>;
     308             : 
     309             :  protected:
     310             :   using record_base_t::record_base_t;  // inherit the base record constructors
     311             : 
     312             :   MemorySpace m_space;
     313             : 
     314             : #ifdef KOKKOS_ENABLE_DEBUG
     315             :   static record_base_t s_root_record;
     316             : #endif  // KOKKOS_ENABLE_DEBUG
     317             : 
     318             :   static void deallocate(record_base_t* arg_rec);
     319             : 
     320             :  public:
     321             :   ~HostInaccessibleSharedAllocationRecordCommon();
     322             :   template <class ExecutionSpace>
     323             :   HostInaccessibleSharedAllocationRecordCommon(
     324             :       ExecutionSpace const& exec, MemorySpace const& space,
     325             :       std::string const& label, std::size_t alloc_size,
     326             :       record_base_t::function_type dealloc = &deallocate)
     327             :       : SharedAllocationRecord<void, void>(
     328             : #ifdef KOKKOS_ENABLE_DEBUG
     329             :             &s_root_record,
     330             : #endif  // KOKKOS_ENABLE_DEBUG
     331             :             checked_allocation_with_header(exec, space, label, alloc_size),
     332             :             sizeof(SharedAllocationHeader) + alloc_size, dealloc, label),
     333             :         m_space(space) {
     334             :     SharedAllocationHeader header;  // local copy, filled here and deep-copied into the allocation below
     335             : 
     336             :     fill_host_accessible_header_info(this, header, label);
     337             : 
     338             :     Kokkos::Impl::DeepCopy<MemorySpace, HostSpace>(
     339             :         exec, SharedAllocationRecord<void, void>::m_alloc_ptr, &header,
     340             :         sizeof(SharedAllocationHeader));
     341             :   }
     342             :   HostInaccessibleSharedAllocationRecordCommon(
     343             :       MemorySpace const& space, std::string const& label, std::size_t size,
     344             :       record_base_t::function_type dealloc = &deallocate);
     345             : 
     346             :   static auto allocate(MemorySpace const& arg_space,
     347             :                        std::string const& arg_label, size_t arg_alloc_size)
     348             :       -> derived_t*;
     349             :   /**\brief  Allocate tracked memory in the space */
     350             :   static void* allocate_tracked(MemorySpace const& arg_space,
     351             :                                 std::string const& arg_alloc_label,
     352             :                                 size_t arg_alloc_size);
     353             :   /**\brief  Deallocate tracked memory in the space */
     354             :   static void deallocate_tracked(void* arg_alloc_ptr);
     355             :   /**\brief  Reallocate tracked memory in the space
     356             :    * \note The ExecutionSpace template parameter is used to force
     357             :    * templatization of the method to delay its definition. Otherwise, the
     358             :    * method would use an execution space which is not complete yet.
     359             :    */
     360             :   template <class ExecutionSpace = typename MemorySpace::execution_space>
     361             :   static void* reallocate_tracked(void* arg_alloc_ptr, size_t arg_alloc_size);
     362             : 
     363             :   /**\brief  Print the records of the space
     364             :    * \note The ExecutionSpace template parameter is used to force
     365             :    * templatization of the method to delay its definition. Otherwise, the
     366             :    * method would use an execution space which is not complete yet.
     367             :    */
     368             :   template <class ExecutionSpace = Kokkos::DefaultHostExecutionSpace>
     369             :   static void print_records(std::ostream& s, MemorySpace const&,
     370             :                             bool detail = false);
     371             :   static auto get_record(void* alloc_ptr) -> derived_t*;
     372             :   std::string get_label() const override;
     373             : };
     374             : 
     375             : /** Copy a tracked allocation into a newly allocated record and drop one use of the old record.
     376             :  * \note This method is implemented here to prevent circular dependencies.
     377             :  */
     378             : template <class MemorySpace>
     379             : template <class ExecutionSpace>
     380             : void* HostInaccessibleSharedAllocationRecordCommon<
     381             :     MemorySpace>::reallocate_tracked(void* arg_alloc_ptr,
     382             :                                      size_t arg_alloc_size) {
     383             :   derived_t* const r_old = derived_t::get_record(arg_alloc_ptr);
     384             :   derived_t* const r_new =
     385             :       allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
     386             : 
     387             :   Kokkos::Impl::DeepCopy<MemorySpace, MemorySpace>(
     388             :       ExecutionSpace{}, r_new->data(), r_old->data(),
     389             :       std::min(r_old->size(), r_new->size()));  // copy only as many bytes as both records hold
     390             :   Kokkos::fence(std::string("SharedAllocationRecord<") + MemorySpace::name() +
     391             :                 ", void>::reallocate_tracked(): fence after copying data");
     392             : 
     393             :   record_base_t::increment(r_new);
     394             :   record_base_t::decrement(r_old);
     395             : 
     396             :   return r_new->data();
     397             : }
     398             : 
     399             : #ifdef KOKKOS_ENABLE_DEBUG  // out-of-class definitions of the per-MemorySpace static root records
     400             : template <class MemorySpace>
     401             : SharedAllocationRecord<void, void>
     402             :     SharedAllocationRecordCommon<MemorySpace>::s_root_record;
     403             : 
     404             : template <class MemorySpace>
     405             : SharedAllocationRecord<void, void>
     406             :     HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::s_root_record;
     407             : #endif  // KOKKOS_ENABLE_DEBUG
     408             : 
     409             : #define KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(MEMORY_SPACE)        \
     410             :   template <>                                                             \
     411             :   class Kokkos::Impl::SharedAllocationRecord<MEMORY_SPACE, void>          \
     412             :       : public Kokkos::Impl::SharedAllocationRecordCommon<MEMORY_SPACE> { \
     413             :     using SharedAllocationRecordCommon<                                   \
     414             :         MEMORY_SPACE>::SharedAllocationRecordCommon;                      \
     415             :   }  // full specialization that inherits the SharedAllocationRecordCommon constructors
     416             : 
     417             : #define KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(    \
     418             :     MEMORY_SPACE)                                                          \
     419             :   template <>                                                              \
     420             :   class Kokkos::Impl::SharedAllocationRecord<MEMORY_SPACE, void>           \
     421             :       : public Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< \
     422             :             MEMORY_SPACE> {                                                \
     423             :     using HostInaccessibleSharedAllocationRecordCommon<                    \
     424             :         MEMORY_SPACE>::HostInaccessibleSharedAllocationRecordCommon;       \
     425             :   }  // full specialization that inherits the HostInaccessible...Common constructors
     426             : 
     427             : #define KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( \
     428             :     MEMORY_SPACE)                                                    \
     429             :   template class Kokkos::Impl::SharedAllocationRecordCommon<MEMORY_SPACE>  // explicit instantiation; caller supplies the ';'
     430             : 
     431             : #define KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( \
     432             :     MEMORY_SPACE)                                                                      \
     433             :   template class Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon<           \
     434             :       MEMORY_SPACE>  // explicit instantiation; caller supplies the ';'
     435             : 
     436             : /* The address of this function is taken, so make sure it is unique */
     437             : template <class MemorySpace, class DestroyFunctor>
     438             : inline void deallocate(SharedAllocationRecord<void, void>* record_ptr) {
     439             :   using base_type = SharedAllocationRecord<MemorySpace, void>;
     440             :   using this_type = SharedAllocationRecord<MemorySpace, DestroyFunctor>;
     441             : 
     442             :   this_type* const ptr =
     443             :       static_cast<this_type*>(static_cast<base_type*>(record_ptr));  // downcast in two steps
     444             : 
     445             :   ptr->m_destroy.destroy_shared_allocation();  // run the destroy functor before deleting the record
     446             : 
     447             :   delete ptr;
     448             : }
     449             : 
     450             : /*
     451             :  *  Memory space specialization of SharedAllocationRecord< Space , void >
     452             :  * requires :
     453             :  *
     454             :  *  SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void
     455             :  * , void >
     456             :  *  {
     457             :  *    // delete allocated user memory via static_cast to this type.
     458             :  *    static void deallocate( SharedAllocationRecord<void,void> * );
     459             :  *    Space m_space ;
     460             :  *  }
     461             :  */
     462             : template <class MemorySpace, class DestroyFunctor>
     463             : class SharedAllocationRecord
     464             :     : public SharedAllocationRecord<MemorySpace, void> {
     465             :  private:
     466             :   template <typename ExecutionSpace>
     467             :   SharedAllocationRecord(const ExecutionSpace& execution_space,
     468             :                          const MemorySpace& arg_space,
     469             :                          const std::string& arg_label, const size_t arg_alloc)
     470             :       /*  Allocate user memory as [ SharedAllocationHeader , user_memory ] */
     471             :       : SharedAllocationRecord<MemorySpace, void>(
     472             :             execution_space, arg_space, arg_label, arg_alloc,
     473             :             &Kokkos::Impl::deallocate<MemorySpace, DestroyFunctor>),
     474             :         m_destroy() {}
     475             : 
     476             :   SharedAllocationRecord(const MemorySpace& arg_space,
     477             :                          const std::string& arg_label, const size_t arg_alloc)
     478             :       /*  Allocate user memory as [ SharedAllocationHeader , user_memory ] */
     479             :       : SharedAllocationRecord<MemorySpace, void>(
     480             :             arg_space, arg_label, arg_alloc,
     481             :             &Kokkos::Impl::deallocate<MemorySpace, DestroyFunctor>),
     482             :         m_destroy() {}
     483             : 
     484             :   SharedAllocationRecord()                                         = delete;
     485             :   SharedAllocationRecord(const SharedAllocationRecord&)            = delete;
     486             :   SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
     487             : 
     488             :  public:
     489             :   DestroyFunctor m_destroy;
     490             : 
     491             :   // Allocate with a zero use count.  Incrementing the use count from zero to
     492             :   // one inserts the record into the tracking list.  Decrementing the count from
     493             :   // one to zero removes from the tracking list and deallocates.
     494             :   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
     495             :       const MemorySpace& arg_space, const std::string& arg_label,
     496             :       const size_t arg_alloc) {
     497             :     KOKKOS_IF_ON_HOST(
     498             :         (return new SharedAllocationRecord(arg_space, arg_label, arg_alloc);))
     499             :     KOKKOS_IF_ON_DEVICE(
     500             :         ((void)arg_space; (void)arg_label; (void)arg_alloc; return nullptr;))
     501             :   }
     502             : 
     503             :   template <typename ExecutionSpace>
     504             :   KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
     505             :       const ExecutionSpace& exec_space, const MemorySpace& arg_space,
     506             :       const std::string& arg_label, const size_t arg_alloc) {
     507             :     KOKKOS_IF_ON_HOST(
     508             :         (return new SharedAllocationRecord(exec_space, arg_space, arg_label,
     509             :                                            arg_alloc);))
     510             :     KOKKOS_IF_ON_DEVICE(((void)exec_space; (void)arg_space; (void)arg_label;
     511             :                          (void)arg_alloc; return nullptr;))
     512             :   }
     513             : };
     514             : 
     515             : template <class MemorySpace>  // generic <MemorySpace, void> case: adds nothing to the <void, void> base
     516             : class SharedAllocationRecord<MemorySpace, void>
     517             :     : public SharedAllocationRecord<void, void> {};
     518             : 
     519             : union SharedAllocationTracker {
                         // Handle to a SharedAllocationRecord<void, void>.  The union overlays the
                         // record pointer (m_record) with an integer view of the same storage
                         // (m_record_bits).  When DO_NOT_DEREF_FLAG is set in the bits, the
                         // increment/decrement macros below leave the record's count untouched.
     520             :  private:
     521             :   using Record = SharedAllocationRecord<void, void>;
     522             : 
                         // Bit 0 of the pointer word.  A default-constructed tracker holds exactly
                         // this value (flag set, no pointer bits).
     523             :   enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
     524             : 
     525             :   // The allocation record resides in Host memory space
     526             :   uintptr_t m_record_bits;
     527             :   Record* m_record;
     528             : 
     529             :  public:
     530             :   // Use macros instead of inline functions to reduce
     531             :   // pressure on compiler optimization by reducing
     532             :   // number of symbols and inline functions.
     533             : 
                       // KOKKOS_IMPL_BRANCH_PROB expands to KOKKOS_IMPL_ATTRIBUTE_UNLIKELY only when
                       // KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY is defined, otherwise to nothing.
     534             : #ifdef KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY
     535             : #define KOKKOS_IMPL_BRANCH_PROB KOKKOS_IMPL_ATTRIBUTE_UNLIKELY
     536             : #else
     537             : #define KOKKOS_IMPL_BRANCH_PROB
     538             : #endif
     539             : 
                       // Calls Record::increment(m_record), but only inside KOKKOS_IF_ON_HOST and
                       // only when DO_NOT_DEREF_FLAG is clear.
     540             : #define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
     541             :   KOKKOS_IF_ON_HOST(                                    \
     542             :       (if (!(m_record_bits & DO_NOT_DEREF_FLAG))        \
     543             :            KOKKOS_IMPL_BRANCH_PROB { Record::increment(m_record); }))
     544             : 
                       // Calls Record::decrement(m_record) under the same two conditions.
     545             : #define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
     546             :   KOKKOS_IF_ON_HOST(                                    \
     547             :       (if (!(m_record_bits & DO_NOT_DEREF_FLAG))        \
     548             :            KOKKOS_IMPL_BRANCH_PROB { Record::decrement(m_record); }))
     549             : 
                       // Computes the bits a copy of `rhs` should hold.  The bits are carried over
                       // unchanged only if override_tracking is true, rhs's flag is clear and
                       // Record::tracking_enabled() is true; in every other case the result is
                       // rhs's bits with DO_NOT_DEREF_FLAG set.
                       // NOTE(review): unlike the three macros above, this one is not #undef'd at
                       // the end of the union -- confirm whether that is intentional.
     550             : #define KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs,               \
     551             :                                                         override_tracking) \
     552             :   (((!override_tracking) || (rhs.m_record_bits & DO_NOT_DEREF_FLAG) ||     \
     553             :     (!Record::tracking_enabled()))                                         \
     554             :        ? rhs.m_record_bits | DO_NOT_DEREF_FLAG                             \
     555             :        : rhs.m_record_bits)
     556             : 
     557             :   /** \brief  Assign a specialized record */
                         // A non-null arg_record is stored and passed to Record::increment; a null
                         // arg_record resets the tracker to the flag-only state.  Whatever the
                         // tracker held before is overwritten without a decrement.
     558             :   inline void assign_allocated_record_to_uninitialized(Record* arg_record) {
     559             :     if (arg_record) {
     560             :       Record::increment(m_record = arg_record);
     561             :     } else {
     562             :       m_record_bits = DO_NOT_DEREF_FLAG;
     563             :     }
     564             :   }
     565             : 
                         // Returns nullptr when DO_NOT_DEREF_FLAG is set; otherwise m_record
                         // static_cast to the MemorySpace-specific record type.
     566             :   template <class MemorySpace>
     567             :   constexpr SharedAllocationRecord<MemorySpace, void>* get_record()
     568             :       const noexcept {
     569             :     return (m_record_bits & DO_NOT_DEREF_FLAG)
     570             :                ? nullptr
     571             :                : static_cast<SharedAllocationRecord<MemorySpace, void>*>(
     572             :                      m_record);
     573             :   }
     574             : 
                         // Returns an empty string only when the bits equal exactly
                         // DO_NOT_DEREF_FLAG (no pointer bits).  In every other case the flag is
                         // masked off and get_label() is called on the resulting record pointer,
                         // which includes trackers whose flag is set but which still carry a pointer.
     575             :   template <class MemorySpace>
     576           0 :   std::string get_label() const {
     577           0 :     return (m_record_bits == DO_NOT_DEREF_FLAG)
     578             :                ? std::string()
     579             :                : reinterpret_cast<SharedAllocationRecord<MemorySpace, void>*>(
     580           0 :                      m_record_bits & ~DO_NOT_DEREF_FLAG)
     581           0 :                      ->get_label();
     582             :   }
     583             : 
                         // Inside KOKKOS_IF_ON_HOST: masks off the flag and returns the record's
                         // use_count(), or 0 when the masked pointer is null.
                         // Inside KOKKOS_IF_ON_DEVICE: always returns 0.
     584             :   KOKKOS_INLINE_FUNCTION
     585             :   int use_count() const {
     586             :     KOKKOS_IF_ON_HOST((Record* const tmp = reinterpret_cast<Record*>(
     587             :                            m_record_bits & ~DO_NOT_DEREF_FLAG);
     588             :                        return (tmp ? tmp->use_count() : 0);))
     589             : 
     590             :     KOKKOS_IF_ON_DEVICE((return 0;))
     591             :   }
     592             : 
                         // True when any bit other than DO_NOT_DEREF_FLAG is set, i.e. pointer bits
                         // are present whether or not the flag is set.
     593           0 :   KOKKOS_INLINE_FUNCTION bool has_record() const {
     594           0 :     return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0;
     595             :   }
     596             : 
     597             :   KOKKOS_FORCEINLINE_FUNCTION
     598             :   void clear() {
     599             :     // If this is tracking then must decrement
     600             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     601             :     // Reset to default constructed value.
     602             :     m_record_bits = DO_NOT_DEREF_FLAG;
     603             :   }
     604             : 
     605             :   // Destructor: runs the decrement macro, so the count drops only if tracking.
     606             :   KOKKOS_FORCEINLINE_FUNCTION
     607             :   ~SharedAllocationTracker(){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT}
     608             : 
                         // Default state: flag set and no pointer bits.
     609             :   KOKKOS_FORCEINLINE_FUNCTION constexpr SharedAllocationTracker()
     610             :       : m_record_bits(DO_NOT_DEREF_FLAG) {}
     611             : 
     612             :   // Move:
     613             : 
                         // Takes over rhs's bits without touching the count and leaves rhs in the
                         // default (flag-only) state.
     614             :   KOKKOS_FORCEINLINE_FUNCTION
     615             :   SharedAllocationTracker(SharedAllocationTracker&& rhs)
     616             :       : m_record_bits(rhs.m_record_bits) {
     617             :     rhs.m_record_bits = DO_NOT_DEREF_FLAG;
     618             :   }
     619             : 
                         // Swaps the bits of *this and rhs.  No increment or decrement happens
                         // here; the previous contents of *this end up in rhs.
     620             :   KOKKOS_FORCEINLINE_FUNCTION
     621             :   SharedAllocationTracker& operator=(SharedAllocationTracker&& rhs) {
     622             :     if (&rhs == this) return *this;
     623             :     auto swap_tmp     = m_record_bits;
     624             :     m_record_bits     = rhs.m_record_bits;
     625             :     rhs.m_record_bits = swap_tmp;
     626             :     return *this;
     627             :   }
     628             : 
     629             :   // Copy:
     630             : 
                         // Copies rhs's bits through the carry macro with override_tracking = true,
                         // then runs the increment macro on the result.
     631             :   KOKKOS_FORCEINLINE_FUNCTION
     632             :   SharedAllocationTracker(const SharedAllocationTracker& rhs)
     633             :       : m_record_bits(KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(
     634             :             rhs, true)){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT}
     635             : 
     636             :         /** \brief  Copy construction may disable tracking. */
     637       33714 :         KOKKOS_FORCEINLINE_FUNCTION SharedAllocationTracker(
     638             :             const SharedAllocationTracker& rhs, const bool enable_tracking)
     639       33714 :       : m_record_bits(KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(
     640             :             rhs,
     641       33714 :             enable_tracking)){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT}
     642             : 
                         // Copy assignment: returns early on self-assignment, otherwise runs the
                         // decrement macro on the current bits, carries rhs's bits and runs the
                         // increment macro on the new bits.
     643             :         KOKKOS_FORCEINLINE_FUNCTION SharedAllocationTracker
     644             :         &
     645             :         operator=(const SharedAllocationTracker& rhs) {
     646             :     if (&rhs == this) return *this;
     647             :     // If this is tracking then must decrement
     648             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     649             :     m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, true);
     650             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
     651             :     return *this;
     652             :   }
     653             : 
     654             :   /*  The following functions (assign_direct and assign_force_disable)
     655             :    *  are the result of deconstructing the
     656             :    *  KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS macro.  This
     657             :    *  allows the caller to do the check for tracking enabled and managed
     658             :    *  apart from the assignment of the record because the tracking
     659             :    *  enabled / managed question may be important for other tasks as well
     660             :    */
     661             : 
     662             :   /** \brief  Copy assignment without the carry bits logic
     663             :    *         This assumes that externally defined tracking is explicitly enabled
     664             :    */
                         // NOTE(review): unlike operator=, there is no self-assignment guard here;
                         // the decrement runs before the bits are re-read from rhs.  Confirm that
                         // callers never pass *this.
     665             :   KOKKOS_FORCEINLINE_FUNCTION
     666             :   void assign_direct(const SharedAllocationTracker& rhs) {
     667             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     668             :     m_record_bits = rhs.m_record_bits;
     669             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
     670             :   }
     671             : 
     672             :   /** \brief  Copy assignment without the increment
     673             :    *         we cannot assume that current record is unmanaged
     674             :    *         but with externally defined tracking explicitly disabled
     675             :    *         we can go straight to the do not deref flag     */
     676             :   KOKKOS_FORCEINLINE_FUNCTION
     677             :   void assign_force_disable(const SharedAllocationTracker& rhs) {
     678             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     679             :     m_record_bits = rhs.m_record_bits | DO_NOT_DEREF_FLAG;
     680             :   }
     681             : 
     682             :   // report if record is tracking or not
                         // NOTE(review): not const-qualified although it only reads m_record_bits.
     683             :   KOKKOS_FORCEINLINE_FUNCTION
     684             :   bool tracking_enabled() { return (!(m_record_bits & DO_NOT_DEREF_FLAG)); }
     685             : 
     686             :   /** \brief  Copy assignment may disable tracking */
                         // Runs the decrement macro on the current bits, carries rhs's bits with
                         // enable_tracking as the override argument, then runs the increment macro.
                         // NOTE(review): no self-assignment guard here either -- confirm callers.
     687             :   KOKKOS_FORCEINLINE_FUNCTION
     688             :   void assign(const SharedAllocationTracker& rhs, const bool enable_tracking) {
     689             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     690             :     m_record_bits =
     691             :         KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, enable_tracking);
     692             :     KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
     693             :   }
     694             : 
     695             : #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
     696             : #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
     697             : #undef KOKKOS_IMPL_BRANCH_PROB
     698             : };
     699             : 
     700             : struct SharedAllocationDisableTrackingGuard {
                         // Scope guard around SharedAllocationRecord<void, void> tracking.
                         // The constructor asserts that tracking is currently enabled and then
                         // calls tracking_disable().  Because of that assertion, creating a second
                         // guard while one is alive trips KOKKOS_ASSERT (when it is active).
     701        1236 :   SharedAllocationDisableTrackingGuard() {
     702        1236 :     KOKKOS_ASSERT(
     703        1236 :         (Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()));
     704        1236 :     Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_disable();
     705        1236 :   }
     706             : 
                         // Copy and move construction are deleted.
     707             :   SharedAllocationDisableTrackingGuard(
     708             :       const SharedAllocationDisableTrackingGuard&) = delete;
     709             :   SharedAllocationDisableTrackingGuard(SharedAllocationDisableTrackingGuard&&) =
     710             :       delete;
     711             : 
                         // The destructor asserts that tracking is still disabled and then calls
                         // tracking_enable().
     712        1236 :   ~SharedAllocationDisableTrackingGuard() {
     713        1236 :     KOKKOS_ASSERT((
     714        1236 :         !Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()));
     715        1236 :     Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enable();
     716        1236 :   }
     717             :   // clang-format off
     718             :   // The old version of clang format we use is particularly egregious here
                         // Copy and move assignment are deleted as well.
     719             :   SharedAllocationDisableTrackingGuard& operator=(
     720             :       const SharedAllocationDisableTrackingGuard&) = delete;
     721             :   SharedAllocationDisableTrackingGuard& operator=(
     722             :       SharedAllocationDisableTrackingGuard&&) = delete;
     723             :   // clang-format on
     724             : };
     725             : 
     726             : template <class FunctorType, class... Args>
                       // Builds and returns a FunctorType, list-initialized from the perfectly
                       // forwarded arguments, while a SharedAllocationDisableTrackingGuard is alive.
                       // The guard is a local, so it is destroyed when the function returns.
     727        1236 : inline FunctorType construct_with_shared_allocation_tracking_disabled(
     728             :     Args&&... args) {
                         // The guard is never read; [[maybe_unused]] marks that as intentional.
     729        1236 :   [[maybe_unused]] auto guard = SharedAllocationDisableTrackingGuard{};
     730        2472 :   return {std::forward<Args>(args)...};
     731        1236 : }
     732             : } /* namespace Impl */
     733             : } /* namespace Kokkos */
     734             : #endif

Generated by: LCOV version 1.14