Line data Source code
1 : //@HEADER
2 : // ************************************************************************
3 : //
4 : // Kokkos v. 4.0
5 : // Copyright (2022) National Technology & Engineering
6 : // Solutions of Sandia, LLC (NTESS).
7 : //
8 : // Under the terms of Contract DE-NA0003525 with NTESS,
9 : // the U.S. Government retains certain rights in this software.
10 : //
11 : // Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
12 : // See https://kokkos.org/LICENSE for license information.
13 : // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
14 : //
15 : //@HEADER
16 :
17 : #ifndef KOKKOS_SHARED_ALLOC_HPP
18 : #define KOKKOS_SHARED_ALLOC_HPP
19 :
20 : #include <Kokkos_Macros.hpp>
21 : #include <Kokkos_Core_fwd.hpp>
22 : #include <impl/Kokkos_Error.hpp> // Impl::throw_runtime_exception
23 :
24 : #include <cstdint>
25 : #include <string>
26 :
27 : namespace Kokkos {
28 : namespace Impl {
29 :
/* Allocation record template. Both parameters default to void; the
 * <void, void> specialization defined later in this header serves as the base
 * class of the other record types declared here.
 */
template <typename MemorySpace = void, typename DestroyFunctor = void>
class SharedAllocationRecord;

/* Per-memory-space record implementation; the class body follows further down
 * in this header.
 */
template <typename MemorySpace>
class SharedAllocationRecordCommon;
35 :
36 : class SharedAllocationHeader {
37 : private:
38 : using Record = SharedAllocationRecord<void, void>;
39 :
40 : #if defined(KOKKOS_ARCH_AMD_GPU)
41 : static constexpr unsigned maximum_label_length =
42 : (1u << 8 /* 256 */) - sizeof(Record*);
43 : #else
44 : static constexpr unsigned maximum_label_length =
45 : (1u << 7 /* 128 */) - sizeof(Record*);
46 : #endif
47 :
48 : template <class, class>
49 : friend class SharedAllocationRecord;
50 : template <class>
51 : friend class SharedAllocationRecordCommon;
52 : template <class>
53 : friend class HostInaccessibleSharedAllocationRecordCommon;
54 : friend void fill_host_accessible_header_info(
55 : SharedAllocationRecord<void, void>*, SharedAllocationHeader&,
56 : std::string const&);
57 :
58 : Record* m_record;
59 : char m_label[maximum_label_length];
60 :
61 : public:
62 : /* Given user memory get pointer to the header */
63 : KOKKOS_INLINE_FUNCTION static const SharedAllocationHeader* get_header(
64 : void const* alloc_ptr) {
65 : return reinterpret_cast<SharedAllocationHeader const*>(
66 : static_cast<char const*>(alloc_ptr) - sizeof(SharedAllocationHeader));
67 : }
68 :
69 : KOKKOS_INLINE_FUNCTION
70 : const char* label() const { return m_label; }
71 : };
72 :
73 : template <>
74 : class SharedAllocationRecord<void, void> {
75 : protected:
76 : #if defined(KOKKOS_ARCH_AMD_GPU)
77 : static_assert(sizeof(SharedAllocationHeader) == (1u << 8 /* 256 */),
78 : "sizeof(SharedAllocationHeader) != 256");
79 : #else
80 : static_assert(sizeof(SharedAllocationHeader) == (1u << 7 /* 128 */),
81 : "sizeof(SharedAllocationHeader) != 128");
82 : #endif
83 :
84 : template <class, class>
85 : friend class SharedAllocationRecord;
86 : template <class>
87 : friend class SharedAllocationRecordCommon;
88 : template <class>
89 : friend class HostInaccessibleSharedAllocationRecordCommon;
90 :
91 : using function_type = void (*)(SharedAllocationRecord<void, void>*);
92 :
93 : SharedAllocationHeader* const m_alloc_ptr;
94 : size_t const m_alloc_size;
95 : function_type const m_dealloc;
96 : #ifdef KOKKOS_ENABLE_DEBUG
97 : SharedAllocationRecord* const m_root;
98 : SharedAllocationRecord* m_prev;
99 : SharedAllocationRecord* m_next;
100 : #endif
101 : int m_count;
102 : std::string m_label;
103 :
104 : SharedAllocationRecord(SharedAllocationRecord&&) = delete;
105 : SharedAllocationRecord(const SharedAllocationRecord&) = delete;
106 : SharedAllocationRecord& operator=(SharedAllocationRecord&&) = delete;
107 : SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
108 :
109 : /**\brief Construct and insert into 'arg_root' tracking set.
110 : * use_count is zero.
111 : */
112 : SharedAllocationRecord(
113 : #ifdef KOKKOS_ENABLE_DEBUG
114 : SharedAllocationRecord* arg_root,
115 : #endif
116 : SharedAllocationHeader* arg_alloc_ptr, size_t arg_alloc_size,
117 : function_type arg_dealloc, const std::string& label);
118 : private:
119 : static inline thread_local int t_tracking_enabled = 1;
120 :
121 : public:
122 0 : virtual std::string get_label() const { return std::string("Unmanaged"); }
123 :
124 : #if defined(__EDG__)
125 : #pragma push
126 : #pragma diag_suppress implicit_return_from_non_void_function
127 : #endif
128 36186 : static KOKKOS_FUNCTION int tracking_enabled() {
129 36186 : KOKKOS_IF_ON_HOST(return t_tracking_enabled;)
130 36186 : KOKKOS_IF_ON_DEVICE(return 0;)
131 : }
132 : #if defined(__EDG__)
133 : #pragma pop
134 : #endif
135 :
136 : /**\brief A host process thread claims and disables the
137 : * shared allocation tracking flag.
138 : */
139 1236 : static void tracking_disable() { t_tracking_enabled = 0; }
140 :
141 : /**\brief A host process thread releases and enables the
142 : * shared allocation tracking flag.
143 : */
144 1236 : static void tracking_enable() { t_tracking_enabled = 1; }
145 :
146 868 : virtual ~SharedAllocationRecord() = default;
147 :
148 : SharedAllocationRecord()
149 : : m_alloc_ptr(nullptr),
150 : m_alloc_size(0),
151 : m_dealloc(nullptr),
152 : #ifdef KOKKOS_ENABLE_DEBUG
153 : m_root(this),
154 : m_prev(this),
155 : m_next(this),
156 : #endif
157 : m_count(0) {
158 : }
159 :
160 : static constexpr unsigned maximum_label_length =
161 : SharedAllocationHeader::maximum_label_length;
162 :
163 : KOKKOS_FUNCTION
164 : const SharedAllocationHeader* head() const { return m_alloc_ptr; }
165 :
166 : /* User's memory begins at the end of the header */
167 : KOKKOS_FUNCTION
168 : void* data() const { return static_cast<void*>(m_alloc_ptr + 1); }
169 :
170 : /* User's memory begins at the end of the header */
171 : size_t size() const { return m_alloc_size - sizeof(SharedAllocationHeader); }
172 :
173 : /* Cannot be 'constexpr' because 'm_count' is volatile */
174 : int use_count() const { return *static_cast<const volatile int*>(&m_count); }
175 :
176 : /* Increment use count */
177 : static void increment(SharedAllocationRecord*);
178 :
179 : /* Decrement use count. If 1->0 then remove from the tracking list and invoke
180 : * m_dealloc */
181 : static SharedAllocationRecord* decrement(SharedAllocationRecord*);
182 :
183 : /* Given a root record and data pointer find the record */
184 : static SharedAllocationRecord* find(SharedAllocationRecord* const,
185 : void* const);
186 :
187 : /* Sanity check for the whole set of records to which the input record
188 : * belongs. Locks the set's insert/erase operations until the sanity check is
189 : * complete.
190 : */
191 : static bool is_sane(SharedAllocationRecord*);
192 :
193 : /* Print host-accessible records */
194 : static void print_host_accessible_records(
195 : std::ostream&, const char* const space_name,
196 : const SharedAllocationRecord* const root, const bool detail);
197 : };
198 :
199 : template <class MemorySpace>
200 : SharedAllocationHeader* checked_allocation_with_header(MemorySpace const& space,
201 : std::string const& label,
202 : size_t alloc_size) {
203 : return reinterpret_cast<SharedAllocationHeader*>(space.allocate(
204 : label.c_str(), alloc_size + sizeof(SharedAllocationHeader), alloc_size));
205 : }
206 :
207 : template <class ExecutionSpace, class MemorySpace>
208 : SharedAllocationHeader* checked_allocation_with_header(
209 : ExecutionSpace const& exec_space, MemorySpace const& space,
210 : std::string const& label, size_t alloc_size) {
211 : return reinterpret_cast<SharedAllocationHeader*>(
212 : space.allocate(exec_space, label.c_str(),
213 : alloc_size + sizeof(SharedAllocationHeader), alloc_size));
214 : }
215 :
216 : void fill_host_accessible_header_info(SharedAllocationHeader& arg_header,
217 : std::string const& arg_label);
218 :
219 : template <class MemorySpace>
220 : class SharedAllocationRecordCommon : public SharedAllocationRecord<void, void> {
221 : private:
222 : using derived_t = SharedAllocationRecord<MemorySpace, void>;
223 : using record_base_t = SharedAllocationRecord<void, void>;
224 :
225 : protected:
226 : using record_base_t::record_base_t;
227 :
228 : MemorySpace m_space;
229 :
230 : #ifdef KOKKOS_ENABLE_DEBUG
231 : static record_base_t s_root_record;
232 : #endif
233 :
234 : static void deallocate(record_base_t* arg_rec);
235 :
236 : public:
237 : ~SharedAllocationRecordCommon();
238 : template <class ExecutionSpace>
239 : SharedAllocationRecordCommon(
240 : ExecutionSpace const& exec, MemorySpace const& space,
241 : std::string const& label, std::size_t alloc_size,
242 : record_base_t::function_type dealloc = &deallocate)
243 : : SharedAllocationRecord<void, void>(
244 : #ifdef KOKKOS_ENABLE_DEBUG
245 : &s_root_record,
246 : #endif
247 : checked_allocation_with_header(exec, space, label, alloc_size),
248 : sizeof(SharedAllocationHeader) + alloc_size, dealloc, label),
249 : m_space(space) {
250 : auto& header = *SharedAllocationRecord<void, void>::m_alloc_ptr;
251 : fill_host_accessible_header_info(this, header, label);
252 : }
253 : SharedAllocationRecordCommon(
254 : MemorySpace const& space, std::string const& label, std::size_t size,
255 : record_base_t::function_type dealloc = &deallocate);
256 :
257 : static auto allocate(MemorySpace const& arg_space,
258 : std::string const& arg_label, size_t arg_alloc_size)
259 : -> derived_t*;
260 : /**\brief Allocate tracked memory in the space */
261 : static void* allocate_tracked(MemorySpace const& arg_space,
262 : std::string const& arg_alloc_label,
263 : size_t arg_alloc_size);
264 : /**\brief Deallocate tracked memory in the space */
265 : static void deallocate_tracked(void* arg_alloc_ptr);
266 : /**\brief Reallocate tracked memory in the space
267 : * \note The ExecutionSpace template parameter is used to force
268 : * templatization of the method to delay its definition. Otherwise, the
269 : * method would use an execution space which is not complete yet.
270 : */
271 : template <class ExecutionSpace = typename MemorySpace::execution_space>
272 : static void* reallocate_tracked(void* arg_alloc_ptr, size_t arg_alloc_size);
273 : static auto get_record(void* alloc_ptr) -> derived_t*;
274 : std::string get_label() const override;
275 : static void print_records(std::ostream& s, MemorySpace const&,
276 : bool detail = false);
277 : };
278 :
279 : /**
280 : * \note This method is implemented here to prevent circular dependencies.
281 : */
282 : template <class MemorySpace>
283 : template <class ExecutionSpace>
284 : void* SharedAllocationRecordCommon<MemorySpace>::reallocate_tracked(
285 : void* arg_alloc_ptr, size_t arg_alloc_size) {
286 : derived_t* const r_old = derived_t::get_record(arg_alloc_ptr);
287 : derived_t* const r_new =
288 : allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
289 :
290 : Kokkos::Impl::DeepCopy<MemorySpace, MemorySpace>(
291 : ExecutionSpace{}, r_new->data(), r_old->data(),
292 : std::min(r_old->size(), r_new->size()));
293 : Kokkos::fence(std::string("SharedAllocationRecord<") + MemorySpace::name() +
294 : ", void>::reallocate_tracked(): fence after copying data");
295 :
296 : record_base_t::increment(r_new);
297 : record_base_t::decrement(r_old);
298 :
299 : return r_new->data();
300 : }
301 :
302 : template <class MemorySpace>
303 : class HostInaccessibleSharedAllocationRecordCommon
304 : : public SharedAllocationRecord<void, void> {
305 : private:
306 : using derived_t = SharedAllocationRecord<MemorySpace, void>;
307 : using record_base_t = SharedAllocationRecord<void, void>;
308 :
309 : protected:
310 : using record_base_t::record_base_t;
311 :
312 : MemorySpace m_space;
313 :
314 : #ifdef KOKKOS_ENABLE_DEBUG
315 : static record_base_t s_root_record;
316 : #endif
317 :
318 : static void deallocate(record_base_t* arg_rec);
319 :
320 : public:
321 : ~HostInaccessibleSharedAllocationRecordCommon();
322 : template <class ExecutionSpace>
323 : HostInaccessibleSharedAllocationRecordCommon(
324 : ExecutionSpace const& exec, MemorySpace const& space,
325 : std::string const& label, std::size_t alloc_size,
326 : record_base_t::function_type dealloc = &deallocate)
327 : : SharedAllocationRecord<void, void>(
328 : #ifdef KOKKOS_ENABLE_DEBUG
329 : &s_root_record,
330 : #endif
331 : checked_allocation_with_header(exec, space, label, alloc_size),
332 : sizeof(SharedAllocationHeader) + alloc_size, dealloc, label),
333 : m_space(space) {
334 : SharedAllocationHeader header;
335 :
336 : fill_host_accessible_header_info(this, header, label);
337 :
338 : Kokkos::Impl::DeepCopy<MemorySpace, HostSpace>(
339 : exec, SharedAllocationRecord<void, void>::m_alloc_ptr, &header,
340 : sizeof(SharedAllocationHeader));
341 : }
342 : HostInaccessibleSharedAllocationRecordCommon(
343 : MemorySpace const& space, std::string const& label, std::size_t size,
344 : record_base_t::function_type dealloc = &deallocate);
345 :
346 : static auto allocate(MemorySpace const& arg_space,
347 : std::string const& arg_label, size_t arg_alloc_size)
348 : -> derived_t*;
349 : /**\brief Allocate tracked memory in the space */
350 : static void* allocate_tracked(MemorySpace const& arg_space,
351 : std::string const& arg_alloc_label,
352 : size_t arg_alloc_size);
353 : /**\brief Deallocate tracked memory in the space */
354 : static void deallocate_tracked(void* arg_alloc_ptr);
355 : /**\brief Reallocate tracked memory in the space
356 : * \note The ExecutionSpace template parameter is used to force
357 : * templatization of the method to delay its definition. Otherwise, the
358 : * method would use an execution space which is not complete yet.
359 : */
360 : template <class ExecutionSpace = typename MemorySpace::execution_space>
361 : static void* reallocate_tracked(void* arg_alloc_ptr, size_t arg_alloc_size);
362 :
363 : /**
364 : * \note The ExecutionSpace template parameter is used to force
365 : * templatization of the method to delay its definition. Otherwise, the
366 : * method would use an execution space which is not complete yet.
367 : */
368 : template <class ExecutionSpace = Kokkos::DefaultHostExecutionSpace>
369 : static void print_records(std::ostream& s, MemorySpace const&,
370 : bool detail = false);
371 : static auto get_record(void* alloc_ptr) -> derived_t*;
372 : std::string get_label() const override;
373 : };
374 :
375 : /**
376 : * \note This method is implemented here to prevent circular dependencies.
377 : */
378 : template <class MemorySpace>
379 : template <class ExecutionSpace>
380 : void* HostInaccessibleSharedAllocationRecordCommon<
381 : MemorySpace>::reallocate_tracked(void* arg_alloc_ptr,
382 : size_t arg_alloc_size) {
383 : derived_t* const r_old = derived_t::get_record(arg_alloc_ptr);
384 : derived_t* const r_new =
385 : allocate(r_old->m_space, r_old->get_label(), arg_alloc_size);
386 :
387 : Kokkos::Impl::DeepCopy<MemorySpace, MemorySpace>(
388 : ExecutionSpace{}, r_new->data(), r_old->data(),
389 : std::min(r_old->size(), r_new->size()));
390 : Kokkos::fence(std::string("SharedAllocationRecord<") + MemorySpace::name() +
391 : ", void>::reallocate_tracked(): fence after copying data");
392 :
393 : record_base_t::increment(r_new);
394 : record_base_t::decrement(r_old);
395 :
396 : return r_new->data();
397 : }
398 :
#ifdef KOKKOS_ENABLE_DEBUG
// Out-of-class definitions of the debug-only static root records declared in
// the two record implementation templates. Each is default constructed.
template <typename MemorySpace>
SharedAllocationRecord<void, void>
    SharedAllocationRecordCommon<MemorySpace>::s_root_record;

template <typename MemorySpace>
SharedAllocationRecord<void, void>
    HostInaccessibleSharedAllocationRecordCommon<MemorySpace>::s_root_record;
#endif
408 :
// Expands to the full specialization SharedAllocationRecord<SPACE_TYPE, void>
// deriving from SharedAllocationRecordCommon<SPACE_TYPE> and inheriting its
// constructors. The invocation supplies the trailing semicolon.
#define KOKKOS_IMPL_SHARED_ALLOCATION_SPECIALIZATION(SPACE_TYPE)         \
  template <>                                                            \
  class Kokkos::Impl::SharedAllocationRecord<SPACE_TYPE, void>           \
      : public Kokkos::Impl::SharedAllocationRecordCommon<SPACE_TYPE> {  \
    using SharedAllocationRecordCommon<                                  \
        SPACE_TYPE>::SharedAllocationRecordCommon;                       \
  }

// Same as above, but the specialization derives from
// HostInaccessibleSharedAllocationRecordCommon<SPACE_TYPE>.
#define KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_SPECIALIZATION(  \
    SPACE_TYPE)                                                          \
  template <>                                                            \
  class Kokkos::Impl::SharedAllocationRecord<SPACE_TYPE, void>           \
      : public Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< \
            SPACE_TYPE> {                                                \
    using HostInaccessibleSharedAllocationRecordCommon<                  \
        SPACE_TYPE>::HostInaccessibleSharedAllocationRecordCommon;       \
  }

// Expands to an explicit instantiation of
// SharedAllocationRecordCommon<SPACE_TYPE>.
#define KOKKOS_IMPL_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION(     \
    SPACE_TYPE)                                                          \
  template class Kokkos::Impl::SharedAllocationRecordCommon<SPACE_TYPE>

// Expands to an explicit instantiation of
// HostInaccessibleSharedAllocationRecordCommon<SPACE_TYPE>.
#define KOKKOS_IMPL_HOST_INACCESSIBLE_SHARED_ALLOCATION_RECORD_EXPLICIT_INSTANTIATION( \
    SPACE_TYPE)                                                          \
  template class Kokkos::Impl::HostInaccessibleSharedAllocationRecordCommon< \
      SPACE_TYPE>
435 :
436 : /* Taking the address of this function so make sure it is unique */
437 : template <class MemorySpace, class DestroyFunctor>
438 : inline void deallocate(SharedAllocationRecord<void, void>* record_ptr) {
439 : using base_type = SharedAllocationRecord<MemorySpace, void>;
440 : using this_type = SharedAllocationRecord<MemorySpace, DestroyFunctor>;
441 :
442 : this_type* const ptr =
443 : static_cast<this_type*>(static_cast<base_type*>(record_ptr));
444 :
445 : ptr->m_destroy.destroy_shared_allocation();
446 :
447 : delete ptr;
448 : }
449 :
450 : /*
451 : * Memory space specialization of SharedAllocationRecord< Space , void >
452 : * requires :
453 : *
454 : * SharedAllocationRecord< Space , void > : public SharedAllocationRecord< void
455 : * , void >
456 : * {
457 : * // delete allocated user memory via static_cast to this type.
458 : * static void deallocate( const SharedAllocationRecord<void,void> * );
459 : * Space m_space ;
460 : * }
461 : */
462 : template <class MemorySpace, class DestroyFunctor>
463 : class SharedAllocationRecord
464 : : public SharedAllocationRecord<MemorySpace, void> {
465 : private:
466 : template <typename ExecutionSpace>
467 : SharedAllocationRecord(const ExecutionSpace& execution_space,
468 : const MemorySpace& arg_space,
469 : const std::string& arg_label, const size_t arg_alloc)
470 : /* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
471 : : SharedAllocationRecord<MemorySpace, void>(
472 : execution_space, arg_space, arg_label, arg_alloc,
473 : &Kokkos::Impl::deallocate<MemorySpace, DestroyFunctor>),
474 : m_destroy() {}
475 :
476 : SharedAllocationRecord(const MemorySpace& arg_space,
477 : const std::string& arg_label, const size_t arg_alloc)
478 : /* Allocate user memory as [ SharedAllocationHeader , user_memory ] */
479 : : SharedAllocationRecord<MemorySpace, void>(
480 : arg_space, arg_label, arg_alloc,
481 : &Kokkos::Impl::deallocate<MemorySpace, DestroyFunctor>),
482 : m_destroy() {}
483 :
484 : SharedAllocationRecord() = delete;
485 : SharedAllocationRecord(const SharedAllocationRecord&) = delete;
486 : SharedAllocationRecord& operator=(const SharedAllocationRecord&) = delete;
487 :
488 : public:
489 : DestroyFunctor m_destroy;
490 :
491 : // Allocate with a zero use count. Incrementing the use count from zero to
492 : // one inserts the record into the tracking list. Decrementing the count from
493 : // one to zero removes from the tracking list and deallocates.
494 : KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
495 : const MemorySpace& arg_space, const std::string& arg_label,
496 : const size_t arg_alloc) {
497 : KOKKOS_IF_ON_HOST(
498 : (return new SharedAllocationRecord(arg_space, arg_label, arg_alloc);))
499 : KOKKOS_IF_ON_DEVICE(
500 : ((void)arg_space; (void)arg_label; (void)arg_alloc; return nullptr;))
501 : }
502 :
503 : template <typename ExecutionSpace>
504 : KOKKOS_INLINE_FUNCTION static SharedAllocationRecord* allocate(
505 : const ExecutionSpace& exec_space, const MemorySpace& arg_space,
506 : const std::string& arg_label, const size_t arg_alloc) {
507 : KOKKOS_IF_ON_HOST(
508 : (return new SharedAllocationRecord(exec_space, arg_space, arg_label,
509 : arg_alloc);))
510 : KOKKOS_IF_ON_DEVICE(((void)exec_space; (void)arg_space; (void)arg_label;
511 : (void)arg_alloc; return nullptr;))
512 : }
513 : };
514 :
515 : template <class MemorySpace>
516 : class SharedAllocationRecord<MemorySpace, void>
517 : : public SharedAllocationRecord<void, void> {};
518 :
519 : union SharedAllocationTracker {
520 : private:
521 : using Record = SharedAllocationRecord<void, void>;
522 :
523 : enum : uintptr_t { DO_NOT_DEREF_FLAG = 0x01ul };
524 :
525 : // The allocation record resides in Host memory space
526 : uintptr_t m_record_bits;
527 : Record* m_record;
528 :
529 : public:
530 : // Use macros instead of inline functions to reduce
531 : // pressure on compiler optimization by reducing
532 : // number of symbols and inline functions.
533 :
534 : #ifdef KOKKOS_ENABLE_IMPL_REF_COUNT_BRANCH_UNLIKELY
535 : #define KOKKOS_IMPL_BRANCH_PROB KOKKOS_IMPL_ATTRIBUTE_UNLIKELY
536 : #else
537 : #define KOKKOS_IMPL_BRANCH_PROB
538 : #endif
539 :
540 : #define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT \
541 : KOKKOS_IF_ON_HOST( \
542 : (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \
543 : KOKKOS_IMPL_BRANCH_PROB { Record::increment(m_record); }))
544 :
545 : #define KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT \
546 : KOKKOS_IF_ON_HOST( \
547 : (if (!(m_record_bits & DO_NOT_DEREF_FLAG)) \
548 : KOKKOS_IMPL_BRANCH_PROB { Record::decrement(m_record); }))
549 :
550 : #define KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, \
551 : override_tracking) \
552 : (((!override_tracking) || (rhs.m_record_bits & DO_NOT_DEREF_FLAG) || \
553 : (!Record::tracking_enabled())) \
554 : ? rhs.m_record_bits | DO_NOT_DEREF_FLAG \
555 : : rhs.m_record_bits)
556 :
557 : /** \brief Assign a specialized record */
558 : inline void assign_allocated_record_to_uninitialized(Record* arg_record) {
559 : if (arg_record) {
560 : Record::increment(m_record = arg_record);
561 : } else {
562 : m_record_bits = DO_NOT_DEREF_FLAG;
563 : }
564 : }
565 :
566 : template <class MemorySpace>
567 : constexpr SharedAllocationRecord<MemorySpace, void>* get_record()
568 : const noexcept {
569 : return (m_record_bits & DO_NOT_DEREF_FLAG)
570 : ? nullptr
571 : : static_cast<SharedAllocationRecord<MemorySpace, void>*>(
572 : m_record);
573 : }
574 :
575 : template <class MemorySpace>
576 0 : std::string get_label() const {
577 0 : return (m_record_bits == DO_NOT_DEREF_FLAG)
578 : ? std::string()
579 : : reinterpret_cast<SharedAllocationRecord<MemorySpace, void>*>(
580 0 : m_record_bits & ~DO_NOT_DEREF_FLAG)
581 0 : ->get_label();
582 : }
583 :
584 : KOKKOS_INLINE_FUNCTION
585 : int use_count() const {
586 : KOKKOS_IF_ON_HOST((Record* const tmp = reinterpret_cast<Record*>(
587 : m_record_bits & ~DO_NOT_DEREF_FLAG);
588 : return (tmp ? tmp->use_count() : 0);))
589 :
590 : KOKKOS_IF_ON_DEVICE((return 0;))
591 : }
592 :
593 0 : KOKKOS_INLINE_FUNCTION bool has_record() const {
594 0 : return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0;
595 : }
596 :
597 : KOKKOS_FORCEINLINE_FUNCTION
598 : void clear() {
599 : // If this is tracking then must decrement
600 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
601 : // Reset to default constructed value.
602 : m_record_bits = DO_NOT_DEREF_FLAG;
603 : }
604 :
605 : // Copy:
606 : KOKKOS_FORCEINLINE_FUNCTION
607 : ~SharedAllocationTracker(){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT}
608 :
609 : KOKKOS_FORCEINLINE_FUNCTION constexpr SharedAllocationTracker()
610 : : m_record_bits(DO_NOT_DEREF_FLAG) {}
611 :
612 : // Move:
613 :
614 : KOKKOS_FORCEINLINE_FUNCTION
615 : SharedAllocationTracker(SharedAllocationTracker&& rhs)
616 : : m_record_bits(rhs.m_record_bits) {
617 : rhs.m_record_bits = DO_NOT_DEREF_FLAG;
618 : }
619 :
620 : KOKKOS_FORCEINLINE_FUNCTION
621 : SharedAllocationTracker& operator=(SharedAllocationTracker&& rhs) {
622 : if (&rhs == this) return *this;
623 : auto swap_tmp = m_record_bits;
624 : m_record_bits = rhs.m_record_bits;
625 : rhs.m_record_bits = swap_tmp;
626 : return *this;
627 : }
628 :
629 : // Copy:
630 :
631 : KOKKOS_FORCEINLINE_FUNCTION
632 : SharedAllocationTracker(const SharedAllocationTracker& rhs)
633 : : m_record_bits(KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(
634 : rhs, true)){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT}
635 :
636 : /** \brief Copy construction may disable tracking. */
637 33714 : KOKKOS_FORCEINLINE_FUNCTION SharedAllocationTracker(
638 : const SharedAllocationTracker& rhs, const bool enable_tracking)
639 33714 : : m_record_bits(KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(
640 : rhs,
641 33714 : enable_tracking)){KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT}
642 :
643 : KOKKOS_FORCEINLINE_FUNCTION SharedAllocationTracker
644 : &
645 : operator=(const SharedAllocationTracker& rhs) {
646 : if (&rhs == this) return *this;
647 : // If this is tracking then must decrement
648 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
649 : m_record_bits = KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, true);
650 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
651 : return *this;
652 : }
653 :
654 : /* The following functions (assign_direct and assign_force_disable)
655 : * are the result of deconstructing the
656 : * KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS macro. This
657 : * allows the caller to do the check for tracking enabled and managed
658 : * apart from the assignment of the record because the tracking
659 : * enabled / managed question may be important for other tasks as well
660 : */
661 :
662 : /** \brief Copy assignment without the carry bits logic
663 : * This assumes that externally defined tracking is explicitly enabled
664 : */
665 : KOKKOS_FORCEINLINE_FUNCTION
666 : void assign_direct(const SharedAllocationTracker& rhs) {
667 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
668 : m_record_bits = rhs.m_record_bits;
669 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
670 : }
671 :
672 : /** \brief Copy assignment without the increment
673 : * we cannot assume that current record is unmanaged
674 : * but with externally defined tracking explicitly disabled
675 : * we can go straight to the do not deref flag */
676 : KOKKOS_FORCEINLINE_FUNCTION
677 : void assign_force_disable(const SharedAllocationTracker& rhs) {
678 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
679 : m_record_bits = rhs.m_record_bits | DO_NOT_DEREF_FLAG;
680 : }
681 :
682 : // report if record is tracking or not
683 : KOKKOS_FORCEINLINE_FUNCTION
684 : bool tracking_enabled() { return (!(m_record_bits & DO_NOT_DEREF_FLAG)); }
685 :
686 : /** \brief Copy assignment may disable tracking */
687 : KOKKOS_FORCEINLINE_FUNCTION
688 : void assign(const SharedAllocationTracker& rhs, const bool enable_tracking) {
689 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
690 : m_record_bits =
691 : KOKKOS_IMPL_SHARED_ALLOCATION_CARRY_RECORD_BITS(rhs, enable_tracking);
692 : KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
693 : }
694 :
695 : #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_INCREMENT
696 : #undef KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT
697 : #undef KOKKOS_IMPL_BRANCH_PROB
698 : };
699 :
700 : struct SharedAllocationDisableTrackingGuard {
701 1236 : SharedAllocationDisableTrackingGuard() {
702 1236 : KOKKOS_ASSERT(
703 1236 : (Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()));
704 1236 : Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_disable();
705 1236 : }
706 :
707 : SharedAllocationDisableTrackingGuard(
708 : const SharedAllocationDisableTrackingGuard&) = delete;
709 : SharedAllocationDisableTrackingGuard(SharedAllocationDisableTrackingGuard&&) =
710 : delete;
711 :
712 1236 : ~SharedAllocationDisableTrackingGuard() {
713 1236 : KOKKOS_ASSERT((
714 1236 : !Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enabled()));
715 1236 : Kokkos::Impl::SharedAllocationRecord<void, void>::tracking_enable();
716 1236 : }
717 : // clang-format off
718 : // The old version of clang format we use is particularly egregious here
719 : SharedAllocationDisableTrackingGuard& operator=(
720 : const SharedAllocationDisableTrackingGuard&) = delete;
721 : SharedAllocationDisableTrackingGuard& operator=(
722 : SharedAllocationDisableTrackingGuard&&) = delete;
723 : // clang-format on
724 : };
725 :
726 : template <class FunctorType, class... Args>
727 1236 : inline FunctorType construct_with_shared_allocation_tracking_disabled(
728 : Args&&... args) {
729 1236 : [[maybe_unused]] auto guard = SharedAllocationDisableTrackingGuard{};
730 2472 : return {std::forward<Args>(args)...};
731 1236 : }
732 : } /* namespace Impl */
733 : } /* namespace Kokkos */
734 : #endif
|