RMM: tracking_resource_adaptor.hpp 源文件

 /*

  * Copyright (c) 2020-2025, NVIDIA CORPORATION.

  *

  * 根据 Apache 许可证 2.0 版（以下简称“许可证”）获得许可；

  * 除非符合许可证的规定，否则您不得使用此文件。

  * 您可以在以下网址获取许可证的副本：

  *

  * http://www.apache.org/licenses/LICENSE-2.0

  *

  * 除非适用法律要求或书面同意，根据许可证分发的软件

  * 按“现状”分发，不附带任何明示或暗示的担保或条件。

  * 有关管理权限和

  * 许可证下的限制，请参阅许可证。

  */


 #pragma once


 #include <rmm/detail/error.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/detail/stack_trace.hpp>

 #include <rmm/logger.hpp>

 #include <rmm/mr/device/device_memory_resource.hpp>

 #include <rmm/mr/device/per_device_resource.hpp>

 #include <rmm/resource_ref.hpp>


 #include <atomic>

 #include <cstddef>

 #include <map>

 #include <mutex>

 #include <shared_mutex>

 #include <sstream>


 namespace RMM_NAMESPACE {

 namespace mr {

 /**
  * @brief Resource that uses `Upstream` to allocate memory and tracks allocations.
  *
  * Every allocation served through this adaptor is recorded in a map keyed by
  * the returned pointer, together with its size and (optionally) the call stack
  * captured at allocation time. A running total of allocated bytes is kept in an
  * atomic counter. Access to the map from `do_allocate`, `do_deallocate` and
  * `get_outstanding_allocations_str` is guarded by a `std::shared_mutex`.
  *
  * @tparam Upstream Type of the upstream resource used for
  * allocation/deallocation.
  */
 template <typename Upstream>
 class tracking_resource_adaptor final : public device_memory_resource {
  public:
   /// Lock type used to synchronize read access to the allocation map.
   using read_lock_t = std::shared_lock<std::shared_mutex>;
   /// Lock type used to synchronize write access to the allocation map.
   using write_lock_t = std::unique_lock<std::shared_mutex>;

   /**
    * @brief Information stored about an allocation.
    *
    * Includes the size and, if the tracking resource was set to capture stacks,
    * the call stack at the point of allocation.
    */
   struct allocation_info {
     std::unique_ptr<rmm::detail::stack_trace> strace;  ///< Stack trace of the allocation
                                                        ///< (nullptr when not captured)
     std::size_t allocation_size;                       ///< Size of the allocation in bytes

     allocation_info() = delete;

     /**
      * @brief Construct a new allocation info object.
      *
      * @param size Size of the allocation
      * @param capture_stack If true, capture the call stack for the allocation
      */
     allocation_info(std::size_t size, bool capture_stack)
       : strace{capture_stack ? std::make_unique<rmm::detail::stack_trace>() : nullptr},
         allocation_size{size}
     {
     }
   };

   /**
    * @brief Construct a new tracking resource adaptor using `upstream` to satisfy
    * allocation requests.
    *
    * @param upstream The resource used for allocating/deallocating device memory
    * @param capture_stacks If true, capture a stack trace for every allocation
    */
   tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks = false)
     : capture_stacks_{capture_stacks}, allocated_bytes_{0}, upstream_{upstream}
   {
   }

   /**
    * @brief Construct a new tracking resource adaptor using `upstream` to satisfy
    * allocation requests.
    *
    * The pointer is converted with `to_device_async_resource_ref_checked`, which
    * checks it for `nullptr`.
    *
    * @param upstream The resource used for allocating/deallocating device memory
    * @param capture_stacks If true, capture a stack trace for every allocation
    */
   tracking_resource_adaptor(Upstream* upstream, bool capture_stacks = false)
     : capture_stacks_{capture_stacks},
       allocated_bytes_{0},
       upstream_{to_device_async_resource_ref_checked(upstream)}
   {
   }

   tracking_resource_adaptor()           = delete;
   ~tracking_resource_adaptor() override = default;
   tracking_resource_adaptor(tracking_resource_adaptor const&) = delete;
   // NOTE(review): the class holds a std::atomic and a std::shared_mutex, neither
   // of which is movable, so these defaulted move operations are presumably
   // defined as deleted -- confirm whether that is intended.
   tracking_resource_adaptor(tracking_resource_adaptor&&) noexcept = default;
   tracking_resource_adaptor& operator=(tracking_resource_adaptor const&) = delete;
   tracking_resource_adaptor& operator=(tracking_resource_adaptor&&) noexcept = default;

   /**
    * @brief Get a reference to the upstream resource.
    *
    * @return rmm::device_async_resource_ref to the upstream resource
    */
   [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
   {
     return upstream_;
   }

   /**
    * @brief Get the outstanding allocations map.
    *
    * No lock is taken here and a reference to the internal map is returned, so
    * the caller must not read it while other threads allocate or deallocate
    * through this adaptor.
    *
    * @return Reference to the map from allocated pointer to its `allocation_info`
    */
   std::map<void*, allocation_info> const& get_outstanding_allocations() const noexcept
   {
     return allocations_;
   }

   /**
    * @brief Query the number of bytes that have been allocated.
    *
    * @return Current value of the allocated-bytes counter
    */
   std::size_t get_allocated_bytes() const noexcept { return allocated_bytes_; }

   /**
    * @brief Get a string containing the outstanding allocation pointers, their
    * sizes, and optionally their call stacks.
    *
    * Each allocation produces one line of the form `<ptr>: <size> B`, followed by
    * ` : callstack:` and the stack trace when one was captured. The map is read
    * under a shared (read) lock.
    *
    * @return The formatted string; empty when there are no outstanding allocations
    */
   std::string get_outstanding_allocations_str() const
   {
     read_lock_t lock(mtx_);

     std::ostringstream oss;
     for (auto const& [address, info] : allocations_) {
       oss << address << ": " << info.allocation_size << " B";
       if (info.strace != nullptr) { oss << " : callstack:" << '\n' << *info.strace; }
       // '\n' rather than std::endl: flushing a string stream serves no purpose.
       oss << '\n';
     }

     return oss.str();
   }

   /**
    * @brief Log any outstanding allocations via RMM_LOG_DEBUG.
    *
    * Compiles to a no-op unless the active log level includes debug messages.
    */
   void log_outstanding_allocations() const
   {
 #if RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG
     RMM_LOG_DEBUG("Outstanding Allocations: %s", get_outstanding_allocations_str());
 #endif  // RMM_LOG_ACTIVE_LEVEL <= RMM_LOG_LEVEL_DEBUG
   }

  private:
   /**
    * @brief Allocate `bytes` from the upstream resource and record the
    * allocation.
    *
    * @param bytes The size, in bytes, of the allocation
    * @param stream Stream on which to perform the allocation
    * @return Pointer returned by the upstream resource
    */
   void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
   {
     void* ptr = get_upstream_resource().allocate_async(bytes, stream);

     // Track it. try_emplace builds the allocation_info (and the optional stack
     // trace) in place instead of constructing and moving a temporary.
     {
       write_lock_t lock(mtx_);
       allocations_.try_emplace(ptr, bytes, capture_stacks_);
     }
     allocated_bytes_ += bytes;

     return ptr;
   }

   /**
    * @brief Deallocate `ptr` through the upstream resource and stop tracking it.
    *
    * Mismatches (unknown pointer, or a size different from the one recorded at
    * allocation) are logged rather than thrown. When the sizes differ, the
    * recorded size is the one subtracted from the allocated-bytes counter.
    *
    * @param ptr Pointer to be deallocated
    * @param bytes Size of the allocation as claimed by the caller
    * @param stream Stream on which to perform the deallocation
    */
   void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
   {
     // NOTE(review): the upstream release happens before the tracking entry is
     // removed; if another thread is handed the same address in between, its
     // entry could be lost -- confirm whether the order should be swapped.
     get_upstream_resource().deallocate_async(ptr, bytes, stream);
     {
       write_lock_t lock(mtx_);

       auto const found = allocations_.find(ptr);

       // Ensure the allocation is found and the number of bytes match
       if (found == allocations_.end()) {
         // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
         // std::terminate
         // NOTE(review): `bytes` is still subtracted from the counter below even
         // though this pointer was never tracked -- confirm this is intended.
         RMM_LOG_ERROR(
           "Deallocating a pointer that was not tracked. Ptr: %p [%zuB], Current Num. Allocations: "
           "%zu",
           ptr,
           bytes,
           this->allocations_.size());
       } else {
         // Read the recorded size BEFORE erasing: erase() invalidates `found`,
         // so dereferencing it afterwards would be undefined behavior.
         auto const allocated_bytes = found->second.allocation_size;
         allocations_.erase(found);

         if (allocated_bytes != bytes) {
           // Don't throw but log an error. Throwing in a destructor (or any noexcept) will call
           // std::terminate
           RMM_LOG_ERROR(
             "Alloc bytes (%zu) and Dealloc bytes (%zu) do not match", allocated_bytes, bytes);

           // Trust the recorded size so the running total stays consistent.
           bytes = allocated_bytes;
         }
       }
     }
     allocated_bytes_ -= bytes;
   }

   /**
    * @brief Compare this resource to another.
    *
    * @param other The other resource to compare to
    * @return true if `other` is this object, or is a `tracking_resource_adaptor`
    * of the same `Upstream` type whose upstream resource compares equal
    * @return false otherwise
    */
   bool do_is_equal(device_memory_resource const& other) const noexcept override
   {
     if (this == &other) { return true; }
     auto cast = dynamic_cast<tracking_resource_adaptor<Upstream> const*>(&other);
     if (cast == nullptr) { return false; }
     return get_upstream_resource() == cast->get_upstream_resource();
   }

   bool capture_stacks_;                           // whether or not to capture call stacks
   std::map<void*, allocation_info> allocations_;  // map of active allocations
   std::atomic<std::size_t> allocated_bytes_;      // number of bytes currently allocated
   std::shared_mutex mutable mtx_;                 // mutex for thread safe access to allocations_
   device_async_resource_ref upstream_;            // the upstream resource used for satisfying
                                                   // allocation requests
 };

  // end of group

 } // namespace mr

 } // namespace RMM_NAMESPACE

rmm::cuda_stream_view
CUDA 流的强类型非拥有包装器，带默认构造函数。
定义： cuda_stream_view.hpp:39

rmm::mr::device_memory_resource
所有 librmm 设备内存分配的基类。
定义： device_memory_resource.hpp:92

rmm::mr::tracking_resource_adaptor
使用 Upstream 分配内存并跟踪分配的资源。
定义： tracking_resource_adaptor.hpp:56

rmm::mr::tracking_resource_adaptor::tracking_resource_adaptor
tracking_resource_adaptor(Upstream *upstream, bool capture_stacks=false)
构造一个新的跟踪资源适配器，使用 upstream 来满足分配请求。
定义： tracking_resource_adaptor.hpp:107

rmm::mr::tracking_resource_adaptor::tracking_resource_adaptor
tracking_resource_adaptor(device_async_resource_ref upstream, bool capture_stacks=false)
构造一个新的跟踪资源适配器，使用 upstream 来满足分配请求。
定义： tracking_resource_adaptor.hpp:93

rmm::mr::tracking_resource_adaptor::get_allocated_bytes
std::size_t get_allocated_bytes() const noexcept
查询已分配的字节数。请注意，这不能用于了解有多大...
定义： tracking_resource_adaptor.hpp:152

rmm::mr::tracking_resource_adaptor::write_lock_t
std::unique_lock< std::shared_mutex > write_lock_t
用于同步写访问的锁类型。
定义： tracking_resource_adaptor.hpp:61

rmm::mr::tracking_resource_adaptor::tracking_resource_adaptor
tracking_resource_adaptor(tracking_resource_adaptor &&) noexcept=default
默认移动构造函数。

rmm::mr::tracking_resource_adaptor::get_outstanding_allocations_str
std::string get_outstanding_allocations_str() const
获取一个字符串，其中包含未完成的分配指针、它们的大小以及可选的调用栈...
定义： tracking_resource_adaptor.hpp:164

rmm::mr::tracking_resource_adaptor::get_outstanding_allocations
std::map< void *, allocation_info > const & get_outstanding_allocations() const noexcept
获取未完成的分配映射。
定义： tracking_resource_adaptor.hpp:138

rmm::mr::tracking_resource_adaptor::log_outstanding_allocations
void log_outstanding_allocations() const
通过 RMM_LOG_DEBUG 记录任何未完成的分配。
定义： tracking_resource_adaptor.hpp:187

rmm::mr::tracking_resource_adaptor::read_lock_t
std::shared_lock< std::shared_mutex > read_lock_t
用于同步读访问的锁类型。
定义： tracking_resource_adaptor.hpp:59

device_memory_resource.hpp

rmm::device_async_resource_ref
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
具有 cuda::mr::device_accessible 属性的 cuda::mr::async_resource_ref 的别名。
定义： resource_ref.hpp:40

rmm::to_device_async_resource_ref_checked
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
将内存资源指针转换为 device_async_resource_ref，检查是否为 nullptr
定义： resource_ref.hpp:78

per_device_resource.hpp
管理每设备的 device_memory_resource。

resource_ref.hpp

rmm::mr::tracking_resource_adaptor::allocation_info
关于分配存储的信息。包括大小以及如果跟踪资源启用了捕获调用栈，则还包括调用栈。
定义： tracking_resource_adaptor.hpp:68

rmm::mr::tracking_resource_adaptor::allocation_info::strace
std::unique_ptr< rmm::detail::stack_trace > strace
分配的调用栈。
定义： tracking_resource_adaptor.hpp:69

rmm::mr::tracking_resource_adaptor::allocation_info::allocation_size
std::size_t allocation_size
分配的大小。
定义： tracking_resource_adaptor.hpp:70

rmm::mr::tracking_resource_adaptor::allocation_info::allocation_info
allocation_info(std::size_t size, bool capture_stack)
构造一个新的 allocation info 对象。
定义： tracking_resource_adaptor.hpp:79