RMM: sam_headroom_memory_resource.hpp 源文件

 /*

  * 版权所有 (c) 2024-2025, 英伟达公司。

  *

  * 根据 Apache 许可，版本 2.0（“许可”）获得许可；

  * 除非符合许可的规定，否则您不得使用此文件。

  * 您可以在以下位置获取许可的副本：

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * 除非适用法律要求或书面同意，否则软件

  * 在许可下分发是基于“按原样”基础分发的，

  * 不附带任何形式的明示或默示担保或条件。

  * 请参阅许可了解管理权限和

  * 限制的特定语言。

  */

 #pragma once


 #include <rmm/cuda_device.hpp>

 #include <rmm/cuda_stream_view.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/mr/device/device_memory_resource.hpp>

 #include <rmm/mr/device/system_memory_resource.hpp>

 #include <rmm/resource_ref.hpp>


 #include <algorithm>

 #include <cstddef>


 namespace RMM_NAMESPACE {

 namespace mr {

 /**

  * @brief Resource that uses system memory resource to allocate memory with a headroom.

  * @brief 构建一个预留内存资源。

  */

 explicit sam_headroom_memory_resource(std::size_t headroom) : system_mr_{}, headroom_{headroom} {}


  // A headroom value is mandatory, so there is no default constructor.
  sam_headroom_memory_resource() = delete;

  ~sam_headroom_memory_resource() override = default;

  // The resource is neither copyable nor movable.
  sam_headroom_memory_resource(sam_headroom_memory_resource const&) = delete;
  sam_headroom_memory_resource(sam_headroom_memory_resource&&) = delete;
  sam_headroom_memory_resource& operator=(sam_headroom_memory_resource const&) = delete;
  sam_headroom_memory_resource& operator=(sam_headroom_memory_resource&&) = delete;

 private:

  /**
   * @brief Allocates memory of size at least `bytes` and advises a preferred location for it.
   *
   * The memory comes from the wrapped system memory resource. The free device memory reported
   * by `rmm::available_device_memory()`, reduced by the configured headroom, bounds how much of
   * the allocation is advised to prefer the current CUDA device. That leading portion is
   * rounded down to `rmm::CUDA_ALLOCATION_ALIGNMENT`; the remaining tail is advised to prefer
   * the CPU.
   *
   * If querying device memory or issuing an advice throws, the allocation is handed back to
   * the system resource before the exception propagates, so no memory is leaked.
   *
   * @param bytes The size of memory to allocate
   * @param stream The stream to associate the allocation with
   * @return void* Pointer to the allocated memory
   */
  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
  {
    void* pointer = system_mr_.allocate_async(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);

    try {
      auto const free = rmm::available_device_memory().first;
      // Clamp at zero instead of letting the unsigned subtraction wrap around when the
      // headroom exceeds what is currently free.
      auto const allocatable = free > headroom_ ? free - headroom_ : std::size_t{0};
      auto const gpu_portion =
        rmm::align_down(std::min(allocatable, bytes), rmm::CUDA_ALLOCATION_ALIGNMENT);
      auto const cpu_portion = bytes - gpu_portion;

      if (gpu_portion != 0) {
        // Leading part of the allocation: prefer the current device.
        RMM_CUDA_TRY(cudaMemAdvise(pointer,
                                   gpu_portion,
                                   cudaMemAdviseSetPreferredLocation,
                                   rmm::get_current_cuda_device().value()));
      }
      if (cpu_portion != 0) {
        // Trailing part of the allocation: prefer the CPU.
        RMM_CUDA_TRY(cudaMemAdvise(static_cast<char*>(pointer) + gpu_portion,
                                   cpu_portion,
                                   cudaMemAdviseSetPreferredLocation,
                                   cudaCpuDeviceId));
      }
    } catch (...) {
      // The caller never receives `pointer` on failure, so release it here.
      system_mr_.deallocate_async(pointer, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
      throw;
    }

    return pointer;
  }


  /**
   * @brief Deallocate memory pointed to by `ptr`.
   *
   * Forwards to the wrapped system memory resource using the same alignment that
   * `do_allocate` requested, together with the size of the allocation.
   *
   * @param ptr Pointer to be deallocated
   * @param bytes The size of memory to deallocate
   * @param stream The stream to associate the deallocation with
   */
  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
  {
    // Pass both the size and the alignment: with only three arguments the alignment constant
    // would be taken as the byte count.
    system_mr_.deallocate_async(ptr, bytes, rmm::CUDA_ALLOCATION_ALIGNMENT, stream);
  }

  /**
   * @brief Checks whether this resource is equivalent to another one.
   *
   * A resource is equivalent to itself. Otherwise `other` must also be a
   * `sam_headroom_memory_resource` and carry the same headroom value.
   *
   * @param other The other resource to compare to
   * @return bool true if the resources are equivalent, false otherwise
   */
  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
  {
    // Identity short-circuit: no need to inspect the dynamic type.
    if (&other == this) { return true; }

    auto const* const peer = dynamic_cast<sam_headroom_memory_resource const*>(&other);
    return peer != nullptr && peer->headroom_ == headroom_;
  }


  /// @brief Wrapped system_memory_resource that performs the actual allocation and deallocation
  system_memory_resource system_mr_;
  /// @brief Headroom amount subtracted from the free device memory in `do_allocate`
  std::size_t headroom_;

 }; // end of group
 } // namespace mr
 } // namespace RMM_NAMESPACE
rmm::cuda_stream_view

具有默认构造函数的 CUDA 流的强类型非拥有包装器。
定义： cuda_stream_view.hpp:39
rmm::mr::device_memory_resource
所有 librmm 设备内存分配的基类。

定义： device_memory_resource.hpp:92
rmm::mr::device_memory_resource::allocate_async
void * allocate_async(std::size_t bytes, std::size_t alignment, cuda_stream_view stream)
分配至少 bytes 大小的内存。

定义： device_memory_resource.hpp:215

rmm::mr::sam_headroom_memory_resource