RMM: cuda_async_view_memory_resource.hpp 源文件

 /*

  * 版权所有 (c) 2021-2025, NVIDIA CORPORATION.

  *

  * 根据 Apache 许可，版本 2.0 (以下称“许可”) 获得许可；

  * 除非遵守许可条款，否则不得使用此文件。

  * 您可以获得许可协议的副本，网址为

  *

  * http://www.apache.org/licenses/LICENSE-2.0

  *

  * 除非适用法律要求或经书面同意，否则

  * 根据许可分发的软件均按“原样”分发，

  * 不附带任何明示或暗示的保证或条件。

  * 有关许可项下权限和限制的

  * 具体条款，请参阅许可。

  */

 #pragma once


 #include <rmm/cuda_device.hpp>

 #include <rmm/cuda_stream_view.hpp>

 #include <rmm/detail/error.hpp>

 #include <rmm/detail/export.hpp>

 #include <rmm/detail/thrust_namespace.h>

 #include <rmm/mr/device/device_memory_resource.hpp>


 #include <cuda_runtime_api.h>


 #include <cstddef>


 namespace RMM_NAMESPACE {

 namespace mr {

 /**
  * @brief `device_memory_resource` that allocates from an existing CUDA memory pool.
  *
  * Allocation goes through `cudaMallocFromPoolAsync` on the stored pool handle and
  * deallocation goes through `cudaFreeAsync`. The class stores only the
  * `cudaMemPool_t` handle; nothing in this class creates or destroys the pool (the
  * destructor is defaulted), so the pool's lifetime is managed elsewhere.
  */
 class cuda_async_view_memory_resource final : public device_memory_resource {
  public:
   /**
    * @brief Construct a resource that allocates from `pool_handle`.
    *
    * The handle is validated inside an immediately-invoked lambda so the null check
    * runs as part of member initialization. The constructor body then queries
    * `cudaDevAttrMemoryPoolsSupported` for the current device.
    *
    * Both checks report failure through `RMM_EXPECTS`.
    * NOTE(review): presumably this throws `rmm::logic_error` -- confirm in
    * `rmm/detail/error.hpp`.
    *
    * @param pool_handle Handle of the CUDA memory pool to allocate from; must not be
    * null.
    */
   cuda_async_view_memory_resource(cudaMemPool_t pool_handle)
     : cuda_pool_handle_{[pool_handle]() {
         RMM_EXPECTS(nullptr != pool_handle, "意外的空内存池句柄。");
         return pool_handle;
       }()}
   {
     // Check whether cudaMallocAsync memory pools are supported on the current device.
     auto const device = rmm::get_current_cuda_device();
     int cuda_pool_supported{};
     auto result =
       cudaDeviceGetAttribute(&cuda_pool_supported, cudaDevAttrMemoryPoolsSupported, device.value());
     RMM_EXPECTS(result == cudaSuccess && cuda_pool_supported,
                 "此 CUDA 驱动程序/运行时版本不支持 cudaMallocAsync");
   }

   /**
    * @brief Return the stored native CUDA memory pool handle.
    *
    * @return The `cudaMemPool_t` this resource allocates from.
    */
   [[nodiscard]] cudaMemPool_t pool_handle() const noexcept { return cuda_pool_handle_; }

   // NOTE(review): a default-constructed resource holds a value-initialized handle
   // and skips the validation performed by the pool-handle constructor.
   cuda_async_view_memory_resource()  = default;
   ~cuda_async_view_memory_resource() = default;
   cuda_async_view_memory_resource(cuda_async_view_memory_resource const&) =
     default;  ///< Default copy constructor
   cuda_async_view_memory_resource(cuda_async_view_memory_resource&&) =
     default;  ///< Default move constructor
   cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource const&) =
     default;  ///< Default copy assignment operator
   cuda_async_view_memory_resource& operator=(cuda_async_view_memory_resource&&) =
     default;  ///< Default move assignment operator

  private:
   cudaMemPool_t cuda_pool_handle_{};  ///< Handle of the pool used by do_allocate

   /**
    * @brief Allocate `bytes` bytes from the stored pool on `stream`.
    *
    * A zero-byte request makes no CUDA call and returns `nullptr`. Errors from
    * `cudaMallocFromPoolAsync` are reported through `RMM_CUDA_TRY_ALLOC`.
    *
    * @param bytes Size of the allocation in bytes.
    * @param stream Stream passed to `cudaMallocFromPoolAsync`.
    * @return Pointer to the allocated memory, or `nullptr` when `bytes` is 0.
    */
   void* do_allocate(std::size_t bytes, rmm::cuda_stream_view stream) override
   {
     void* ptr{nullptr};
     if (bytes > 0) {
       RMM_CUDA_TRY_ALLOC(cudaMallocFromPoolAsync(&ptr, bytes, pool_handle(), stream.value()),
                          bytes);
     }
     return ptr;
   }

   /**
    * @brief Free `ptr` with `cudaFreeAsync` on `stream`.
    *
    * A null `ptr` is ignored. The result of `cudaFreeAsync` is checked with
    * `RMM_ASSERT_CUDA_SUCCESS`.
    *
    * @param ptr Pointer to free; may be null.
    * @param bytes Size of the allocation; not used by this implementation.
    * @param stream Stream passed to `cudaFreeAsync`.
    */
   void do_deallocate(void* ptr,
                      [[maybe_unused]] std::size_t bytes,
                      rmm::cuda_stream_view stream) override
   {
     if (ptr != nullptr) { RMM_ASSERT_CUDA_SUCCESS(cudaFreeAsync(ptr, stream.value())); }
   }

   /**
    * @brief Compare this resource to another.
    *
    * Only the dynamic type is checked; the pool handles are not compared.
    *
    * @param other The resource to compare against.
    * @return true if `other` is a `cuda_async_view_memory_resource`, false otherwise.
    */
   [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
   {
     return dynamic_cast<cuda_async_view_memory_resource const*>(&other) != nullptr;
   }
 };

  // 组结束

 } // 命名空间 mr

 } // 命名空间 RMM_NAMESPACE

rmm::cuda_stream_view
具有默认构造函数的 CUDA 流的强类型非拥有包装器。
定义： cuda_stream_view.hpp:39

rmm::cuda_stream_view::value
constexpr cudaStream_t value() const noexcept
获取包装的流。
定义： cuda_stream_view.hpp:73

rmm::mr::cuda_async_view_memory_resource
使用 cudaMallocAsync/cudaFreeAsync 进行分配/释放的 device_memory_resource 派生类。
定义： cuda_async_view_memory_resource.hpp:41

rmm::mr::cuda_async_view_memory_resource::operator=
cuda_async_view_memory_resource & operator=(cuda_async_view_memory_resource &&)=default
默认移动赋值运算符。

rmm::mr::cuda_async_view_memory_resource::cuda_async_view_memory_resource
cuda_async_view_memory_resource(cuda_async_view_memory_resource &&)=default
默认移动构造函数。

rmm::mr::cuda_async_view_memory_resource::pool_handle
cudaMemPool_t pool_handle() const noexcept
返回 CUDA 内存池的底层原生句柄。
定义： cuda_async_view_memory_resource.hpp:73

rmm::mr::cuda_async_view_memory_resource::cuda_async_view_memory_resource
cuda_async_view_memory_resource(cudaMemPool_t pool_handle)
构造一个 cuda_async_view_memory_resource，它使用现有的 CUDA 内存池。
定义： cuda_async_view_memory_resource.hpp:53

rmm::mr::cuda_async_view_memory_resource::cuda_async_view_memory_resource
cuda_async_view_memory_resource(cuda_async_view_memory_resource const &)=default
默认复制构造函数。

rmm::mr::cuda_async_view_memory_resource::operator=
cuda_async_view_memory_resource & operator=(cuda_async_view_memory_resource const &)=default
默认复制赋值运算符。

rmm::mr::device_memory_resource
所有 librmm 设备内存分配的基类。
定义： device_memory_resource.hpp:92

cuda_device.hpp

cuda_stream_view.hpp

device_memory_resource.hpp

rmm::get_current_cuda_device
cuda_device_id get_current_cuda_device()
返回当前设备的 cuda_device_id。
定义： cuda_device.hpp:99