aligned_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * https://apache.ac.cn/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
#include <rmm/aligned.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cstddef>
#include <mutex>
#include <unordered_map>
29 
30 namespace RMM_NAMESPACE {
31 namespace mr {
56 template <typename Upstream>
58  public
69  // 使用上游资源构造一个对齐资源适配器来满足分配请求。
71  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
72  std::size_t alignment_threshold = default_alignment_threshold)
73  : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold}
74  {
75  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
76  "Allocation alignment is not a power of 2.");
77  }
  /**
   * @brief Construct an aligned resource adaptor using `upstream` to satisfy
   * allocation requests.
   *
   * @throws rmm::logic_error if `upstream` is null (checked by
   * `to_device_async_resource_ref_checked`) or if `alignment` is not a
   * supported power of two.
   *
   * @param upstream Pointer to the resource used for allocating/deallocating
   * device memory; must not be null.
   * @param alignment The size to which allocations are aligned.
   * @param alignment_threshold Only allocations of at least this many bytes are
   * specially aligned; smaller allocations pass straight through to upstream.
   */
  explicit aligned_resource_adaptor(Upstream* upstream,
                                    std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
                                    std::size_t alignment_threshold = default_alignment_threshold)
    : upstream_{to_device_async_resource_ref_checked(upstream)},
      alignment_{alignment},
      alignment_threshold_{alignment_threshold}
  {
    RMM_EXPECTS(rmm::is_supported_alignment(alignment),
                "Allocation alignment is not a power of 2.");
  }
100 
101  aligned_resource_adaptor() = delete;
102  ~aligned_resource_adaptor() override = default;
105  aligned_resource_adaptor& operator=(aligned_resource_adaptor const&) = delete;
106  aligned_resource_adaptor& operator=(aligned_resource_adaptor&&) = delete;
  /**
   * @brief Get a reference to the upstream resource.
   *
   * @return rmm::device_async_resource_ref referencing the upstream resource.
   */
  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
  {
    return upstream_;
  }
118  static constexpr std::size_t default_alignment_threshold = 0;
119 
120  private
121  using lock_guard = std::lock_guard<std::mutex>
122 
134  // 实现 `do_allocate`
135  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
136  {
137  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
138  return get_upstream_resource().allocate_async(bytes, 1, stream);
139  }
140  auto const size = upstream_allocation_size(bytes);
141  void* pointer = get_upstream_resource().allocate_async(size, 1, stream);
142  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
143  auto const address = reinterpret_cast<std::size_t>(pointer);
144  auto const aligned_address = rmm::align_up(address, alignment_);
145  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr)
146  void* aligned_pointer = reinterpret_cast<void*>(aligned_address);
147  if (pointer != aligned_pointer) {
148  lock_guard lock(mtx_);
149  pointers_.emplace(aligned_pointer, pointer);
150  }
151  return aligned_pointer;
152  }
160  // 实现 `do_deallocate`
161  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
162  {
163  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
164  get_upstream_resource().deallocate_async(ptr, bytes, 1, stream);
165  } else {
166  {
167  lock_guard lock(mtx_);
168  auto const iter = pointers_.find(ptr);
169  if (iter != pointers_.end()) {
170  ptr = iter->second;
171  pointers_.erase(iter);
172  }
173  }
174  get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream);
175  }
176  }
184  // 比较是否相等
185  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
186  {
187  if (this == &other) { return true; }
188  auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other);
189  if (cast == nullptr) { return false; }
190  return get_upstream_resource() == cast->get_upstream_resource() &&
191  alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_;
192  }
200  // 计算上游资源所需的分配大小
201  std::size_t upstream_allocation_size(std::size_t bytes) const
202  {
203  auto const aligned_size = rmm::align_up(bytes, alignment_);
204  return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT;
205  }
  /// The upstream resource used to satisfy allocation requests.
  device_async_resource_ref upstream_;
  /// Maps each shifted (aligned) pointer returned by do_allocate to the
  /// original upstream pointer; entries exist only for allocations that were
  /// actually shifted. Guarded by `mtx_`.
  std::unordered_map<void*, void*> pointers_;
  /// The alignment (a supported power of two) applied to large allocations.
  std::size_t alignment_;
  /// Allocations of at least this many bytes are aligned; smaller ones pass through.
  std::size_t alignment_threshold_;
  /// Protects `pointers_`; mutable so const members could lock if needed.
  mutable std::mutex mtx_;
};
  // end of doxygen group
215 } // namespace mr
216 } // namespace RMM_NAMESPACE
CUDA 流的强类型非拥有包装器,带默认构造函数。
定义: cuda_stream_view.hpp:39
调整上游内存资源以指定对齐大小分配内存的资源。
定义: aligned_resource_adaptor.hpp:57
aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
使用上游资源构造对齐资源适配器以满足分配请求。
定义: aligned_resource_adaptor.hpp:69
aligned_resource_adaptor(Upstream *upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
使用上游资源构造对齐资源适配器以满足分配请求。
定义: aligned_resource_adaptor.hpp:89
rmm::device_async_resource_ref get_upstream_resource() const noexcept
上游资源的 rmm::device_async_resource_ref 引用
定义: aligned_resource_adaptor.hpp:110
所有 librmm 设备内存分配的基类。
定义: device_memory_resource.hpp:92
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
带属性 cuda::mr::device_accessible 的 cuda::mr::async_resource_ref 别名。
定义: resource_ref.hpp:40
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
将内存资源的指针转换为 device_async_resource_ref,检查是否为 nullptr
定义: resource_ref.hpp:78
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
CUDA 内存分配使用的默认对齐方式。
定义: aligned.hpp:43
constexpr bool is_supported_alignment(std::size_t alignment) noexcept
返回对齐方式是否为有效的内存对齐方式。
定义: aligned.hpp:64
constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
向上对齐到指定 2 的幂的最近倍数。
定义: aligned.hpp:77
每设备 device_memory_resources 的管理。