aligned_resource_adaptor.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021-2025, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * https://apache.ac.cn/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 
#include <rmm/aligned.hpp>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/detail/error.hpp>
#include <rmm/detail/export.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <rmm/resource_ref.hpp>

#include <cstddef>
#include <mutex>
#include <unordered_map>
29 
30 namespace RMM_NAMESPACE {
31 namespace mr {
56 template <typename Upstream>
58  public
69  // 使用上游资源构造一个对齐资源适配器来满足分配请求。
71  std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
72  std::size_t alignment_threshold = default_alignment_threshold)
73  : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold}
74  {
75  RMM_EXPECTS(rmm::is_supported_alignment(alignment),
76  "Allocation alignment is not a power of 2.");
77  }
  /**
   * @brief Construct an aligned resource adaptor using `upstream` to satisfy
   * allocation requests.
   *
   * @throws rmm::logic_error if `upstream` is null (checked by
   * `to_device_async_resource_ref_checked`) or if `alignment` is not a
   * supported power of two.
   *
   * @param upstream Pointer to the resource used for allocating/deallocating
   * device memory; must not be null.
   * @param alignment The size to which allocations are aligned.
   * @param alignment_threshold Only allocations of at least this many bytes are
   * specially aligned; smaller allocations pass straight through to upstream.
   */
  explicit aligned_resource_adaptor(Upstream* upstream,
                                    std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT,
                                    std::size_t alignment_threshold = default_alignment_threshold)
    : upstream_{to_device_async_resource_ref_checked(upstream)},
      alignment_{alignment},
      alignment_threshold_{alignment_threshold}
  {
    RMM_EXPECTS(rmm::is_supported_alignment(alignment),
                "Allocation alignment is not a power of 2.");
  }
100 
101  aligned_resource_adaptor() = delete;
102  ~aligned_resource_adaptor() override = default;
105  aligned_resource_adaptor& operator=(aligned_resource_adaptor const&) = delete;
106  aligned_resource_adaptor& operator=(aligned_resource_adaptor&&) = delete;
  /**
   * @brief Get a reference to the upstream resource.
   *
   * @return rmm::device_async_resource_ref referencing the upstream resource.
   */
  [[nodiscard]] rmm::device_async_resource_ref get_upstream_resource() const noexcept
  {
    return upstream_;
  }
118  static constexpr std::size_t default_alignment_threshold = 0;
119 
120  private
121  using lock_guard = std::lock_guard<std::mutex>
122 
134  // 实现 `do_allocate`
135  void* do_allocate(std::size_t bytes, cuda_stream_view stream) override
136  {
137  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
138  return get_upstream_resource().allocate_async(bytes, 1, stream);
139  }
140  auto const size = upstream_allocation_size(bytes);
141  void* pointer = get_upstream_resource().allocate_async(size, 1, stream);
142  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast)
143  auto const address = reinterpret_cast<std::size_t>(pointer);
144  auto const aligned_address = rmm::align_up(address, alignment_);
145  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr)
146  void* aligned_pointer = reinterpret_cast<void*>(aligned_address);
147  if (pointer != aligned_pointer) {
148  lock_guard lock(mtx_);
149  pointers_.emplace(aligned_pointer, pointer);
150  }
151  return aligned_pointer;
152  }
160  // 实现 `do_deallocate`
161  void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override
162  {
163  if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) {
164  get_upstream_resource().deallocate_async(ptr, bytes, 1, stream);
165  } else {
166  {
167  lock_guard lock(mtx_);
168  auto const iter = pointers_.find(ptr);
169  if (iter != pointers_.end()) {
170  ptr = iter->second;
171  pointers_.erase(iter);
172  }
173  }
174  get_upstream_resource().deallocate_async(ptr, upstream_allocation_size(bytes), 1, stream);
175  }
176  }
184  // 比较是否相等
185  [[nodiscard]] bool do_is_equal(device_memory_resource const& other) const noexcept override
186  {
187  if (this == &other) { return true; }
188  auto cast = dynamic_cast<aligned_resource_adaptor<Upstream> const*>(&other);
189  if (cast == nullptr) { return false; }
190  return get_upstream_resource() == cast->get_upstream_resource() &&
191  alignment_ == cast->alignment_ && alignment_threshold_ == cast->alignment_threshold_;
192  }
200  // 计算上游资源所需的分配大小
201  std::size_t upstream_allocation_size(std::size_t bytes) const
202  {
203  auto const aligned_size = rmm::align_up(bytes, alignment_);
204  return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT;
205  }
  /// The upstream resource used to satisfy allocation requests.
  device_async_resource_ref upstream_;
  /// Maps each shifted (aligned) pointer returned by do_allocate to the
  /// original upstream pointer; entries exist only for allocations that were
  /// actually shifted. Guarded by `mtx_`.
  std::unordered_map<void*, void*> pointers_;
  /// The alignment (a supported power of two) applied to large allocations.
  std::size_t alignment_;
  /// Allocations of at least this many bytes are aligned; smaller ones pass through.
  std::size_t alignment_threshold_;
  /// Protects `pointers_`; mutable so const members could lock if needed.
  mutable std::mutex mtx_;
};
  // end of doxygen group
215 } // namespace mr
216 } // namespace RMM_NAMESPACE
CUDA 流的强类型非拥有包装器,带默认构造函数。
定义: cuda_stream_view.hpp:39
调整上游内存资源以指定对齐大小分配内存的资源。
定义: aligned_resource_adaptor.hpp:57
aligned_resource_adaptor(device_async_resource_ref upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
使用上游资源构造对齐资源适配器以满足分配请求。
定义: aligned_resource_adaptor.hpp:69
aligned_resource_adaptor(Upstream *upstream, std::size_t alignment=rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold=default_alignment_threshold)
使用上游资源构造对齐资源适配器以满足分配请求。
定义: aligned_resource_adaptor.hpp:89
rmm::device_async_resource_ref get_upstream_resource() const noexcept
上游资源的 rmm::device_async_resource_ref 引用
定义: aligned_resource_adaptor.hpp:110
所有 librmm 设备内存分配的基类。
定义: device_memory_resource.hpp:92
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
带属性 cuda::mr::device_accessible 的 cuda::mr::async_resource_ref 别名。
定义: resource_ref.hpp:40
device_async_resource_ref to_device_async_resource_ref_checked(Resource *res)
将内存资源的指针转换为 device_async_resource_ref,检查是否为 nullptr
定义: resource_ref.hpp:78
static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT
CUDA 内存分配使用的默认对齐方式。
定义: aligned.hpp:43
constexpr bool is_supported_alignment(std::size_t alignment) noexcept
返回对齐方式是否为有效的内存对齐方式。
定义: aligned.hpp:64
constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept
向上对齐到指定 2 的幂的最近倍数。
定义: aligned.hpp:77
每设备 device_memory_resources 的管理。