RMM: device_uvector.hpp 源文件

 /*

  * Copyright (c) 2020-2025, NVIDIA CORPORATION.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  * http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #pragma once


 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/detail/error.hpp>
 #include <rmm/detail/exec_check_disable.hpp>
 #include <rmm/detail/export.hpp>
 #include <rmm/device_buffer.hpp>
 #include <rmm/mr/device/per_device_resource.hpp>
 #include <rmm/resource_ref.hpp>

 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <limits>
 #include <type_traits>
 #include <utility>


 namespace RMM_NAMESPACE {

 template <typename T>

 class device_uvector {

  static_assert(std::is_trivially_copyable_v<T>,

  "device_uvector 仅支持可平凡复制的类型。");


  public

  using value_type = T;

  using size_type = std::size_t;

  using reference = value_type&

  using const_reference = value_type const&

  using pointer = value_type*;

  using const_pointer = value_type const*;

  using iterator = pointer;

  using const_iterator = const_pointer;


  RMM_EXEC_CHECK_DISABLE

  ~device_uvector() = default;


  RMM_EXEC_CHECK_DISABLE

  device_uvector(device_uvector&&) noexcept = default;


  RMM_EXEC_CHECK_DISABLE

  device_uvector& operator=(device_uvector&&) noexcept =

  default;


  device_uvector(device_uvector const&) = delete;


  device_uvector& operator=(device_uvector const&) = delete;


  device_uvector() = delete;


  explicit device_uvector(std::size_t size,

  cuda_stream_view stream,

  device_async_resource_ref mr = mr::get_current_device_resource_ref())

  : _storage{elements_to_bytes(size), stream, mr}

  {

  }


  explicit device_uvector(device_uvector const& other,

  cuda_stream_view stream,

  device_async_resource_ref mr = mr::get_current_device_resource_ref())

  : _storage{other._storage, stream, mr}

  {

  }


  [[nodiscard]] pointer element_ptr(std::size_t element_index) noexcept

  {

  assert(element_index < size() && "尝试访问越界元素。");

  return data() + element_index;

  }


  [[nodiscard]] const_pointer element_ptr(std::size_t element_index) const noexcept

  {

  assert(element_index < size() && "尝试访问越界元素。");

  return data() + element_index;

  }


  void set_element_async(std::size_t element_index,

  value_type const& value,

  cuda_stream_view stream)

  {

  RMM_EXPECTS(

  element_index < size(), "尝试访问越界元素。", rmm::out_of_range);


  if constexpr (std::is_same_v<value_type, bool>) {

  RMM_CUDA_TRY(

  cudaMemsetAsync(element_ptr(element_index), value, sizeof(value), stream.value()));

  return;

  }


  if constexpr (std::is_fundamental_v<value_type>) {

  if (value == value_type{0}) {

  set_element_to_zero_async(element_index, stream);

  return;

  }

  }


  RMM_CUDA_TRY(cudaMemcpyAsync(

  element_ptr(element_index), &value, sizeof(value), cudaMemcpyDefault, stream.value()));

  }


  // 我们删除右值引用重载，以防止在字面值或隐式临时值被删除或超出范围后对其进行异步复制。

  // We delete the r-value reference overload to prevent asynchronously copying from a literal or

  // implicit temporary value after it is deleted or goes out of scope.

  void set_element_async(std::size_t, value_type const&&, cuda_stream_view) = delete;

  void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream)

  {

  RMM_EXPECTS(

  element_index < size(), "尝试访问越界元素。", rmm::out_of_range);

  RMM_CUDA_TRY(

  cudaMemsetAsync(element_ptr(element_index), 0, sizeof(value_type), stream.value()));

  }


  void set_element(std::size_t element_index, T const& value, cuda_stream_view stream)

  {

  set_element_async(element_index, value, stream);

  stream.synchronize_no_throw();

  }


  [[nodiscard]] value_type element(std::size_t element_index, cuda_stream_view stream) const

  {

  RMM_EXPECTS(

  element_index < size(), "尝试访问越界元素。", rmm::out_of_range);

  value_type value;

  RMM_CUDA_TRY(cudaMemcpyAsync(

  &value, element_ptr(element_index), sizeof(value), cudaMemcpyDefault, stream.value()));

  stream.synchronize();

  return value;

  }


  [[nodiscard]] value_type front_element(cuda_stream_view stream) const

  {

  return element(0, stream);

  }


  [[nodiscard]] value_type back_element(cuda_stream_view stream) const

  {

  return element(size() - 1, stream);

  }


  void reserve(std::size_t new_capacity, cuda_stream_view stream)

  {

  _storage.reserve(elements_to_bytes(new_capacity), stream);

  }


  void resize(std::size_t new_size, cuda_stream_view stream)

  {

  _storage.resize(elements_to_bytes(new_size), stream);

  }


  void shrink_to_fit(cuda_stream_view stream) { _storage.shrink_to_fit(stream); }


  device_buffer release() noexcept { return std::move(_storage); }


  [[nodiscard]] std::size_t capacity() const noexcept

  {

  return bytes_to_elements(_storage.capacity());

  }


  [[nodiscard]] pointer data() noexcept { return static_cast<pointer>(_storage.data()); }


  [[nodiscard]] const_pointer data() const noexcept

  {

  return static_cast<const_pointer>(_storage.data());

  }


  [[nodiscard]] iterator begin() noexcept { return data(); }


  [[nodiscard]] const_iterator cbegin() const noexcept { return data(); }


  [[nodiscard]] const_iterator begin() const noexcept { return cbegin(); }


  [[nodiscard]] iterator end() noexcept { return data() + size(); }


  [[nodiscard]] const_iterator cend() const noexcept { return data() + size(); }


  [[nodiscard]] const_iterator end() const noexcept { return cend(); }


  [[nodiscard]] std::size_t size() const noexcept { return bytes_to_elements(_storage.size()); }


  [[nodiscard]] std::int64_t ssize() const noexcept

  {

  assert(size() < static_cast<std::size_t>(std::numeric_limits<int64_t>::max()) &&

  "大小溢出有符号整数");

  return static_cast<int64_t>(size());

  }


  [[nodiscard]] bool is_empty() const noexcept { return size() == 0; }


  [[nodiscard]] rmm::device_async_resource_ref memory_resource() const noexcept

  {

  return _storage.memory_resource();

  }


  [[nodiscard]] cuda_stream_view stream() const noexcept { return _storage.stream(); }


  void set_stream(cuda_stream_view stream) noexcept { _storage.set_stream(stream); }


  private

  device_buffer _storage{};


  [[nodiscard]] std::size_t constexpr elements_to_bytes(std::size_t num_elements) const noexcept

  {

  return num_elements * sizeof(value_type);

  }


  [[nodiscard]] std::size_t constexpr bytes_to_elements(std::size_t num_bytes) const noexcept

  {

  return num_bytes / sizeof(value_type);

  }

 };

  // end of group

 } // namespace RMM_NAMESPACE

rmm::cuda_stream_view
用于 CUDA stream 的强类型非拥有包装器，带默认构造函数。
定义： cuda_stream_view.hpp:39

rmm::cuda_stream_view::value
constexpr cudaStream_t value() const noexcept
获取包装的 stream。
定义： cuda_stream_view.hpp:73

rmm::cuda_stream_view::synchronize
void synchronize() const
同步所查看的 CUDA stream。
定义： cuda_stream_view.hpp:106

rmm::cuda_stream_view::synchronize_no_throw
void synchronize_no_throw() const noexcept
同步所查看的 CUDA stream。如果发生错误，不会抛出异常。
定义： cuda_stream_view.hpp:113

rmm::device_buffer
用于设备内存分配的 RAII 结构。
定义： device_buffer.hpp:82

rmm::device_uvector
设备内存中元素的未初始化向量。
定义： device_uvector.hpp:76

rmm::device_uvector::cend
const_iterator cend() const noexcept
返回指向向量最后一个元素后一个元素的 const_iterator。
定义： device_uvector.hpp:495

rmm::device_uvector::capacity
std::size_t capacity() const noexcept
返回当前已分配存储中可容纳的元素数量。
定义： device_uvector.hpp:422

rmm::device_uvector::resize
void resize(std::size_t new_size, cuda_stream_view stream)
将向量大小调整为包含 new_size 个元素。
定义： device_uvector.hpp:395

rmm::device_uvector::pointer
value_type * pointer
由 data() 返回的指针类型
定义： device_uvector.hpp:86

rmm::device_uvector::element_ptr
const_pointer element_ptr(std::size_t element_index) const noexcept
返回指向指定元素的指针。
定义： device_uvector.hpp:172

rmm::device_uvector::is_empty
bool is_empty() const noexcept
如果向量不包含元素（即 size() == 0），则为 true
定义： device_uvector.hpp:525

rmm::device_uvector::data
const_pointer data() const noexcept
返回指向底层设备存储的 const 指针。
定义： device_uvector.hpp:445

rmm::device_uvector::size
std::size_t size() const noexcept
向量中的元素数量。
定义： device_uvector.hpp:510

rmm::device_uvector::data
pointer data() noexcept
返回指向底层设备存储的指针。
定义： device_uvector.hpp:435

rmm::device_uvector::shrink_to_fit
void shrink_to_fit(cuda_stream_view stream)
强制解除分配未使用的设备内存。
定义： device_uvector.hpp:407

rmm::device_uvector::end
iterator end() noexcept
返回指向向量最后一个元素后一个元素的 iterator。
定义： device_uvector.hpp:485

rmm::device_uvector::size_type
std::size_t size_type
用于向量大小的类型。
定义： device_uvector.hpp:82

rmm::device_uvector::element_ptr
pointer element_ptr(std::size_t element_index) noexcept
返回指向指定元素的指针。
定义： device_uvector.hpp:158

rmm::device_uvector::ssize
std::int64_t ssize() const noexcept
向量中元素的带符号数量。
定义： device_uvector.hpp:515

rmm::device_uvector::value_type
T value_type
T；存储值的类型。
定义： device_uvector.hpp:81

rmm::device_uvector::cbegin
const_iterator cbegin() const noexcept
返回指向第一个元素的 const_iterator。
定义： device_uvector.hpp:466

rmm::device_uvector::back_element
value_type back_element(cuda_stream_view stream) const
返回最后一个元素。
定义： device_uvector.hpp:357

rmm::device_uvector::set_element_to_zero_async
void set_element_to_zero_async(std::size_t element_index, cuda_stream_view stream)
异步将设备内存中的指定元素设置为零。
定义： device_uvector.hpp:264

rmm::device_uvector::const_iterator
const_pointer const_iterator
由 cbegin() 返回的 const iterator 类型
定义： device_uvector.hpp:89

rmm::device_uvector::release
device_buffer release() noexcept
释放设备内存存储的所有权。
定义： device_uvector.hpp:414

rmm::device_uvector::set_element_async
void set_element_async(std::size_t element_index, value_type const &value, cuda_stream_view stream)
将 value 异步复制到设备内存中的指定元素。
定义： device_uvector.hpp:214

rmm::device_uvector::device_uvector
device_uvector(device_uvector &&) noexcept=default
默认移动构造函数。

rmm::device_uvector::iterator
pointer iterator
由 begin() 返回的 iterator 类型
定义： device_uvector.hpp:88

rmm::device_uvector::reference
value_type & reference
value_type&；由 operator[](size_type) 返回的引用类型
定义： device_uvector.hpp:83

rmm::device_uvector::end
const_iterator end() const noexcept
返回指向向量最后一个元素后一个元素的 const_iterator。
定义： device_uvector.hpp:505

rmm::device_uvector::reserve
void reserve(std::size_t new_capacity, cuda_stream_view stream)
将向量的容量增加到 new_capacity 个元素。
定义： device_uvector.hpp:374

rmm::device_uvector::element
value_type element(std::size_t element_index, cuda_stream_view stream) const
从设备内存中返回指定的元素。
定义： device_uvector.hpp:319

rmm::device_uvector::front_element
value_type front_element(cuda_stream_view stream) const
返回第一个元素。
定义： device_uvector.hpp:341

rmm::device_uvector::const_pointer
value_type const * const_pointer
由 data() const 返回的指针类型。
定义： device_uvector.hpp:87

rmm::device_uvector::device_uvector
device_uvector(device_uvector const &other, cuda_stream_view stream, device_async_resource_ref mr=mr::get_current_device_resource_ref())
通过深度复制另一个 device_uvector 的内容来构造一个新的 device_uvector。
定义： device_uvector.hpp:143

rmm::device_uvector::const_reference
value_type const & const_reference
定义： device_uvector.hpp:85

rmm::device_uvector::set_element
void set_element(std::size_t element_index, T const &value, cuda_stream_view stream)
将 value 同步复制到设备内存中的指定元素。
定义： device_uvector.hpp:301

rmm::device_uvector::begin
const_iterator begin() const noexcept
返回指向第一个元素的 const_iterator。
定义： device_uvector.hpp:475

rmm::device_uvector::begin
iterator begin() noexcept
返回指向第一个元素的 iterator。
定义： device_uvector.hpp:457

rmm::out_of_range
尝试访问定义范围之外时抛出的异常。
定义： error.hpp:110

cuda_stream_view.hpp

device_buffer.hpp

rmm::device_async_resource_ref
cuda::mr::async_resource_ref< cuda::mr::device_accessible > device_async_resource_ref
具有 cuda::mr::device_accessible 属性的 cuda::mr::async_resource_ref 的别名。
定义： resource_ref.hpp:40

rmm::mr::get_current_device_resource_ref
device_async_resource_ref get_current_device_resource_ref()
获取当前设备的 device_async_resource_ref。
定义： per_device_resource.hpp:411

per_device_resource.hpp
管理每个设备的 device_memory_resources。

resource_ref.hpp