libkvikio: parallel_operation.hpp 源文件

 /*

  * Copyright (c) 2021-2025, NVIDIA CORPORATION.

  *

  * 根据 Apache 许可证 2.0 版本（"许可证"）获得许可；

  * 除非遵守许可证，否则不得使用此文件。

  * 您可以在以下位置获取许可证的副本：

  *

  * http://www.apache.org/licenses/LICENSE-2.0

  *

  * 除非适用法律要求或书面同意，根据许可证分发的软件

  * 按“现状”分发，不附带任何明示或暗示的保证或条件。

  * 有关许可证下特定语言权限和限制，请参阅许可证。

  * 查看许可证。

  */

 #pragma once


 #include <atomic>

 #include <cassert>

 #include <future>

 #include <memory>

 #include <numeric>

 #include <system_error>

 #include <type_traits>

 #include <utility>

 #include <vector>


 #include <kvikio/defaults.hpp>

 #include <kvikio/error.hpp>

 #include <kvikio/nvtx.hpp>

 #include <kvikio/utils.hpp>


 namespace kvikio {


 namespace detail {


 /// Adapt a callable so that it can be copied, even when the callable itself is
 /// move-only (for instance because it captures a `std::unique_ptr`).
 ///
 /// The callable is moved into heap storage owned by a `std::shared_ptr`; every
 /// copy of the returned closure refers to that same stored callable.
 template <typename F>
 auto make_copyable_lambda(F op)
 {
   // Move `op` onto the heap and let a shared pointer manage its lifetime.
   std::shared_ptr<F> shared_op = std::make_shared<F>(std::move(op));

   // The closure holds only the shared pointer, so it is copyable regardless of `F`.
   // Arguments are perfectly forwarded and the result category is preserved.
   auto proxy = [shared_op](auto&&... call_args) -> decltype(auto) {
     return (*shared_op)(std::forward<decltype(call_args)>(call_args)...);
   };
   return proxy;
 }


 /// @brief Get the next color and call index to use for an NVTX range.
 ///
 /// A function-local atomic counter, starting at 1, is incremented on every call
 /// (relaxed memory order); the value observed before the increment is the call
 /// index, and the color is looked up from that index.
 ///
 /// @return A `pair` holding the next color (by reference) and the call index.
 inline const std::pair<const nvtx_color_type&, std::uint64_t> get_next_color_and_call_idx() noexcept
 {
   static std::atomic_uint64_t next_call_idx{1ull};

   // Only a unique, monotonically increasing value is needed here, hence relaxed ordering.
   auto idx = next_call_idx.fetch_add(1ull, std::memory_order_relaxed);

   return {NvtxManager::get_color_by_index(idx), idx};
 }


 /// @brief Submit a task to the default thread pool.
 ///
 /// @tparam F Callable type of the task. It must be invocable as
 /// `op(buf, size, file_offset, devPtr_offset)` with a result convertible to `std::size_t`
 /// (enforced by a `static_assert`).
 /// @tparam T Type of the buffer.
 /// @param op The operation to execute.
 /// @param buf The buffer.
 /// @param size Number of bytes to operate on.
 /// @param file_offset Offset into the file, in bytes.
 /// @param devPtr_offset Offset relative to the GPU device pointer, in bytes.
 /// @param nvtx_payload NVTX payload attached to the task's range.
 /// @param nvtx_color NVTX color of the task's range.
 /// @return A future representing the result of the task.
 template <typename F, typename T>
 std::future<std::size_t> submit_task(F op,
                                      T buf,
                                      std::size_t size,
                                      std::size_t file_offset,
                                      std::size_t devPtr_offset,
                                      std::uint64_t nvtx_payload = 0ull,
                                      nvtx_color_type nvtx_color = NvtxManager::default_color())
 {
   static_assert(
     std::is_invocable_r_v<std::size_t, F, T, std::size_t, std::size_t, std::size_t>);

   // Everything is captured by value so the task is self-contained once queued.
   auto task = [=] {
     KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color);
     return op(buf, size, file_offset, devPtr_offset);
   };
   return defaults::thread_pool().submit_task(std::move(task));
 }


 /// @brief Submit a move-only task to the default thread pool.
 ///
 /// The callable is first wrapped with `make_copyable_lambda`, so the closure handed
 /// to the thread pool is copyable even though `F` itself is not.
 ///
 /// @tparam F Move-only callable type of the task. It must be invocable without
 /// arguments with a result convertible to `std::size_t` (enforced by a `static_assert`).
 /// @param op_move_only The move-only operation to execute.
 /// @param nvtx_payload NVTX payload attached to the task's range.
 /// @param nvtx_color NVTX color of the task's range.
 /// @return A future representing the result of the task.
 template <typename F>
 std::future<std::size_t> submit_move_only_task(
   F op_move_only,
   std::uint64_t nvtx_payload = 0ull,
   nvtx_color_type nvtx_color = NvtxManager::default_color())
 {
   static_assert(std::is_invocable_r_v<std::size_t, F>);

   // Copyable proxy that shares ownership of the original move-only callable.
   auto shared_op = make_copyable_lambda(std::move(op_move_only));

   auto task = [=] {
     KVIKIO_NVTX_SCOPED_RANGE("task", nvtx_payload, nvtx_color);
     return shared_op();
   };
   return defaults::thread_pool().submit_task(std::move(task));
 }


 } // namespace detail


 /**

  * @brief 并行应用读或写操作。

  *

  * 这个函数将一个大型的 I/O 操作分割成多个较小的任务，并在后台线程池中异步执行它们。

  * 用户需要通过等待返回的 future 对象来等待操作完成。

  *

  * @tparam F 操作的可调用类型。期望签名是 `size_t op(T buf, size_t size, size_t file_offset, size_t devPtr_offset)`。

  * @tparam T 缓冲区的类型。

  * @param op 要执行的操作。

  * @param buf 缓冲区。可以是 `void*` 用于 CPU 内存，或 `cuda::device::pointer` 用于 GPU 内存。

  * @param size 要操作的总大小（以字节为单位）。

  * @param file_offset 文件偏移量（以字节为单位）。

  * @param task_size 单个任务的最大大小（以字节为单位）。函数将把总大小分割成不超过此大小的任务。

  * @param devPtr_offset GPU 设备指针偏移量（以字节为单位）。对于 CPU 内存操作，这应为 0。

  * @param call_idx 当前 kvikIO 调用索引，用于 NVTX。默认值为 0，表示将自动生成索引。

  * @param nvtx_color 用于 NVTX 的颜色。

  * @return future 表示所有任务完成时读取/写入的总字节数。

  */

 template <typename F, typename T>

 std::future<std::size_t> parallel_io(F op,

  T buf,

  std::size_t size,

  std::size_t file_offset,

  std::size_t task_size,

  std::size_t devPtr_offset,

  std::uint64_t call_idx = 0,

  nvtx_color_type nvtx_color = NvtxManager::default_color())

 {
  KVIKIO_EXPECT(task_size > 0, "`task_size` 必须为正数", std::invalid_argument);
  static_assert(std::is_invocable_r_v<std::size_t,

  decltype(op),
  decltype(buf),
  decltype(size),

  decltype(file_offset),
  decltype(devPtr_offset)>);


  // 单任务守卫
  if (task_size >= size || page_size >= size) {
  return detail::submit_task(op, buf, size, file_offset, devPtr_offset, call_idx, nvtx_color);
  }

kvikio

  std::vector<std::future<std::size_t>> tasks;

  tasks.reserve(size / task_size);

  // 1) 提交除最后一个任务外的所有任务。这些任务的大小都为 `task_size`。
  while (size > task_size) {