libcudf: types.hpp 源文件

 /*

  * Copyright (c) 2018-2024, NVIDIA CORPORATION.

  *

  * Licensed under the Apache License, Version 2.0 (the "License");

  * you may not use this file except in compliance with the License.

  * You may obtain a copy of the License at

  *

  *     http://www.apache.org/licenses/LICENSE-2.0

  *

  * Unless required by applicable law or agreed to in writing, software

  * distributed under the License is distributed on an "AS IS" BASIS,

  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

  * See the License for the specific language governing permissions and

  * limitations under the License.

  */


 #pragma once


 // Function-decoration macros. When `__CUDACC__` is defined they expand to CUDA
 // execution-space qualifiers; otherwise they expand to plain C++ so that the
 // same declarations remain valid for a host-only compiler.
 #ifdef __CUDACC__

 // Marks a function or method as usable on both host and device.
 #define CUDF_HOST_DEVICE __host__ __device__

 // Marks a kernel entry point; `static` is applied in both branches.
 #define CUDF_KERNEL __global__ static

 #else

 // Host-only build: no execution-space qualifier is emitted.
 #define CUDF_HOST_DEVICE

 // Host-only build: only the `static` part of the kernel decoration remains.
 #define CUDF_KERNEL static

 #endif


 #include <cudf/utilities/export.hpp>


 #include <cassert>

 #include <cstddef>

 #include <cstdint>

 #include <iterator>


 // Forward declaration of rmm::device_buffer: only the name is introduced here,
 // the class itself is defined elsewhere.

 namespace rmm {

 class device_buffer;


 } // namespace rmm


 namespace CUDF_EXPORT cudf {

 // Forward declarations of cudf classes. Only the names are introduced here;
 // the definitions are not part of this header.

 // Columns, column views and element view types.
 class column;

 class column_view;

 class mutable_column_view;

 class string_view;

 class list_view;

 class struct_view;

 // Scalar base class.
 class scalar;


 // clang-format off

 // Concrete scalar types (the class templates are parameterized on a type T).
 class list_scalar;

 class struct_scalar;

 class string_scalar;

 template <typename T> class numeric_scalar;

 template <typename T> class fixed_point_scalar;

 template <typename T> class timestamp_scalar;

 template <typename T> class duration_scalar;


 // Device-view counterparts of the scalar types above.
 class string_scalar_device_view;

 template <typename T> class numeric_scalar_device_view;

 template <typename T> class fixed_point_scalar_device_view;

 template <typename T> class timestamp_scalar_device_view;

 template <typename T> class duration_scalar_device_view;

 // clang-format on


 // Tables and table views.
 class table;

 class table_view;

 class mutable_table_view;


 // Row index type for columns and tables.
 using size_type = int32_t;

 // Bitmask type stored as a 32-bit unsigned integer.
 using bitmask_type = uint32_t;

 // Valid type in host memory.
 using valid_type = uint8_t;

 // Thread index type in kernels.
 using thread_index_type = int64_t;

 // UTF-8 characters are 1-4 bytes; a 32-bit unsigned integer holds one character.
 using char_utf8 = uint32_t;


 /**
  * @brief Similar to `std::distance` but returns `cudf::size_type`.
  *
  * The value produced by `std::distance` is converted with a `static_cast`.
  *
  * @tparam T Iterator type
  * @param f "first" iterator
  * @param l "last" iterator
  * @return The distance from `f` to `l` as a `size_type`
  */
 template <typename T>
 size_type distance(T f, T l)
 {
   auto const span = std::distance(f, l);
   return static_cast<size_type>(span);
 }


 /**
  * @brief Indicates the order in which elements should be sorted.
  */
 enum class order : bool {
   ASCENDING  = false,  ///< Elements ordered from small to large
   DESCENDING = true    ///< Elements ordered from large to small
 };


 /**
  * @brief Enum to specify whether to include nulls or exclude nulls.
  */
 enum class null_policy : bool {
   EXCLUDE = false,  ///< Exclude null elements
   INCLUDE = true    ///< Include null elements
 };


 /**
  * @brief Enum to treat NaN floating point values as null or non-null elements.
  */
 enum class nan_policy : bool {
   NAN_IS_NULL  = false,  ///< Treat NaN as a null element
   NAN_IS_VALID = true    ///< Treat NaN as a valid (non-null) element
 };


 /**
  * @brief Enum to consider different elements (of floating point types) holding
  * NaN values as equal or unequal.
  */
 enum class nan_equality /*unspecified*/ {
   ALL_EQUAL = 0,  ///< All NaNs compare equal, regardless of sign
   UNEQUAL   = 1   ///< All NaNs compare unequal (IEEE754 behavior)
 };


 /**
  * @brief Enum to consider two nulls as equal or unequal.
  */
 enum class null_equality : bool {
   EQUAL   = false,  ///< Nulls compare equal
   UNEQUAL = true    ///< Nulls compare unequal
 };


 /**
  * @brief Indicates how null values compare against all other values.
  */
 enum class null_order : bool {
   AFTER  = false,  ///< NULL values ordered after all other values
   BEFORE = true    ///< NULL values ordered before all other values
 };


 /**
  * @brief Indicates whether a collection of values is known to be sorted.
  */
 enum class sorted : bool {
   NO  = false,
   YES = true
 };


 // Indicates how a collection of values has been ordered.
 struct order_info {

  // Indicates whether the collection is sorted.
  sorted is_sorted;

  // Indicates the order in which the values are sorted.
  order ordering;

  // Indicates how null values compare against all other values.
  null_order null_ordering;

 };


 /**
  * @brief Controls the allocation/initialization of a null mask.
  */
 enum class mask_state : int32_t {
   UNALLOCATED   = 0,  ///< Null mask not allocated (all elements are valid)
   UNINITIALIZED = 1,  ///< Null mask allocated, but not initialized
   ALL_VALID     = 2,  ///< Null mask allocated, initialized to all elements valid
   ALL_NULL      = 3   ///< Null mask allocated, initialized to all elements NULL
 };


 /**
  * @brief Interpolation method to use when the desired quantile lies between
  * two data points i and j.
  */
 enum class interpolation : int32_t {
   LINEAR   = 0,  ///< Linear interpolation between i and j
   LOWER    = 1,  ///< Lower data point (i)
   HIGHER   = 2,  ///< Higher data point (j)
   MIDPOINT = 3,  ///< (i + j)/2
   NEAREST  = 4   ///< i or j, whichever is nearest
 };


 /**
  * @brief Identifies a column's logical element type.
  *
  * Enumerators are numbered consecutively starting at zero, so `NUM_TYPE_IDS`
  * (kept last) equals the number of type ids that precede it.
  */
 enum class type_id : int32_t {
   EMPTY = 0,               ///< Always null with no underlying data
   INT8,                    ///< 1 byte signed integer
   INT16,                   ///< 2 byte signed integer
   INT32,                   ///< 4 byte signed integer
   INT64,                   ///< 8 byte signed integer
   UINT8,                   ///< 1 byte unsigned integer
   UINT16,                  ///< 2 byte unsigned integer
   UINT32,                  ///< 4 byte unsigned integer
   UINT64,                  ///< 8 byte unsigned integer
   FLOAT32,                 ///< 4 byte floating point
   FLOAT64,                 ///< 8 byte floating point
   BOOL8,                   ///< Boolean using one byte per value, 0 == false, else true
   TIMESTAMP_DAYS,          ///< Point in time in days since Unix Epoch in int32
   TIMESTAMP_SECONDS,       ///< Point in time in seconds since Unix Epoch in int64
   TIMESTAMP_MILLISECONDS,  ///< Point in time in milliseconds since Unix Epoch in int64
   TIMESTAMP_MICROSECONDS,  ///< Point in time in microseconds since Unix Epoch in int64
   TIMESTAMP_NANOSECONDS,   ///< Point in time in nanoseconds since Unix Epoch in int64
   DURATION_DAYS,           ///< Time interval of days in int32
   DURATION_SECONDS,        ///< Time interval of seconds in int64
   DURATION_MILLISECONDS,   ///< Time interval of milliseconds in int64
   DURATION_MICROSECONDS,   ///< Time interval of microseconds in int64
   DURATION_NANOSECONDS,    ///< Time interval of nanoseconds in int64
   DICTIONARY32,            ///< Dictionary type using int32 indices
   STRING,                  ///< String elements
   LIST,                    ///< List elements
   DECIMAL32,               ///< Fixed-point type with int32_t
   DECIMAL64,               ///< Fixed-point type with int64_t
   DECIMAL128,              ///< Fixed-point type with __int128_t
   STRUCT,                  ///< Struct elements
   // `NUM_TYPE_IDS` must be last!
   NUM_TYPE_IDS             ///< Total number of type ids
 };


 /**
  * @brief Indicator for the logical data type of an element in a column.
  *
  * Pairs a `type_id` with type-specific metadata. Currently the only metadata
  * stored is the scale used by fixed-point (decimal) types.
  */
 class data_type {
  public:
   data_type()                 = default;
   ~data_type()                = default;
   data_type(data_type const&) = default;  ///< Copy constructor
   data_type(data_type&&)      = default;  ///< Move constructor

   /**
    * @brief Copy assignment operator for data_type
    *
    * @return Reference to this object
    */
   data_type& operator=(data_type const&) = default;

   /**
    * @brief Move assignment operator for data_type
    *
    * @return Reference to this object
    */
   data_type& operator=(data_type&&) = default;

   /**
    * @brief Construct a new `data_type` object
    *
    * The scale is left at its default of zero.
    *
    * @param id The type's identifier
    */
   CUDF_HOST_DEVICE explicit constexpr data_type(type_id id) : _id{id} {}

   /**
    * @brief Construct a new `data_type` object for fixed-point types
    *
    * `id` is checked with `assert` to be one of `DECIMAL32`, `DECIMAL64` or
    * `DECIMAL128`; the check is compiled out when `NDEBUG` is defined.
    *
    * @param id The fixed-point type's identifier
    * @param scale The fixed-point type's scale
    */
   explicit data_type(type_id id, int32_t scale) : _id{id}, _fixed_point_scale{scale}
   {
     assert(id == type_id::DECIMAL32 || id == type_id::DECIMAL64 || id == type_id::DECIMAL128);
   }

   /**
    * @brief Returns the type identifier
    *
    * @return The type identifier
    */
   [[nodiscard]] CUDF_HOST_DEVICE constexpr type_id id() const noexcept { return _id; }

   /**
    * @brief Returns the scale (for fixed-point types)
    *
    * @return The stored scale; zero unless one was supplied at construction
    */
   [[nodiscard]] CUDF_HOST_DEVICE constexpr int32_t scale() const noexcept
   {
     return _fixed_point_scale;
   }

  private:
   type_id _id{type_id::EMPTY};

   // Below is additional type specific metadata. Currently, only _fixed_point_scale is stored.

   int32_t _fixed_point_scale{};  // numeric::scale_type not available here, use int32_t
 };


 /**
  * @brief Compares two `data_type` objects for equality.
  *
  * Two `data_type` objects are equal when both their `id()` and their `scale()`
  * match.
  *
  * @param lhs The first `data_type` to compare
  * @param rhs The second `data_type` to compare
  * @return true if `lhs` is equal to `rhs`
  */
 constexpr bool operator==(data_type const& lhs, data_type const& rhs)
 {
   // std::tie would be the natural spelling, but it currently breaks JITIFY
   bool const ids_match    = lhs.id() == rhs.id();
   bool const scales_match = lhs.scale() == rhs.scale();
   return ids_match && scales_match;
 }


 /**
  * @brief Compares two `data_type` objects for inequality.
  *
  * Defined as the negation of `operator==`. Declared `constexpr` (which is
  * implicitly inline) so that, like `operator==`, it can be used in constant
  * expressions.
  *
  * @param lhs The first `data_type` to compare
  * @param rhs The second `data_type` to compare
  * @return true if `lhs` is not equal to `rhs`
  */
 constexpr bool operator!=(data_type const& lhs, data_type const& rhs) { return !(lhs == rhs); }


 // Returns the size in bytes of elements of the specified data_type.
 // Declaration only; the definition is not part of this header.
 std::size_t size_of(data_type t);


 } // namespace CUDF_EXPORT cudf

cudf::data_type
列中元素的逻辑数据类型的指示符。
定义： types.hpp:243

cudf::data_type::operator=
data_type & operator=(data_type &&)=default
data_type 的移动赋值运算符。

cudf::data_type::data_type
data_type(data_type &&)=default
移动构造函数。

cudf::data_type::id
constexpr CUDF_HOST_DEVICE type_id id() const noexcept
返回类型标识符。
定义： types.hpp:287

cudf::data_type::data_type
data_type(type_id id, int32_t scale)
构造一个新的用于 numeric::fixed_point 的 data_type 对象。
定义： types.hpp:277

cudf::data_type::operator=
data_type & operator=(data_type const &)=default
data_type 的拷贝赋值运算符。

cudf::data_type::data_type
data_type(data_type const &)=default
拷贝构造函数。

cudf::data_type::data_type
constexpr CUDF_HOST_DEVICE data_type(type_id id)
构造一个新的 data_type 对象。
定义： types.hpp:269

cudf::data_type::scale
constexpr CUDF_HOST_DEVICE int32_t scale() const noexcept
返回 scale 值（用于 fixed_point 类型）。
定义： types.hpp:294

rmm

cudf::strings::LOWER
@ LOWER
所有小写字符
定义： char_types_enum.hpp:45

cudf::null_order
null_order
指示 null 值与所有其他值的比较方式。
定义： types.hpp:159

cudf::null_equality
null_equality
用于将两个 null 值视为相等或不相等的枚举。
定义： types.hpp:151

cudf::size_type
int32_t size_type
列和表的行索引类型。
定义： types.hpp:95

cudf::null_policy
null_policy
用于指定是否包含 null 或排除 null 的枚举。
定义： types.hpp:126

cudf::bitmask_type
uint32_t bitmask_type
存储为 32 位无符号整数的位掩码类型。
定义： types.hpp:96

cudf::distance
size_type distance(T f, T l)
类似于 std::distance，但返回 cudf::size_type 并执行 static_cast。
定义： types.hpp:110

cudf::operator==
constexpr bool operator==(data_type const &lhs, data_type const &rhs)
比较两个 data_type 对象的相等性。
定义： types.hpp:319

cudf::mask_state
mask_state
控制 null 掩码的分配/初始化。
定义： types.hpp:181

cudf::size_of
std::size_t size_of(data_type t)
返回指定 data_type 元素的字节大小。

cudf::thread_index_type
int64_t thread_index_type
核函数中的线程索引类型。
定义： types.hpp:98

cudf::nan_policy
nan_policy
用于将 NaN 浮点值视为 null 或非 null 元素的枚举。
定义： types.hpp:134

cudf::order
order
指示元素应如何排序。
定义： types.hpp:118

cudf::operator!=
bool operator!=(data_type const &lhs, data_type const &rhs)
比较两个 data_type 对象的不相等性。
定义： types.hpp:337

cudf::valid_type
uint8_t valid_type
主机内存中的有效类型。
定义： types.hpp:97

cudf::interpolation
interpolation
当所需的 quantile 位于两个数据点 i 和 j 之间时使用的插值方法。
定义： types.hpp:192

cudf::sorted
sorted
指示已知一组值是否已排序。
定义： types.hpp:167

cudf::type_id
type_id
标识列的逻辑元素类型。
定义： types.hpp:203

cudf::nan_equality
nan_equality
用于将持有 NaN 值的不同元素（浮点类型）视为相等或不相等的枚举。
定义： types.hpp:143

cudf::char_utf8
uint32_t char_utf8
UTF-8 字符为 1-4 字节。
定义： string_view.hpp:31

cudf::null_order::BEFORE
@ BEFORE
NULL 值排在所有其他值之前。

cudf::null_order::AFTER
@ AFTER
NULL 值排在所有其他值之后。

cudf::null_policy::INCLUDE
@ INCLUDE
包含 null 元素

cudf::null_policy::EXCLUDE
@ EXCLUDE
排除 null 元素

cudf::mask_state::ALL_VALID
@ ALL_VALID
已分配 null 掩码，初始化为所有元素都有效。

cudf::mask_state::UNALLOCATED
@ UNALLOCATED
未分配 null 掩码（所有元素都有效）。

cudf::mask_state::ALL_NULL
@ ALL_NULL
已分配 null 掩码，初始化为所有元素都为 NULL。

cudf::mask_state::UNINITIALIZED
@ UNINITIALIZED
已分配 null 掩码，但未初始化。

cudf::nan_policy::NAN_IS_VALID
@ NAN_IS_VALID
将 NaN 视为有效元素（非 null）。

cudf::nan_policy::NAN_IS_NULL
@ NAN_IS_NULL
将 NaN 视为 null 元素。

cudf::order::ASCENDING
@ ASCENDING
元素从小到大排序。

cudf::order::DESCENDING
@ DESCENDING
元素从大到小排序。

cudf::interpolation::HIGHER
@ HIGHER
较高的数据点 (j)。

cudf::interpolation::LINEAR
@ LINEAR
i 和 j 之间的线性插值。

cudf::interpolation::NEAREST
@ NEAREST
i 或 j，取最接近的那个。

cudf::interpolation::MIDPOINT
@ MIDPOINT
(i + j)/2

cudf::type_id::BOOL8
@ BOOL8
布尔类型，每个值使用一个字节，0 表示 false，否则为 true。

cudf::type_id::FLOAT64
@ FLOAT64
8 字节浮点数。

cudf::type_id::UINT32
@ UINT32
4 字节无符号整数。

cudf::type_id::DURATION_MILLISECONDS
@ DURATION_MILLISECONDS
int64 表示的毫秒时间间隔。

cudf::type_id::NUM_TYPE_IDS
@ NUM_TYPE_IDS
类型 ID 的总数。

cudf::type_id::UINT16
@ UINT16
2 字节无符号整数。

cudf::type_id::DECIMAL128
@ DECIMAL128
使用 __int128_t 的定点类型。

cudf::type_id::INT16
@ INT16
2 字节有符号整数。

cudf::type_id::TIMESTAMP_MILLISECONDS
@ TIMESTAMP_MILLISECONDS
自 Unix Epoch 起以 int64 表示的毫秒时间点。

cudf::type_id::DURATION_NANOSECONDS
@ DURATION_NANOSECONDS
int64 表示的纳秒时间间隔。

cudf::type_id::DURATION_DAYS
@ DURATION_DAYS
int32 表示的天时间间隔。

cudf::type_id::UINT64
@ UINT64
8 字节无符号整数。

cudf::type_id::TIMESTAMP_MICROSECONDS
@ TIMESTAMP_MICROSECONDS
自 Unix Epoch 起以 int64 表示的微秒时间点。

cudf::type_id::DURATION_SECONDS
@ DURATION_SECONDS
int64 表示的秒时间间隔。

cudf::type_id::DURATION_MICROSECONDS
@ DURATION_MICROSECONDS
int64 表示的微秒时间间隔。

cudf::type_id::FLOAT32
@ FLOAT32
4 字节浮点数。

cudf::type_id::EMPTY
@ EMPTY
始终为 null，无底层数据。

cudf::type_id::TIMESTAMP_SECONDS
@ TIMESTAMP_SECONDS
自 Unix Epoch 起以 int64 表示的秒时间点。

cudf::type_id::TIMESTAMP_NANOSECONDS
@ TIMESTAMP_NANOSECONDS
自 Unix Epoch 起以 int64 表示的纳秒时间点。

cudf::type_id::TIMESTAMP_DAYS
@ TIMESTAMP_DAYS
自 Unix Epoch 起以 int32 表示的天时间点。

cudf::type_id::DECIMAL64
@ DECIMAL64
使用 int64_t 的定点类型。

cudf::type_id::DECIMAL32
@ DECIMAL32
使用 int32_t 的定点类型。

cudf::type_id::UINT8
@ UINT8
1 字节无符号整数。

cudf::type_id::INT8
@ INT8
1 字节有符号整数。

cudf::type_id::DICTIONARY32
@ DICTIONARY32
使用 int32 索引的字典类型。

cudf::nan_equality::UNEQUAL
@ UNEQUAL
所有 NaN 比较结果均不相等 (IEEE754 行为)。

cudf::nan_equality::ALL_EQUAL
@ ALL_EQUAL
所有 NaN 比较结果均相等，无论符号如何。

cudf
cuDF 接口
定义： host_udf.hpp:37

cudf::order_info
指示一组值的排序方式。
定义： types.hpp:172

cudf::order_info::ordering
order ordering
指示值排序的顺序。
定义： types.hpp:174

cudf::order_info::null_ordering
null_order null_ordering
指示 null 值与所有其他值的比较方式。
定义： types.hpp:175

cudf::order_info::is_sorted
sorted is_sorted
指示集合是否已排序。
定义： types.hpp:173

CUDF_HOST_DEVICE
#define CUDF_HOST_DEVICE
表示函数或方法可在 host 和 device 上使用。
定义： types.hpp:32