文件
文件	aggregation.hpp
	用于指定基于聚合的 API（例如 groupby、reduction、rolling 等）所需的聚合表示。

类
类	cudf::host_udf_base
	基于主机的 UDF 实现的基础接口。更多...

结构体	cudf::reduce_host_udf
	用于归约（reduction）上下文的基于主机的 UDF 实现接口。更多...

结构体	cudf::segmented_reduce_host_udf
	用于分段归约（segmented reduction）上下文的基于主机的 UDF 实现接口。更多...

结构体	cudf::groupby_host_udf
	用于 groupby 聚合上下文的基于主机的 UDF 实现接口。更多...

类	cudf::aggregation
	用于在 `aggregation_request` 中指定所需聚合的抽象基类。更多...

类	cudf::rolling_aggregation
	用于 rolling_window 特定聚合用法的派生类。更多...

类	cudf::groupby_aggregation
	用于 groupby 特定聚合用法的派生类。更多...

类	cudf::groupby_scan_aggregation
	用于 groupby 特定扫描（scan）用法的派生类。更多...

类	cudf::reduce_aggregation
	用于归约（reduction）用法的派生类。更多...

类	cudf::scan_aggregation
	用于扫描（scan）用法的派生类。更多...

类	cudf::segmented_reduce_aggregation
	用于分段归约（segmented reduction）用法的派生类。更多...

枚举
枚举类	cudf::rank_percentage : int32_t { cudf::NONE , cudf::ZERO_NORMALIZED , cudf::ONE_NORMALIZED }
	返回的排名是否应为百分比，并说明百分比归一化的类型。更多...

枚举类	cudf::udf_type : bool { CUDA , PTX }
	用户定义函数字符串中的代码类型。

枚举类	cudf::correlation_type : int32_t { PEARSON , KENDALL , SPEARMAN }
	相关性方法的类型。

枚举类	cudf::ewm_history : int32_t { INFINITE , FINITE }
	EWM 输入值的第一个值的处理类型。

函数
template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_sum_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_product_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_min_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_max_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_count_aggregation (null_policy null_handling=null_policy::EXCLUDE)
	创建 COUNT 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_any_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_all_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_histogram_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_sum_of_squares_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_mean_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_m2_aggregation ()
	创建 M2 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_variance_aggregation (size_type ddof=1)
	创建 VARIANCE 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_std_aggregation (size_type ddof=1)
	创建 STD 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_median_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_quantile_aggregation (std::vector< double > const &quantiles, interpolation interp=interpolation::LINEAR)
	创建 QUANTILE 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_argmax_aggregation ()
	创建 ARGMAX 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_argmin_aggregation ()
	创建 ARGMIN 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_nunique_aggregation (null_policy null_handling=null_policy::EXCLUDE)
	创建 NUNIQUE 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_nth_element_aggregation (size_type n, null_policy null_handling=null_policy::INCLUDE)
	创建 NTH_ELEMENT 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_row_number_aggregation ()

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_ewma_aggregation (double const center_of_mass, ewm_history history)
	创建 EWMA 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_rank_aggregation (rank_method method, order column_order=order::ASCENDING, null_policy null_handling=null_policy::EXCLUDE, null_order null_precedence=null_order::AFTER, rank_percentage percentage=rank_percentage::NONE)
	创建 RANK 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_collect_list_aggregation (null_policy null_handling=null_policy::INCLUDE)
	创建 COLLECT_LIST 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_collect_set_aggregation (null_policy null_handling=null_policy::INCLUDE, null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL)
	创建 COLLECT_SET 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_lag_aggregation (size_type offset)
	创建 LAG 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_lead_aggregation (size_type offset)
	创建 LEAD 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_udf_aggregation (udf_type type, std::string const &user_defined_aggregator, data_type output_type)
	创建基于 UDF 的 PTX 或 CUDA 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_host_udf_aggregation (std::unique_ptr< host_udf_base > host_udf)
	创建 HOST_UDF 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_merge_lists_aggregation ()
	创建 MERGE_LISTS 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_merge_sets_aggregation (null_equality nulls_equal=null_equality::EQUAL, nan_equality nans_equal=nan_equality::ALL_EQUAL)
	创建 MERGE_SETS 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_merge_m2_aggregation ()
	创建 MERGE_M2 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_merge_histogram_aggregation ()
	创建 MERGE_HISTOGRAM 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_covariance_aggregation (size_type min_periods=1, size_type ddof=1)
	创建 COVARIANCE 聚合的工厂函数。更多...

template<typename Base = aggregation>
std::unique_ptr< Base >	cudf::make_correlation_aggregation (correlation_type type, size_type min_periods=1)
	创建 CORRELATION 聚合的工厂函数。更多...

template<typename Base >
std::unique_ptr< Base >	cudf::make_tdigest_aggregation (int max_centroids=1000)
	创建 TDIGEST 聚合的工厂函数。更多...

template<typename Base >
std::unique_ptr< Base >	cudf::make_merge_tdigest_aggregation (int max_centroids=1000)
	创建 MERGE_TDIGEST 聚合的工厂函数。更多...

详细描述

枚举类型文档

◆ rank_percentage

枚举 cudf::rank_percentage : int32_t

强枚举类

返回的排名是否应为百分比，并说明百分比归一化的类型。

枚举成员
NONE	排名
ZERO_NORMALIZED	rank / count
ONE_NORMALIZED	(rank - 1) / (count - 1)

定义于文件 aggregation.hpp 第 67 行。

函数文档

◆ make_all_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_all_aggregation ( )

创建 ALL 聚合的工厂函数

返回: 一个 ALL 聚合对象

◆ make_any_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_any_aggregation ( )

创建 ANY 聚合的工厂函数

返回: 一个 ANY 聚合对象

◆ make_argmax_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_argmax_aggregation ( )

创建 ARGMAX 聚合的工厂函数。

ARGMAX 返回最大元素的索引。

返回: 一个 ARGMAX 聚合对象

◆ make_argmin_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_argmin_aggregation ( )

创建 ARGMIN 聚合的工厂函数。

ARGMIN 返回最小元素的索引。

返回: 一个 ARGMIN 聚合对象

◆ make_collect_list_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_collect_list_aggregation ( null_policy null_handling = null_policy::INCLUDE )

创建 COLLECT_LIST 聚合的工厂函数。

COLLECT_LIST 返回一个列表列，其中包含组/系列中所有被纳入的元素。

如果 null_handling 设置为 EXCLUDE，则从每个列表行中删除空值元素。

参数

null_handling 指示在列表元素中是包含还是排除 null 值

返回: 一个 COLLECT_LIST 聚合对象

◆ make_collect_set_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_collect_set_aggregation	(	null_policy	null_handling = `null_policy::INCLUDE`,
		null_equality	nulls_equal = `null_equality::EQUAL`,
		nan_equality	nans_equal = `nan_equality::ALL_EQUAL`
	)

创建 COLLECT_SET 聚合的工厂。

COLLECT_SET 返回一个列表列，其中包含组/系列中的所有元素。在每个列表中，重复的条目会被删除，以便每个条目只出现一次。

如果 null_handling 设置为 EXCLUDE，则从每个列表行中删除空值元素。

参数

null_handling	指示在收集期间是包含还是排除 null 值
nulls_equal	标志，指定每个列表中的 null 条目是否应被视为相等。
nans_equal	标志，指定浮点列中的 NaN 值是否应被视为相等。

返回: 一个 COLLECT_SET 聚合对象

◆ make_correlation_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_correlation_aggregation	(	correlation_type	type,
		size_type	min_periods = `1`
	)

创建 CORRELATION 聚合的工厂。

计算两列之间的相关系数。输入列是不可为空的结构列的子列。

参数

type	correlation_type
min_periods	生成结果所需的非 null 观察值的最小数量

返回: 一个 CORRELATION 聚合对象

◆ make_count_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_count_aggregation ( null_policy null_handling = null_policy::EXCLUDE )

创建 COUNT 聚合的工厂。

参数

null_handling 指示是否将 null 值计入计数

返回: 一个 COUNT 聚合对象

◆ make_covariance_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_covariance_aggregation	(	size_type	min_periods = `1`,
		size_type	ddof = `1`
	)

创建 COVARIANCE 聚合的工厂。

计算两列之间的协方差。输入列是不可为空的结构列的子列。

参数

min_periods	生成结果所需的非 null 观察值的最小数量
ddof	自由度差。计算中使用的除数是 N - ddof，其中 N 是非 null 观察值的数量。

返回: 一个 COVARIANCE 聚合对象

◆ make_ewma_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_ewma_aggregation	(	double const	center_of_mass,
		ewm_history	history
	)

创建 EWMA 聚合的工厂。

EWMA 返回一个与输入类型相同的不可为空的列，其值是输入序列的指数加权移动平均值。这些值称为 y_i。

EWMA 聚合由质心 (com) 参数化，质心会影响先前值 (y_0 ... y_{i-1}) 在计算 y_i 时的贡献。

EWMA 聚合也由历史记录 cudf::ewm_history 参数化。必须特别考虑输入序列第一个值的数学处理。有两种方法：一种是将序列的第一个值视为无限历史数据的指数加权移动平均值；另一种是将第一个值视为唯一的已知数据点。这些假设导致 y_i 的两种不同公式。ewm_history 选择使用哪一种。

EWMA 聚合有特殊的 null 处理。null 值有两种影响。第一种是将最后一个有效值向前传播，直到计算完成。这可以视为 null 值不以任何方式影响平均值。第二种影响改变了 y_i 的计算方式。由于移动平均值在概念上旨在根据贡献值的近期程度对其进行加权，因此 null 值应该算作有效周期，即使它们不改变平均值。例如，如果输入序列是 {1, NULL, 3}，那么在计算 y_2 时，应该将 y_0 视为发生在 y_2 之前两个周期，而不仅仅是一个周期。

参数

center_of_mass	质心。
history	关于第一个值应采用哪个假设

返回: 一个 EWM 聚合对象

◆ make_histogram_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_histogram_aggregation ( )

创建 HISTOGRAM 聚合的工厂

返回: 一个 HISTOGRAM 聚合对象

◆ make_host_udf_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_host_udf_aggregation ( std::unique_ptr< host_udf_base > host_udf )

创建 HOST_UDF 聚合的工厂。

参数

host_udf 派生自 host_udf_base 的类的实例，用于执行聚合

返回: 一个 HOST_UDF 聚合对象

◆ make_lag_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_lag_aggregation ( size_type offset )

创建 LAG 聚合的工厂。

参数

offset 输入数据需要滞后的行数

返回: 一个 LAG 聚合对象

◆ make_lead_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_lead_aggregation ( size_type offset )

创建 LEAD 聚合的工厂。

参数

offset 输入数据需要领先的行数

返回: 一个 LEAD 聚合对象

◆ make_m2_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_m2_aggregation ( )

创建 M2 聚合的工厂。

M2 聚合是与平均值差值的平方和。即：M2 = SUM((x - MEAN) * (x - MEAN))。

此聚合生成中间值，用于计算多个离散集之间的方差和标准差。有关更多详细信息，请参阅 https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm。

返回: 一个 M2 聚合对象

◆ make_max_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_max_aggregation ( )

创建 MAX 聚合的工厂

返回: 一个 MAX 聚合对象

◆ make_mean_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_mean_aggregation ( )

创建 MEAN 聚合的工厂

返回: 一个 MEAN 聚合对象

◆ make_median_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_median_aggregation ( )

创建 MEDIAN 聚合的工厂

返回: 一个 MEDIAN 聚合对象

◆ make_merge_histogram_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_merge_histogram_aggregation ( )

创建 MERGE_HISTOGRAM 聚合的工厂。

将独立集上 HISTOGRAM 聚合的结果合并为一个新的 HISTOGRAM 值，其效果等同于一次性对所有集合执行单个 HISTOGRAM 聚合。

返回: 一个 MERGE_HISTOGRAM 聚合对象

◆ make_merge_lists_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_merge_lists_aggregation ( )

创建 MERGE_LISTS 聚合的工厂。

给定一个列表列，此聚合将对应于相同键值的所有列表合并为一个列表。它专门设计用于将多个（分布式）groupby COLLECT_LIST 聚合的部分结果合并为最终的 COLLECT_LIST 结果。因此，它要求输入列表列不可为空（包含列表条目的子列不受此要求约束）。

返回: 一个 MERGE_LISTS 聚合对象

◆ make_merge_m2_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_merge_m2_aggregation ( )

创建 MERGE_M2 聚合的工厂。

将独立集上 M2 聚合的结果合并为一个新的 M2 值，其效果等同于一次性对所有集合执行单个 M2 聚合。此聚合仅对结构体有效，这些结构体的成员是同一集上 COUNT_VALID、MEAN 和 M2 聚合的结果。此聚合的输出是一个结构体，包含合并后的 COUNT_VALID、MEAN 和 M2 聚合结果。

输入的 M2 聚合值应全部为非负数，因为它们是从 M2 聚合输出的。

返回: 一个 MERGE_M2 聚合对象

◆ make_merge_sets_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_merge_sets_aggregation	(	null_equality	nulls_equal = `null_equality::EQUAL`,
		nan_equality	nans_equal = `nan_equality::ALL_EQUAL`
	)

创建 MERGE_SETS 聚合的工厂。

给定一个列表列，此聚合首先将对应于相同键值的所有列表合并为一个列表，然后删除每个列表中的所有重复条目，生成一个包含非重复条目的列表列。

此聚合专门设计用于将多个（分布式）groupby COLLECT_LIST 或 COLLECT_SET 聚合的部分结果合并为最终的 COLLECT_SET 结果。因此，它要求输入列表列不可为空（包含列表条目的子列不受此要求约束）。

实际上，此聚合的输入（部分结果）应由（分布式）COLLECT_LIST 聚合生成，而不是 COLLECT_SET，以避免不必要地删除部分结果中的重复条目。

参数

nulls_equal	标志，指定在删除重复列表条目时，每个列表中的 null 值是否应被视为相等。
nans_equal	标志，指定在删除重复列表条目时，浮点列中的 NaN 值是否应被视为相等。

返回: 一个 MERGE_SETS 聚合对象

◆ make_merge_tdigest_aggregation()

template<typename Base >

std::unique_ptr<Base> cudf::make_merge_tdigest_aggregation ( int max_centroids = 1000 )

创建 MERGE_TDIGEST 聚合的工厂。

合并先前由 make_tdigest_aggregation 或 make_merge_tdigest_aggregation 生成的聚合结果，以生成新的 tdigest (https://arxiv.org/pdf/1902.04023.pdf) 列。

生成的 tdigest 列结构如下

struct {
  // centroids for the digest
  list {
    struct {
      double    // mean
      double    // weight
    },
    ...
  }
  // these are from the input stream, not the centroids. they are used
  // during the percentile_approx computation near the beginning or
  // end of the quantiles
  double        // min
  double        // max
}

每个输出行是一个单独的 tdigest。行的长度是 tdigest 的“大小”，其中每个元素代表一个加权质心（均值，权重）。

参数

max_centroids 控制输出 tdigest 数据上后续查询的压缩级别和准确性的参数。max_centroids 对计算出的 tdigest 大小设置上限：值为 1000 将生成一个包含不超过 1000 个质心（每个 32 字节）的 tdigest。值越高，tdigest 信息越准确。

返回: 一个 MERGE_TDIGEST 聚合对象

◆ make_min_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_min_aggregation ( )

创建 MIN 聚合的工厂

返回: 一个 MIN 聚合对象

◆ make_nth_element_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_nth_element_aggregation	(	size_type	n,
		null_policy	null_handling = `null_policy::INCLUDE`
	)

创建 NTH_ELEMENT 聚合的工厂。

NTH_ELEMENT 返回组/系列的第 n 个元素。

如果 n 不在 [-group_size, group_size) 范围内，则相应组的结果将为 null。负索引 [-group_size, -1] 分别对应于 [0, group_size-1] 索引，其中 group_size 是每个组的大小。

参数

n	每个组中第 n 个元素的索引
null_handling	指示在索引期间是包含还是排除 null 值

返回: 一个 NTH_ELEMENT 聚合对象

◆ make_nunique_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_nunique_aggregation ( null_policy null_handling = null_policy::EXCLUDE )

创建 NUNIQUE 聚合的工厂。

NUNIQUE 返回唯一元素的数量。

参数

null_handling 指示是否将 null 值计入统计

返回: 一个 NUNIQUE 聚合对象

◆ make_product_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_product_aggregation ( )

创建 PRODUCT 聚合的工厂

返回: 一个 PRODUCT 聚合对象

◆ make_quantile_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_quantile_aggregation	(	std::vector< double > const &	quantiles,
		interpolation	interp = `interpolation::LINEAR`
	)

创建 QUANTILE 聚合的工厂。

参数

quantiles	所需的分位数
interp	所需的插值方法

返回: 一个 QUANTILE 聚合对象

◆ make_rank_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_rank_aggregation	(	rank_method	method,
		order	column_order = `order::ASCENDING`,
		null_policy	null_handling = `null_policy::EXCLUDE`,
		null_order	null_precedence = `null_order::AFTER`,
		rank_percentage	percentage = `rank_percentage::NONE`
	)

创建 RANK 聚合的工厂。

对于给定的排名方法和列顺序，RANK 返回一个 size_type 或 double 类型的“排名”列（数据类型确定方式请参见下面的注释 3）。如果排除 null 值，则这些行的排名将为 null；否则返回一个不可为空的列。仅当 percentage!=NONE 且排名方法为 average 时，才返回双精度列。

此聚合仅适用于“扫描”算法。分组或非分组扫描的输入列是一个 orderby 列，它决定了聚合函数进行排名的各行的顺序。如果按多于一列进行排序，则 orderby 输入列应为包含排序列的结构体列。

注意

1. 如果行已按组键和 order_by 列预排序，此方法可能运行得更快。尽管 groupby 对象不要求 order_by 列已排序，但如果键已排序，groupby rank scan 聚合确实要求 order_by 列已排序。
2. RANK 聚合与独占扫描不兼容。
3. 除 AVERAGE 方法和 percentage!=NONE 外，所有排名方法都返回 size_type 列。对于 AVERAGE 方法和 percentage!=NONE，返回类型为 double 列。

示例：考虑一个赛车统计数据集，包含以下列
1. venue: (STRING) 比赛地点
2. driver: (STRING) 赛车手姓名（缩写为 3 个字符）
3. time: (INT32) 完成赛道所需时间
 
对于以下预排序数据
 
[ // 比赛地点, 赛车手, 时间
{ "silverstone", "HAM" ("hamilton"), 15823},
{ "silverstone", "LEC" ("leclerc"), 15827},
{ "silverstone", "BOT" ("bottas"), 15834}, // <-- 并列第 3 名。
{ "silverstone", "NOR" ("norris"), 15834}, // <-- 并列第 3 名。
{ "silverstone", "RIC" ("ricciardo"), 15905},
{ "monza", "RIC" ("ricciardo"), 12154},
{ "monza", "NOR" ("norris"), 12156}, // <-- 并列第 2 名。
{ "monza", "BOT" ("bottas"), 12156}, // <-- 并列第 2 名。
{ "monza", "LEC" ("leclerc"), 12201},
{ "monza", "PER" ("perez"), 12203}
 ]
 
一个分组排名聚合扫描，参数为
分组列 : venue
输入排序依据列: time
为每种方法生成以下排名列
first: { 1, 2, 3, 4, 5, 1, 2, 3, 4, 5}
average: { 1, 2, 3.5, 3.5, 5, 1, 2.5, 2.5, 4, 5}
min: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5}
max: { 1, 2, 4, 4, 5, 1, 3, 3, 4, 5}
dense: { 1, 2, 3, 3, 4, 1, 2, 2, 3, 4}
这对应于以下分组和 `driver` 行
{ "HAM", "LEC", "BOT", "NOR", "RIC", "RIC", "NOR", "BOT", "LEC", "PER" }
<----------silverstone----------->|<-------------monza-------------->
 
使用 min 方法时，每种百分比类型对应的排名
NONE: { 1, 2, 3, 3, 5, 1, 2, 2, 4, 5 }
ZERO_NORMALIZED : { 0.16, 0.33, 0.50, 0.50, 0.83, 0.16, 0.33, 0.33, 0.66, 0.83 }
ONE_NORMALIZED: { 0.00, 0.25, 0.50, 0.50, 1.00, 0.00, 0.25, 0.25, 0.75, 1.00 }
其中 count 对应于组中的行数。另请参阅 cudf::rank_percentage

参数

method	用于打破并列（相同值）的排名方法
column_order	期望的排名排序顺序
null_handling	在排名期间包含 null 值的标志。如果 null 值不包含在内，则相应的排名将为 null。
null_precedence	null 值与列中其他元素的相对排序顺序
percentage	枚举，表示将排名转换为 (0,1] 范围内百分比时所用的转换类型

返回: 一个 RANK 聚合对象

◆ make_row_number_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_row_number_aggregation ( )

创建 ROW_NUMBER 聚合的工厂

返回: 一个 ROW_NUMBER 聚合对象

◆ make_std_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_std_aggregation ( size_type ddof = 1 )

创建 STD 聚合的工厂。

参数

ddof	自由度差。计算 `std` 时使用的除数是 `N - ddof`，其中 `N` 是总体大小。

异常

cudf::logic_error 如果输入类型是 chrono 或复合类型。

返回: 一个 STD 聚合对象

◆ make_sum_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_sum_aggregation ( )

创建 SUM 聚合的工厂

返回: 一个 SUM 聚合对象

◆ make_sum_of_squares_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_sum_of_squares_aggregation ( )

创建 SUM_OF_SQUARES 聚合的工厂

返回: 一个 SUM_OF_SQUARES 聚合对象

◆ make_tdigest_aggregation()

template<typename Base >

std::unique_ptr<Base> cudf::make_tdigest_aggregation ( int max_centroids = 1000 )

创建 TDIGEST 聚合的工厂。

根据输入值生成一个 tdigest (https://arxiv.org/pdf/1902.04023.pdf) 列。输入的聚合值应为定宽数值类型。

生成的 tdigest 列结构如下

struct {
  // centroids for the digest
  list {
    struct {
      double    // mean
      double    // weight
    },
    ...
  }
  // these are from the input stream, not the centroids. they are used
  // during the percentile_approx computation near the beginning or
  // end of the quantiles
  double        // min
  double        // max
}

每个输出行是一个单独的 tdigest。行的长度是 tdigest 的“大小”，其中每个元素代表一个加权质心（均值，权重）。

参数

max_centroids 控制输出 tdigest 数据上后续查询的压缩级别和准确性的参数。max_centroids 对计算出的 tdigest 大小设置上限：值为 1000 将生成一个包含不超过 1000 个质心（每个 32 字节）的 tdigest。值越高，tdigest 信息越准确。

返回: 一个 TDIGEST 聚合对象

◆ make_udf_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_udf_aggregation	(	udf_type	type,
		std::string const &	user_defined_aggregator,
		data_type	output_type
	)

创建基于 UDF（PTX 或 CUDA）的聚合的工厂。

参数

[in]	type	udf_type::PTX 或 udf_type::CUDA
[in]	user_defined_aggregator	包含聚合器代码的字符串
[in]	output_type	期望的输出类型

返回: 包含用户定义的聚合器字符串的聚合

◆ make_variance_aggregation()

template<typename Base = aggregation>

std::unique_ptr<Base> cudf::make_variance_aggregation ( size_type ddof = 1 )

创建 VARIANCE 聚合的工厂。

参数

ddof	自由度差。计算 `variance` 时使用的除数是 `N - ddof`，其中 `N` 是总体大小。

异常

cudf::logic_error 如果输入类型是 chrono 或复合类型。

返回: 一个 VARIANCE 聚合对象