Cluster#
Params#
#include <cuvs/cluster/kmeans.hpp>
namespace cuvs::cluster::kmeans
-
struct params : public cuvs::cluster::kmeans::base_params#
- #include <kmeans.hpp>
用于指定 kmeans 算法超参数的简单对象。
公共成员
-
int n_clusters = 8#
要形成的聚类数量以及要生成的中心点数量 (默认值:8)。
-
InitMethod init = KMeansPlusPlus#
初始化方法,默认为 k-means++
InitMethod::KMeansPlusPlus (k-means++): 使用可扩展的 k-means++ 算法选择初始聚类中心。
InitMethod::Random (random): 从输入数据中随机选择 'n_clusters' 个观测值(行)作为初始中心点。
InitMethod::Array (ndarray): 使用 'centroids' 作为初始聚类中心。
-
int max_iter = 300#
k-means 算法单次运行的最大迭代次数。
-
double tol = 1e-4#
声明收敛时相对于惯性的相对容差。
-
rapids_logger::level_enum verbosity = rapids_logger::level_enum::info#
详细级别。
-
raft::random::RngState rng_state = {0}#
随机数生成器的种子。
-
int n_init = 1#
k-means 算法将以不同种子运行的实例数。
-
double oversampling_factor = 2.0#
用于 k-means|| 算法中的过采样因子
-
int batch_centroids = 0#
如果为 0,则 batch_centroids = n_clusters
-
int n_clusters = 8#
K-means#
#include <cuvs/cluster/kmeans.hpp>
namespace cuvs::cluster::kmeans
- void fit(
- raft::resources const &handle,
- const cuvs::cluster::kmeans::params &params,
- raft::device_matrix_view<const float, int> X,
- std::optional<raft::device_vector_view<const float, int>> sample_weight,
- raft::device_matrix_view<float, int> centroids,
- raft::host_scalar_view<float, int> inertia,
- raft::host_scalar_view<int, int> n_iter
使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids, raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit(
- raft::resources const &handle,
- const cuvs::cluster::kmeans::params &params,
- raft::device_matrix_view<const float, int64_t> X,
- std::optional<raft::device_vector_view<const float, int64_t>> sample_weight,
- raft::device_matrix_view<float, int64_t> centroids,
- raft::host_scalar_view<float, int64_t> inertia,
- raft::host_scalar_view<int64_t, int64_t> n_iter
使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int64_t n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int64_t>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids, raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit(
- raft::resources const &handle,
- const cuvs::cluster::kmeans::params &params,
- raft::device_matrix_view<const double, int> X,
- std::optional<raft::device_vector_view<const double, int>> sample_weight,
- raft::device_matrix_view<double, int> centroids,
- raft::host_scalar_view<double, int> inertia,
- raft::host_scalar_view<int, int> n_iter
使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids, raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit(
- raft::resources const &handle,
- const cuvs::cluster::kmeans::params &params,
- raft::device_matrix_view<const double, int64_t> X,
- std::optional<raft::device_vector_view<const double, int64_t>> sample_weight,
- raft::device_matrix_view<double, int64_t> centroids,
- raft::host_scalar_view<double, int64_t> inertia,
- raft::host_scalar_view<int64_t, int64_t> n_iter
使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int64_t n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int64_t>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids, raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit(
- raft::resources const &handle,
- const cuvs::cluster::kmeans::params &params,
- raft::device_matrix_view<const int8_t, int> X,
- std::optional<raft::device_vector_view<const int8_t, int>> sample_weight,
- raft::device_matrix_view<int8_t, int> centroids,
- raft::host_scalar_view<int8_t, int> inertia,
- raft::host_scalar_view<int, int> n_iter
使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids, raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit(
- const raft::resources &handle,
- cuvs::cluster::kmeans::balanced_params const &params,
- raft::device_matrix_view<const float, int> X,
- raft::device_matrix_view<float, int> centroids
使用 k-means 算法查找平衡聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::balanced_params params; int n_features = 15; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, centroids);
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
centroids – [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
- void fit(
- const raft::resources &handle,
- cuvs::cluster::kmeans::balanced_params const &params,
- raft::device_matrix_view<const int8_t, int> X,
- raft::device_matrix_view<int8_t, int> centroids
使用 k-means 算法查找平衡聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::balanced_params params; int n_features = 15; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, centroids);
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
centroids – [inout] [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
- void predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const float, int> X,
- std::optional<raft::device_vector_view<const float, int>> sample_weight,
- raft::device_matrix_view<const float, int> centroids,
- raft::device_vector_view<int, int> labels,
- bool normalize_weight,
- raft::host_scalar_view<float> inertia
预测 X 中每个样本所属的最接近的聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter)); ... auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), false, raft::make_scalar_view(&inertia));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于预测的新数据。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
normalize_weight – [in] 如果应对权重进行归一化,则为 True
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
- void predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const float, int> X,
- std::optional<raft::device_vector_view<const float, int>> sample_weight,
- raft::device_matrix_view<const float, int> centroids,
- raft::device_vector_view<int64_t, int> labels,
- bool normalize_weight,
- raft::host_scalar_view<float> inertia
预测 X 中每个样本所属的最接近的聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter)); ... auto labels = raft::make_device_vector<int64_t, int>(handle, X.extent(0)); kmeans::predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), false, raft::make_scalar_view(&inertia));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于预测的新数据。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
normalize_weight – [in] 如果应对权重进行归一化,则为 True
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
- void predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const double, int> X,
- std::optional<raft::device_vector_view<const double, int>> sample_weight,
- raft::device_matrix_view<const double, int> centroids,
- raft::device_vector_view<int, int> labels,
- bool normalize_weight,
- raft::host_scalar_view<double> inertia
预测 X 中每个样本所属的最接近的聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter)); ... auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), false, raft::make_scalar_view(&inertia));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于预测的新数据。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
normalize_weight – [in] 如果应对权重进行归一化,则为 True
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
- void predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const double, int> X,
- std::optional<raft::device_vector_view<const double, int>> sample_weight,
- raft::device_matrix_view<const double, int> centroids,
- raft::device_vector_view<int64_t, int> labels,
- bool normalize_weight,
- raft::host_scalar_view<double> inertia
预测 X 中每个样本所属的最接近的聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter)); ... auto labels = raft::make_device_vector<int64_t, int>(handle, X.extent(0)); kmeans::predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), false, raft::make_scalar_view(&inertia));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于预测的新数据。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
normalize_weight – [in] 如果应对权重进行归一化,则为 True
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
- void predict(
- const raft::resources &handle,
- cuvs::cluster::kmeans::balanced_params const &params,
- raft::device_matrix_view<const int8_t, int> X,
- raft::device_matrix_view<const float, int> centroids,
- raft::device_vector_view<uint32_t, int> labels
预测 X 中每个样本所属的最接近的聚类。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); kmeans::fit(handle, params, X, std::nullopt, centroids.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter)); ... auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), false, raft::make_scalar_view(&inertia));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于预测的新数据。 [维度 = n_samples x n_features]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
- void fit_predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const float, int> X,
- std::optional<raft::device_vector_view<const float, int>> sample_weight,
- std::optional<raft::device_matrix_view<float, int>> centroids,
- raft::device_vector_view<int, int> labels,
- raft::host_scalar_view<float> inertia,
- raft::host_scalar_view<int> n_iter
计算 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit_predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const float, int64_t> X,
- std::optional<raft::device_vector_view<const float, int64_t>> sample_weight,
- std::optional<raft::device_matrix_view<float, int64_t>> centroids,
- raft::device_vector_view<int64_t, int64_t> labels,
- raft::host_scalar_view<float> inertia,
- raft::host_scalar_view<int64_t> n_iter
计算 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int64_t n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<float, int64_t>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int64_t, int64_t>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit_predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const double, int> X,
- std::optional<raft::device_vector_view<const double, int>> sample_weight,
- std::optional<raft::device_matrix_view<double, int>> centroids,
- raft::device_vector_view<int, int> labels,
- raft::host_scalar_view<double> inertia,
- raft::host_scalar_view<int> n_iter
计算 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit_predict(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const double, int64_t> X,
- std::optional<raft::device_vector_view<const double, int64_t>> sample_weight,
- std::optional<raft::device_matrix_view<double, int64_t>> centroids,
- raft::device_vector_view<int64_t, int64_t> labels,
- raft::host_scalar_view<double> inertia,
- raft::host_scalar_view<int64_t> n_iter
计算 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::params params; int64_t n_features = 15, inertia, n_iter; auto centroids = raft::make_device_matrix<double, int64_t>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int64_t, int64_t>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, std::nullopt, centroids.view(), labels.view(), raft::make_scalar_view(&inertia), raft::make_scalar_view(&n_iter));
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
sample_weight – [in] X 中每个观测值的可选权重。 [长度 = n_samples]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
inertia – [out] 样本到其最近聚类中心的平方距离之和。
n_iter – [out] 运行的迭代次数。
- void fit_predict(
- const raft::resources &handle,
- cuvs::cluster::kmeans::balanced_params const &params,
- raft::device_matrix_view<const float, int> X,
- raft::device_matrix_view<float, int> centroids,
- raft::device_vector_view<uint32_t, int> labels
计算平衡 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::balanced_params params; int n_features = 15; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, centroids.view(), labels.view());
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
- void fit_predict(
- const raft::resources &handle,
- cuvs::cluster::kmeans::balanced_params const &params,
- raft::device_matrix_view<const int8_t, int> X,
- raft::device_matrix_view<float, int> centroids,
- raft::device_vector_view<uint32_t, int> labels
计算平衡 k-means 聚类并预测输入中每个样本的聚类索引。
#include <raft/core/resources.hpp> #include <cuvs/cluster/kmeans.hpp> using namespace cuvs::cluster; ... raft::resources handle; cuvs::cluster::kmeans::balanced_params params; int n_features = 15; auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features); auto labels = raft::make_device_vector<int, int>(handle, X.extent(0)); kmeans::fit_predict(handle, params, X, centroids.view(), labels.view());
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]
centroids – [inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]
labels – [out] X 中每个样本所属聚类的索引。 [长度 = n_samples]
- void transform(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const float, int> X,
- raft::device_matrix_view<const float, int> centroids,
- raft::device_matrix_view<float, int> X_new
将 X 转换到聚类距离空间。
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式 [维度 = n_samples x n_features]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
X_new – [out] 在新空间中转换后的 X。 [维度 = n_samples x n_clusters]
- void transform(
- raft::resources const &handle,
- const kmeans::params &params,
- raft::device_matrix_view<const double, int> X,
- raft::device_matrix_view<const double, int> centroids,
- raft::device_matrix_view<double, int> X_new
将 X 转换到聚类距离空间。
- 参数:
handle – [in] Raft 句柄。
params – [in] KMeans 模型的参数。
X – [in] 用于聚类的训练实例。数据必须为行主序格式 [维度 = n_samples x n_features]
centroids – [in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]
X_new – [out] 在新空间中转换后的 X。 [维度 = n_samples x n_clusters]
K-means 辅助函数#
#include <cuvs/cluster/kmeans.hpp>
namespace cuvs::cluster::kmeans::helpers