Cluster#

Params#

#include <cuvs/cluster/kmeans.hpp>

namespace cuvs::cluster::kmeans

struct params : public cuvs::cluster::kmeans::base_params#
#include <kmeans.hpp>

用于指定 kmeans 算法超参数的简单对象。

公共成员

int n_clusters = 8#

要形成的聚类数量以及要生成的中心点数量 (默认值:8)。

InitMethod init = KMeansPlusPlus#

初始化方法,默认为 k-means++

  • InitMethod::KMeansPlusPlus (k-means++): 使用可扩展的 k-means++ 算法选择初始聚类中心。

  • InitMethod::Random (random): 从输入数据中随机选择 'n_clusters' 个观测值(行)作为初始中心点。

  • InitMethod::Array (ndarray): 使用 'centroids' 作为初始聚类中心。

int max_iter = 300#

k-means 算法单次运行的最大迭代次数。

double tol = 1e-4#

判定收敛时所使用的、相对于惯性 (inertia) 的相对容差。

rapids_logger::level_enum verbosity = rapids_logger::level_enum::info#

详细级别。

raft::random::RngState rng_state = {0}#

随机数生成器的种子。

int n_init = 1#

k-means 算法将以不同种子运行的实例数。

double oversampling_factor = 2.0#

k-means|| 算法中使用的过采样因子。

int batch_centroids = 0#

如果为 0,则 batch_centroids = n_clusters

struct balanced_params : public cuvs::cluster::kmeans::base_params#
#include <kmeans.hpp>

用于指定平衡 k-means 算法超参数的简单对象。

平衡 k-means 目前支持以下距离度量:

  • CosineExpanded

  • InnerProduct

  • L2Expanded

  • L2SqrtExpanded

公共成员

uint32_t n_iters = 20#

训练迭代次数

K-means#

#include <cuvs/cluster/kmeans.hpp>

namespace cuvs::cluster::kmeans

void fit(
raft::resources const &handle,
const cuvs::cluster::kmeans::params &params,
raft::device_matrix_view<const float, int> X,
std::optional<raft::device_vector_view<const float, int>> sample_weight,
raft::device_matrix_view<float, int> centroids,
raft::host_scalar_view<float, int> inertia,
raft::host_scalar_view<int, int> n_iter
)#

使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids,
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit(
raft::resources const &handle,
const cuvs::cluster::kmeans::params &params,
raft::device_matrix_view<const float, int64_t> X,
std::optional<raft::device_vector_view<const float, int64_t>> sample_weight,
raft::device_matrix_view<float, int64_t> centroids,
raft::host_scalar_view<float, int64_t> inertia,
raft::host_scalar_view<int64_t, int64_t> n_iter
)#

使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。

  #include <raft/core/resources.hpp>
  #include <cuvs/cluster/kmeans.hpp>
  using namespace  cuvs::cluster;
  ...
  raft::resources handle;
  cuvs::cluster::kmeans::params params;
  int64_t n_features = 15, inertia, n_iter;
  auto centroids = raft::make_device_matrix<float, int64_t>(handle, params.n_clusters, n_features);

  kmeans::fit(handle,
              params,
              X,
              std::nullopt,
              centroids,
              raft::make_scalar_view(&inertia),
              raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit(
raft::resources const &handle,
const cuvs::cluster::kmeans::params &params,
raft::device_matrix_view<const double, int> X,
std::optional<raft::device_vector_view<const double, int>> sample_weight,
raft::device_matrix_view<double, int> centroids,
raft::host_scalar_view<double, int> inertia,
raft::host_scalar_view<int, int> n_iter
)#

使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids,
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit(
raft::resources const &handle,
const cuvs::cluster::kmeans::params &params,
raft::device_matrix_view<const double, int64_t> X,
std::optional<raft::device_vector_view<const double, int64_t>> sample_weight,
raft::device_matrix_view<double, int64_t> centroids,
raft::host_scalar_view<double, int64_t> inertia,
raft::host_scalar_view<int64_t, int64_t> n_iter
)#

使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。

  #include <raft/core/resources.hpp>
  #include <cuvs/cluster/kmeans.hpp>
  using namespace  cuvs::cluster;
  ...
  raft::resources handle;
  cuvs::cluster::kmeans::params params;
  int64_t n_features = 15, inertia, n_iter;
  auto centroids = raft::make_device_matrix<double, int64_t>(handle, params.n_clusters, n_features);

  kmeans::fit(handle,
              params,
              X,
              std::nullopt,
              centroids,
              raft::make_scalar_view(&inertia),
              raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit(
raft::resources const &handle,
const cuvs::cluster::kmeans::params &params,
raft::device_matrix_view<const int8_t, int> X,
std::optional<raft::device_vector_view<const int8_t, int>> sample_weight,
raft::device_matrix_view<int8_t, int> centroids,
raft::host_scalar_view<int8_t, int> inertia,
raft::host_scalar_view<int, int> n_iter
)#

使用 k-means 算法查找聚类。初始中心点通过 k-means++ 算法选择。空聚类通过 k-means++ 算法选择新的中心点进行重新初始化。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids,
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心。[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit(
const raft::resources &handle,
cuvs::cluster::kmeans::balanced_params const &params,
raft::device_matrix_view<const float, int> X,
raft::device_matrix_view<float, int> centroids
)#

使用 k-means 算法查找平衡聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::balanced_params params;
int n_features = 15;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            centroids);
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • centroids[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

void fit(
const raft::resources &handle,
cuvs::cluster::kmeans::balanced_params const &params,
raft::device_matrix_view<const int8_t, int> X,
raft::device_matrix_view<int8_t, int> centroids
)#

使用 k-means 算法查找平衡聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::balanced_params params;
int n_features = 15;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            centroids);
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • centroids[out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

void predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const float, int> X,
std::optional<raft::device_vector_view<const float, int>> sample_weight,
raft::device_matrix_view<const float, int> centroids,
raft::device_vector_view<int, int> labels,
bool normalize_weight,
raft::host_scalar_view<float> inertia
)#

预测 X 中每个样本所属的最接近的聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids.view(),
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
...
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::predict(handle,
                params,
                X,
                std::nullopt,
                centroids.view(),
                labels.view(),
                false,
                raft::make_scalar_view(&inertia));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于预测的新数据。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • normalize_weight[in] 如果应对权重进行归一化,则为 True

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

void predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const float, int> X,
std::optional<raft::device_vector_view<const float, int>> sample_weight,
raft::device_matrix_view<const float, int> centroids,
raft::device_vector_view<int64_t, int> labels,
bool normalize_weight,
raft::host_scalar_view<float> inertia
)#

预测 X 中每个样本所属的最接近的聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids.view(),
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
...
auto labels = raft::make_device_vector<int64_t, int>(handle, X.extent(0));

kmeans::predict(handle,
                params,
                X,
                std::nullopt,
                centroids.view(),
                labels.view(),
                false,
                raft::make_scalar_view(&inertia));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于预测的新数据。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • normalize_weight[in] 如果应对权重进行归一化,则为 True

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

void predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const double, int> X,
std::optional<raft::device_vector_view<const double, int>> sample_weight,
raft::device_matrix_view<const double, int> centroids,
raft::device_vector_view<int, int> labels,
bool normalize_weight,
raft::host_scalar_view<double> inertia
)#

预测 X 中每个样本所属的最接近的聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids.view(),
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
...
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::predict(handle,
                params,
                X,
                std::nullopt,
                centroids.view(),
                labels.view(),
                false,
                raft::make_scalar_view(&inertia));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于预测的新数据。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • normalize_weight[in] 如果应对权重进行归一化,则为 True

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

void predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const double, int> X,
std::optional<raft::device_vector_view<const double, int>> sample_weight,
raft::device_matrix_view<const double, int> centroids,
raft::device_vector_view<int64_t, int> labels,
bool normalize_weight,
raft::host_scalar_view<double> inertia
)#

预测 X 中每个样本所属的最接近的聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids.view(),
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
...
auto labels = raft::make_device_vector<int64_t, int>(handle, X.extent(0));

kmeans::predict(handle,
                params,
                X,
                std::nullopt,
                centroids.view(),
                labels.view(),
                false,
                raft::make_scalar_view(&inertia));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于预测的新数据。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • normalize_weight[in] 如果应对权重进行归一化,则为 True

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

void predict(
const raft::resources &handle,
cuvs::cluster::kmeans::balanced_params const &params,
raft::device_matrix_view<const int8_t, int> X,
raft::device_matrix_view<const float, int> centroids,
raft::device_vector_view<uint32_t, int> labels
)#

预测 X 中每个样本所属的最接近的聚类。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);

kmeans::fit(handle,
            params,
            X,
            std::nullopt,
            centroids.view(),
            raft::make_scalar_view(&inertia),
            raft::make_scalar_view(&n_iter));
...
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::predict(handle,
                params,
                X,
                std::nullopt,
                centroids.view(),
                false,
                labels.view(),
                raft::make_scalar_view(&inertia));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于预测的新数据。 [维度 = n_samples x n_features]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

void fit_predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const float, int> X,
std::optional<raft::device_vector_view<const float, int>> sample_weight,
std::optional<raft::device_matrix_view<float, int>> centroids,
raft::device_vector_view<int, int> labels,
raft::host_scalar_view<float> inertia,
raft::host_scalar_view<int> n_iter
)#

计算 k-means 聚类并预测输入中每个样本的聚类索引。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::fit_predict(handle,
                    params,
                    X,
                    std::nullopt,
                    centroids.view(),
                    labels.view(),
                    raft::make_scalar_view(&inertia),
                    raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit_predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const float, int64_t> X,
std::optional<raft::device_vector_view<const float, int64_t>> sample_weight,
std::optional<raft::device_matrix_view<float, int64_t>> centroids,
raft::device_vector_view<int64_t, int64_t> labels,
raft::host_scalar_view<float> inertia,
raft::host_scalar_view<int64_t> n_iter
)#

计算 k-means 聚类并预测输入中每个样本的聚类索引。

  #include <raft/core/resources.hpp>
  #include <cuvs/cluster/kmeans.hpp>
  using namespace  cuvs::cluster;
  ...
  raft::resources handle;
  cuvs::cluster::kmeans::params params;
  int64_t n_features = 15, inertia, n_iter;
  auto centroids = raft::make_device_matrix<float, int64_t>(handle, params.n_clusters, n_features);
  auto labels = raft::make_device_vector<int64_t, int64_t>(handle, X.extent(0));

  kmeans::fit_predict(handle,
                      params,
                      X,
                      std::nullopt,
                      centroids.view(),
                      labels.view(),
                      raft::make_scalar_view(&inertia),
                      raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit_predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const double, int> X,
std::optional<raft::device_vector_view<const double, int>> sample_weight,
std::optional<raft::device_matrix_view<double, int>> centroids,
raft::device_vector_view<int, int> labels,
raft::host_scalar_view<double> inertia,
raft::host_scalar_view<int> n_iter
)#

计算 k-means 聚类并预测输入中每个样本的聚类索引。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::params params;
int n_features = 15, inertia, n_iter;
auto centroids = raft::make_device_matrix<double, int>(handle, params.n_clusters, n_features);
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::fit_predict(handle,
                    params,
                    X,
                    std::nullopt,
                    centroids.view(),
                    labels.view(),
                    raft::make_scalar_view(&inertia),
                    raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit_predict(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const double, int64_t> X,
std::optional<raft::device_vector_view<const double, int64_t>> sample_weight,
std::optional<raft::device_matrix_view<double, int64_t>> centroids,
raft::device_vector_view<int64_t, int64_t> labels,
raft::host_scalar_view<double> inertia,
raft::host_scalar_view<int64_t> n_iter
)#

计算 k-means 聚类并预测输入中每个样本的聚类索引。

  #include <raft/core/resources.hpp>
  #include <cuvs/cluster/kmeans.hpp>
  using namespace  cuvs::cluster;
  ...
  raft::resources handle;
  cuvs::cluster::kmeans::params params;
  int64_t n_features = 15, inertia, n_iter;
  auto centroids = raft::make_device_matrix<double, int64_t>(handle, params.n_clusters, n_features);
  auto labels = raft::make_device_vector<int64_t, int64_t>(handle, X.extent(0));

  kmeans::fit_predict(handle,
                      params,
                      X,
                      std::nullopt,
                      centroids.view(),
                      labels.view(),
                      raft::make_scalar_view(&inertia),
                      raft::make_scalar_view(&n_iter));
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • sample_weight[in] X 中每个观测值的可选权重。 [长度 = n_samples]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

  • inertia[out] 样本到其最近聚类中心的平方距离之和。

  • n_iter[out] 运行的迭代次数。

void fit_predict(
const raft::resources &handle,
cuvs::cluster::kmeans::balanced_params const &params,
raft::device_matrix_view<const float, int> X,
raft::device_matrix_view<float, int> centroids,
raft::device_vector_view<uint32_t, int> labels
)#

计算平衡 k-means 聚类并预测输入中每个样本的聚类索引。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::balanced_params params;
int n_features = 15;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::fit_predict(handle,
                    params,
                    X,
                    centroids.view(),
                    labels.view());
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

void fit_predict(
const raft::resources &handle,
cuvs::cluster::kmeans::balanced_params const &params,
raft::device_matrix_view<const int8_t, int> X,
raft::device_matrix_view<float, int> centroids,
raft::device_vector_view<uint32_t, int> labels
)#

计算平衡 k-means 聚类并预测输入中每个样本的聚类索引。

#include <raft/core/resources.hpp>
#include <cuvs/cluster/kmeans.hpp>
using namespace  cuvs::cluster;
...
raft::resources handle;
cuvs::cluster::kmeans::balanced_params params;
int n_features = 15;
auto centroids = raft::make_device_matrix<float, int>(handle, params.n_clusters, n_features);
auto labels = raft::make_device_vector<int, int>(handle, X.extent(0));

kmeans::fit_predict(handle,
                    params,
                    X,
                    centroids.view(),
                    labels.view());
参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式。 [维度 = n_samples x n_features]

  • centroids[inout] 可选 [in] 当 init 为 InitMethod::Array 时,使用 centroids 作为初始聚类中心 [out] k-means 算法生成的中心点存储在 'centroids' 指向的地址处。 [维度 = n_clusters x n_features]

  • labels[out] X 中每个样本所属聚类的索引。 [长度 = n_samples]

void transform(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const float, int> X,
raft::device_matrix_view<const float, int> centroids,
raft::device_matrix_view<float, int> X_new
)#

将 X 转换到聚类距离空间。

参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式 [维度 = n_samples x n_features]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • X_new[out] 在新空间中转换后的 X。 [维度 = n_samples x n_clusters]

void transform(
raft::resources const &handle,
const kmeans::params &params,
raft::device_matrix_view<const double, int> X,
raft::device_matrix_view<const double, int> centroids,
raft::device_matrix_view<double, int> X_new
)#

将 X 转换到聚类距离空间。

参数:
  • handle[in] Raft 句柄。

  • params[in] KMeans 模型的参数。

  • X[in] 用于聚类的训练实例。数据必须为行主序格式 [维度 = n_samples x n_features]

  • centroids[in] 聚类中心点。数据必须为行主序格式。 [维度 = n_clusters x n_features]

  • X_new[out] 在新空间中转换后的 X。 [维度 = n_samples x n_clusters]

K-means 辅助函数#

#include <cuvs/cluster/kmeans.hpp>

namespace cuvs::cluster::kmeans::helpers