赞赞赞! 顺手试用了一下, 速度很快!
library(oneclust)
library(ids)
# Simulated inputs --------------------------------------------------------
# One global seed drives both random draws inside data.frame(): the 500
# level names are generated first and the 500 sizes second, so the order of
# the two arguments must not be swapped.
set.seed(1)
df_levels <- data.frame(
  postcode = ids::adjective_animal(500),
  size = round(runif(500, min = 200, max = 500), 0)
)

# The train and test samples are simulated from the same levels with
# identical settings; only the seed handed to the simulator differs.
simulate_samples <- function(seed) {
  oneclust::sim_postcode_samples(
    df_levels,
    n = 100000,
    threshold = 300,
    prob = c(0.2, 0.1),
    seed = seed
  )
}
train <- simulate_samples(4)
test <- simulate_samples(5)
## training
# Number of clusters requested from oneclust().
k <- 28

# Count how often each postcode level occurs in the training sample. The
# counts are the clustering input, and the table names are the lookup key
# used to map a postcode to its position in `level_new`.
level_hist <- table(train$postcode)
level_new <- oneclust(level_hist, k)$cluster

# Replace each training postcode by the cluster id stored at its level's
# position, then encode it as an ordered factor with levels "1", ..., "k".
# Plain nested calls are used so this step does not depend on a pipe
# operator being available on the search path.
feature_tr <- ordered(
  as.character(level_new[match(train$postcode, names(level_hist))]),
  levels = as.character(seq_len(k))
)

# Plot the label against the cluster feature, with reference lines at 0.9
# and 0.8. par() returns the previous settings, which are restored afterwards
# so the `las` change does not leak into later plots.
op <- par(las = 1)
plot(feature_tr, train$label, lty = 0, xlab = "Cluster", ylab = "Label")
abline(h = 0.9, col = cud(1))
abline(h = 0.8, col = cud(2))
par(op)
## test
# Look up every test postcode in the training frequency table. match()
# returns NA for a postcode that never occurred in the training sample, so
# such rows end up with an NA cluster feature; report them instead of
# letting them pass silently.
level_idx <- match(test$postcode, names(level_hist))
if (anyNA(level_idx)) {
  warning(
    sum(is.na(level_idx)),
    " test rows have a postcode that is absent from the training data; ",
    "their cluster feature is NA.",
    call. = FALSE
  )
}

# Reuse the cluster ids learned on the training data and encode them as an
# ordered factor with levels "1", ..., "k". Plain nested calls are used so
# this step does not depend on a pipe operator being available on the
# search path.
feature_te <- ordered(
  as.character(level_new[level_idx]),
  levels = as.character(seq_len(k))
)

# Plot the test label against the cluster feature, with reference lines at
# 0.9 and 0.8. The previous graphics settings are restored afterwards so the
# `las` change does not leak into later plots.
op <- par(las = 1)
plot(feature_te, test$label, lty = 0, xlab = "Cluster", ylab = "Label")
abline(h = 0.9, col = cud(1))
abline(h = 0.8, col = cud(2))
par(op)