正巧在生日当天参加了统计之都的赠阅活动,很幸运地中了奖( つ•̀ω•́)つ。刚好这学期选修了学校的地理信息系统课程,阅读《地理计算与R语言》中文版作为课内知识的补充,让我受益匪浅。校内本科生课程以 ArcGIS 为主,使用的 10.3 版本有些陈旧,也没有机器学习相关的讲解。因此我结合《地理计算与R语言》中文版和英文网页版,敲了以下代码作为学习笔记,主要用最新英文网页版采用的 mlr3 框架和 terra 包,取代中文版中的 mlr 框架和 raster 包。
# 导入需要的包
library(sf)
library(terra)
library(future)
library(mlr3verse)
library(mlr3extralearners) # registers "classif.ksvm"; not on CRAN (install from r-universe/GitHub)
library(mlr3spatiotempcv)
library(mlr3tuning)
library(mlr3viz)
# Load the `lsl` and `study_mask` datasets shipped with spDataLarge.
data(list = c("lsl", "study_mask"), package = "spDataLarge")
# Read the ta.tif raster bundled with spDataLarge through terra.
ta <- terra::rast(system.file("raster", "ta.tif", package = "spDataLarge"))
# Quick visual check using the plot method that terra provides.
terra::plot(ta)
# Wrap `lsl` in a spatial classification task (mlr3spatiotempcv).
# Columns x/y are registered as coordinates in CRS EPSG:32717; with
# coords_as_features = FALSE they are not used as predictors.
task <- mlr3spatiotempcv::as_task_classif_st(
  mlr3::as_data_backend(lsl),
  id = "ecuador_lsl",
  target = "lslpts",
  positive = "TRUE",
  crs = "EPSG:32717",
  coordinate_names = c("x", "y"),
  coords_as_features = FALSE
)
# Pairs plot of the task's columns, drawn through mlr3viz.
autoplot(task, type = "pairs")
# Spatial resampling strategy: "repeated_spcv_coords" with 5 folds,
# repeated 10 times.
resampling <- rsmp("repeated_spcv_coords", folds = 5, repeats = 10)
# Benchmark glmnet against ranger on the same task and resampling.
# predict_type = "prob" is required for probability-based measures (AUC).
design <- benchmark_grid(
  task,
  lrns(c("classif.glmnet", "classif.ranger"), predict_type = "prob"),
  resampling
)
# future::plan("multisession") is left disabled: on the author's Windows 11
# machine, running with future caused stuttering and crashes.
bmr <- benchmark(design)
# Report the aggregated AUC (the stated goal of this comparison) next to the
# accuracy; previously only accuracy was shown even though AUC was intended.
bmr$aggregate(msrs(c("classif.auc", "classif.acc")))[
  , .(task_id, learner_id, classif.auc, classif.acc)
]
# Tune an SVM: ksvm learner with an RBF kernel ("rbfdot") and C-classification.
# "classif.ksvm" is registered by mlr3extralearners, so that package must be
# attached before this line runs.
lrn_ksvm <- lrn(
  "classif.ksvm",
  predict_type = "prob",
  kernel = "rbfdot",
  type = "C-svc"
)
# Optional: fall back to a featureless baseline whenever the SVM errors.
# lrn_ksvm$encapsulate(method = "try",
# fallback = lrn("classif.featureless",
# predict_type = "prob"))
# Resampling used inside the tuner. It is given its own name so that the
# object does not mask mlr3's rsmp() constructor (the original bound it to
# a variable called `rsmp`).
inner_resampling <- rsmp("repeated_spcv_coords", folds = 5, repeats = 10)
# Search space for both hyperparameters (to_tune() at learner construction
# is an alternative). Bounds are exponents; `trafo` maps each sampled value
# x to 2^x before it is passed to the learner.
search_space <- ps(
  C = p_dbl(lower = -12, upper = 15, trafo = function(x) 2^x),
  sigma = p_dbl(lower = -15, upper = 6, trafo = function(x) 2^x)
)
# Stop after 10 evaluated configurations.
terminator <- trm("evals", n_evals = 10)
# Optimizer used for tuning ("mbo").
tuner <- tnr("mbo")
# Bundle learner, inner resampling, measure, search space, terminator and
# tuner into a single self-tuning learner.
at_ksvm <- auto_tuner(
  tuner = tuner,
  learner = lrn_ksvm,
  resampling = inner_resampling,
  measure = msr("classif.auc"),
  search_space = search_space,
  terminator = terminator
)
# Fixed seed, then an 80/20 split of the task's rows.
set.seed(123)
split <- partition(task, ratio = 0.8)
# Tune (and fit) on the training rows only; split$test is held out and is
# not used in this section.
at_ksvm$train(task, row_ids = split$train)
at_ksvm$tuning_result
# Copy the tuned hyperparameters back to the plain learner (if needed).
# learner_param_vals is a list column, hence the [[1]].
lrn_ksvm$param_set$values <- at_ksvm$tuning_result$learner_param_vals[[1]]