Susannalsy
其实自己写也很简单的啦,用个 `for` 循环就行。以 10 折交叉验证为例:
# Manual k-fold cross-validation of an SVM regressor on BostonHousing.
# Rows are split into k contiguous folds; each fold is predicted by a model
# trained on all the other rows. The out-of-fold predictions are collected in
# `y_predcv`, in the same row order as `x_train` / `y_train`.
library(e1071)
data(BostonHousing, package = 'mlbench')

# Convert every column to numeric (factor columns become their integer
# codes). vapply() pins the result to a numeric matrix, one column per
# variable.
BostonHousing1 <- vapply(
  BostonHousing, as.numeric, numeric(nrow(BostonHousing))
)
is_target <- colnames(BostonHousing1) == "medv"
x_train <- BostonHousing1[, !is_target, drop = FALSE] # predictors (matrix)
y_train <- BostonHousing1[, is_target, drop = FALSE]  # target (1-col matrix)

k <- 10                # how many folds?
nr <- nrow(x_train)    # the number of rows
n_val <- nr %/% k      # base fold size (integer division)
stopifnot(n_val >= 1)  # need at least one row per fold

y_predcv <- numeric(nr) # preallocated out-of-fold predictions
for (i in seq_len(k)) {
  l1 <- (i - 1) * n_val + 1
  # The last fold absorbs the remainder when nr is not divisible by k.
  l2 <- if (i == k) nr else i * n_val
  val_idx <- l1:l2

  x_val <- x_train[val_idx, , drop = FALSE]

  # Train on every row that is NOT in the validation fold. Negative indexing
  # guarantees the two sets are disjoint (no leakage of validation rows) and
  # also handles the first and last folds without special cases.
  par_x_train <- x_train[-val_idx, , drop = FALSE]
  par_y_train <- y_train[-val_idx, 1]

  model <- svm(par_x_train, par_y_train)      # fit a svm
  y_predcv[val_idx] <- predict(model, x_val)  # predict the held-out fold
}
length(y_predcv)
#> [1] 506
或者在划分数据集的时候使用 `caret::createFolds`,这个函数会先打乱 index 再划分成 k 份数据,随机性更强。
# k-fold cross-validation of an SVM regressor using caret::createFolds to
# build the folds. Expects `x_train` (numeric predictor matrix) and `y_train`
# (numeric target, vector or 1-column matrix) to already exist, and e1071 to
# be attached for svm(). Out-of-fold predictions are stored in `y_predcv` at
# the positions of the rows they belong to, so `y_predcv[j]` is the
# prediction for row j of `x_train`.
library(caret)
#> Loading required package: lattice
#> Loading required package: ggplot2
n_folds <- 10
y_all <- as.numeric(y_train) # plain vector, whatever shape y_train has
folds <- createFolds(y = y_all, k = n_folds)
#folds
y_predcv <- numeric(length(y_all)) # preallocated, aligned with y_all
for (i in seq_along(folds)) {
  valid_idx <- folds[[i]]
  # drop = FALSE keeps a matrix even if a fold holds a single row.
  tra_x <- x_train[-valid_idx, , drop = FALSE]
  tra_y <- y_all[-valid_idx]
  valid_x <- x_train[valid_idx, , drop = FALSE]

  model <- svm(tra_x, tra_y)                      # fit a svm
  y_predcv[valid_idx] <- predict(model, valid_x)  # predict the held-out fold
}
length(y_predcv)
#> [1] 506