这里说一点题外话:我最初是在翻看数据集 anscombe 的帮助文档 help(anscombe) 时,注意到文档末尾的示例声称有一个“魔法”,可以在一个 for 循环内完成四个线性回归。
我看了很久也没看明白这个魔法的优雅之处,于是动手实现了一个自己的版本,毕竟批量地跑模型似乎有点用。帮助文档中的示例如下:
##-- now some "magic" to do the 4 regressions in a loop:
## Template formula; its left- and right-hand sides are overwritten on every
## pass through the loops below.
ff <- y ~ x
## Named list lm1..lm4; the integer placeholders are replaced by fitted models.
mods <- setNames(as.list(1:4), paste0("lm", 1:4))
for(i in 1:4) {
## The "magic": a formula is a call whose elements 2 and 3 are the response
## and the predictor, so assigning symbols there turns ff into y<i> ~ x<i>.
ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
## or ff[[2]] <- as.name(paste0("y", i))
## ff[[3]] <- as.name(paste0("x", i))
## Fit the i-th regression, keep it in mods, and print its ANOVA table.
mods[[i]] <- lmi <- lm(ff, data = anscombe)
print(anova(lmi))
}
## See how close they are (numerically!)
## Coefficients of the four fits, collected side by side.
sapply(mods, coef)
## Coefficient table taken from each model's summary().
lapply(mods, function(fm) coef(summary(fm)))
## Now, do what you should have done in the first place: PLOTS
## 2 x 2 panel layout with an outer top margin for the shared title; op keeps
## the previous graphical parameters so they can be restored at the end.
op <- par(mfrow = c(2, 2), mar = 0.1+c(4,4,1,1), oma = c(0, 0, 2, 0))
for(i in 1:4) {
## Rebuild y<i> ~ x<i> exactly as in the fitting loop above.
ff[2:3] <- lapply(paste0(c("y","x"), i), as.name)
## Every panel uses the same axis limits, which keeps them comparable.
plot(ff, data = anscombe, col = "red", pch = 21, bg = "orange", cex = 1.2,
xlim = c(3, 19), ylim = c(3, 13))
## Overlay the regression line of the model fitted for this data set.
abline(mods[[i]], col = "blue")
}
## Shared title written into the outer margin.
mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)
## Restore the graphical parameters saved in op.
par(op)
我自己的“优雅”版本如下,完全不需要魔法:
# Fit and plot the four Anscombe regressions. The formulas are built as plain
# strings ("y1~x1", ..., "y4~x4") instead of rewriting a formula object in
# place.
data(anscombe)
idx <- seq_len(4) # Anscombe's quartet: columns x1..x4 and y1..y4
form <- paste(paste0("y", idx), paste0("x", idx), sep = "~")

# One lm per formula. do.call() puts the formula itself into the recorded
# call, so every model prints as lm(formula = y1 ~ x1, data = anscombe);
# lapply(form, lm, data = anscombe) would record
# FUN(formula = X[[i]], data = anscombe) instead. quote(anscombe) keeps the
# data set's name, rather than its full contents, in that call.
fit <- lapply(form, function(f) {
  do.call("lm", list(as.formula(f), data = quote(anscombe)))
})
names(fit) <- paste0("lm", idx)

# 2 x 2 panel layout with an outer top margin for the shared title; the
# previous graphical parameters are kept in op and restored at the end.
op <- par(mfrow = c(2, 2), mar = 0.1 + c(4, 4, 1, 1), oma = c(0, 0, 2, 0))
for (i in seq_along(form)) {
  plot(as.formula(form[i]),
    data = anscombe,
    # one colour per observation, however many rows the data set has
    col = hcl.colors(nrow(anscombe)),
    pch = 19, cex = 1.2,
    # identical axis limits in every panel keep the four plots comparable
    xlim = c(3, 19), ylim = c(3, 13),
    # subscripted axis labels; as.expression() makes sure they reach the
    # plotting code as plotmath expressions and not as calls to evaluate
    xlab = as.expression(substitute(x[i], list(i = i))),
    ylab = as.expression(substitute(y[i], list(i = i)))
  )
  # overlay the fitted regression line for this data set
  abline(fit[[i]], col = "red", lwd = 2)
}
mtext("Anscombe's 4 Regression data sets", outer = TRUE, cex = 1.5)
par(op)