predict.lm中interval可以是"none", "confidence", "prediction"
选"confidence"和"prediction"时都会得到区间,这两个有什么区别吗?
predict()
... 因为我之前都得使用 for loop 来模拟预测数据,非常浪费时间:先 subset
数据后计算再比较... 我有个非常基本的时间序列问题,就是有关 predict()
和 forecast
的涵义。predict() 是用 $t_0$ 到 $t_{i-1}$ 的数据来预测 $t_i$ 点吗?
还是说 $\hat{y}$ 是基于全部观测数据 $x_0$ 到 $x_n$ 计算的?
:
## In-sample fit: predict() with no new data returns the fitted values
## for the training observations, so mse0 below is the TRAINING mean
## squared error, not an out-of-sample forecast error.
ŷ <- predict(fit)
mse0 <- mean((y - ŷ)^2)
## Example: simulate 100 days of data, fit a gaussian glmnet model, and
## compute the in-sample mean squared error.
library('plyr')       # load plyr before tidyverse so dplyr verbs win masking
library('tidyverse')
library('glmnet')

set.seed(101)  # reproducible simulation

## 100 consecutive dates, two uniform predictors, a uniform response.
## data_frame() is deprecated in tibble; tibble() is the drop-in replacement.
testdata <- tibble(date = sort(today() - 1:100),
                   x1 = runif(100),
                   x2 = runif(100),
                   y = runif(100))

## Fit on the design matrix built from the predictors only
## (columns 1 and 4 are `date` and `y`).
fit <- glmnet(model.matrix(~ . - 1, data = testdata[, -c(1, 4)]),
              testdata$y, family = 'gaussian')

## In-sample MSE of the fitted model over the whole data set.
mean((testdata$y - predict(fit, newx = model.matrix(~ . - 1, data = testdata[, -c(1, 4)])))^2)
# [1] 0.07751218   (pasted console output kept as a comment so the script sources cleanly)

dateID <- unique(testdata$date)  # all 100 distinct dates
n <- seq(30)                     # training window: the first 30 rows
## Sweep the elastic-net mixing parameter alpha = 0, 0.1, ..., 1
## (ridge -> lasso), cross-validating each fit on the training window.
## BUG FIX: the original call never passed `alpha`, so all 11 cv.glmnet
## fits silently used the default alpha = 1 (pure lasso) and the
## "ridge to lasso" sweep was a no-op.
fit <- llply(0:10, function(i) {
  cv.glmnet(model.matrix(~ . - 1, data = testdata[n, -c(1, 4)]),
            testdata$y[n],
            family = 'gaussian',
            type.measure = 'mse',
            alpha = i / 10)  # i = 0 -> ridge, i = 10 -> lasso
})
coef(fit[[1]])
#3 x 1 sparse Matrix of class "dgCMatrix"
# 1
#(Intercept) 0.4959626
#x1 .
#x2
## Per-date prediction accuracy: for every date, score each of the 11
## cross-validated fits on that single day's row and keep the best
## (smallest) MSE.
## NOTE(review): `dateID` holds all 100 dates, not 30 — confirm whether
## only the most recent 30 days were intended here.
fit0 <- llply(dateID, function(dt) {
## For each fitted model, predict the one observation on date `dt` and
## compute its squared error; ldply stacks the 11 per-model MSEs.
ldply(fit, function(ft) {
smp = filter(testdata, date == dt)
yhat = predict(ft, s = ft$lambda.1se, newx = model.matrix(~ . - 1, data = smp[, -c(1, 4)]))
mean((smp$y - yhat)^2)
}) %>% min # keep the best (minimum) MSE across the 11 models
## NOTE(review): picking the per-date minimum selects a different model
## each day, which overstates out-of-sample accuracy — verify intent.
}) %>% tbl_df
## Per-date predictions: for every date, predict that day's single
## observation with each of the 11 fits, then keep only the last result.
fit0 <- llply(dateID, function(dt) {
## Predicted value from each model for the one row on date `dt`;
## ldply stacks the 11 predictions into a data frame.
ldply(fit, function(ft) {
smp = filter(testdata, date == dt)
yhat = predict(ft, s = ft$lambda.1se, newx = model.matrix(~ . - 1, data = smp[, -c(1, 4)]))
}) %>% last # NOTE(review): `last` picks the result of the LAST model in
## `fit`, not "the latest date" — the outer llply already iterates over
## dates, so the original comment does not match what this code does.
}) %>% tbl_df
last()
来提取最新日期数据... 输出的 Df %Dev Lambda 表
却有 57 行,难道 cv.glmnet
测试了 57 次才获得最优值?不过怎么一百个观测值中只有 51 个 lambda 值呢? :?: :?: :?:
> fit[[1]]
$lambda
[1] 0.0509098593 0.0463871677 0.0422662596 0.0385114416 0.0350901913 0.0319728754 0.0291324933
[8] 0.0265444429 0.0241863077 0.0220376628 0.0200798976 0.0182960548 0.0166706837 0.0151897061
[15] 0.0138402945 0.0126107609 0.0114904556 0.0104696752 0.0095395781 0.0086921083 0.0079199254
[22] 0.0072163410 0.0065752612 0.0059911331 0.0054588973 0.0049739439 0.0045320725 0.0041294556
[29] 0.0037626062 0.0034283466 0.0031237818 0.0028462736 0.0025934185 0.0023630263 0.0021531016
[36] 0.0019618260 0.0017875428 0.0016287424 0.0014840494 0.0013522106 0.0012320839 0.0011226290
[43] 0.0010228977 0.0009320263 0.0008492276 0.0007737846 0.0007050437 0.0006424096 0.0005853397
[50] 0.0005333397 0.0004859593
$cvm
[1] 0.08395731 0.08474542 0.08539466 0.08599341 0.08650683 0.08696220 0.08735730 0.08775368 0.08814499
[10] 0.08852839 0.08889111 0.08923069 0.08954764 0.08984340 0.09011944 0.09043344 0.09074651 0.09103540
[19] 0.09129628 0.09149598 0.09165070 0.09179097 0.09192034 0.09203950 0.09214917 0.09224999 0.09234258
[28] 0.09242757 0.09250551 0.09257696 0.09264241 0.09270233 0.09275718 0.09280736 0.09285324 0.09289519
[37] 0.09293500 0.09297533 0.09301329 0.09304794 0.09307957 0.09310843 0.09313477 0.09315882 0.09318073
[46] 0.09320066 0.09321890 0.09323553 0.09325070 0.09326453 0.09327714
$cvsd
[1] 0.01366894 0.01373537 0.01376924 0.01380243 0.01381329 0.01382723 0.01386547 0.01390787 0.01394999
[10] 0.01398853 0.01402609 0.01406323 0.01409954 0.01413434 0.01416752 0.01419973 0.01423075 0.01426021
[19] 0.01428985 0.01431190 0.01432668 0.01434049 0.01435391 0.01436684 0.01437920 0.01439094 0.01440205
[28] 0.01441251 0.01442232 0.01443149 0.01444003 0.01444798 0.01445536 0.01446219 0.01446850 0.01447433
[37] 0.01447986 0.01448534 0.01449050 0.01449523 0.01449956 0.01450354 0.01450717 0.01451050 0.01451354
[46] 0.01451633 0.01451888 0.01452121 0.01452333 0.01452528 0.01452706
$cvup
[1] 0.09762624 0.09848079 0.09916389 0.09979584 0.10032011 0.10078944 0.10122277 0.10166155 0.10209499
[10] 0.10251692 0.10291720 0.10329392 0.10364718 0.10397775 0.10428695 0.10463317 0.10497726 0.10529561
[19] 0.10558614 0.10580789 0.10597738 0.10613146 0.10627425 0.10640634 0.10652836 0.10664093 0.10674464
[28] 0.10684008 0.10692783 0.10700844 0.10708244 0.10715032 0.10721254 0.10726954 0.10732174 0.10736952
[37] 0.10741485 0.10746067 0.10750379 0.10754317 0.10757913 0.10761197 0.10764194 0.10766931 0.10769427
[46] 0.10771699 0.10773777 0.10775673 0.10777403 0.10778981 0.10780420
$cvlo
[1] 0.07028837 0.07101004 0.07162542 0.07219098 0.07269354 0.07313497 0.07349184 0.07384581 0.07419500
[10] 0.07453986 0.07486502 0.07516747 0.07544810 0.07570906 0.07595192 0.07623370 0.07651576 0.07677520
[19] 0.07700643 0.07718408 0.07732401 0.07745048 0.07756643 0.07767267 0.07776997 0.07785904 0.07794053
[28] 0.07801506 0.07808319 0.07814547 0.07820237 0.07825435 0.07830183 0.07834517 0.07838474 0.07842086
[37] 0.07845514 0.07848999 0.07852279 0.07855272 0.07858001 0.07860490 0.07862760 0.07864832 0.07866719
[46] 0.07868433 0.07870002 0.07871432 0.07872736 0.07873925 0.07875008
$nzero
s0 s1 s2 s3 s4 s5 s6 s7 s8 s9 s10 s11 s12 s13 s14 s15 s16 s17 s18 s19 s20 s21 s22 s23 s24 s25
0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
s26 s27 s28 s29 s30 s31 s32 s33 s34 s35 s36 s37 s38 s39 s40 s41 s42 s43 s44 s45 s46 s47 s48 s49 s50
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
$name
mse
"Mean-Squared Error"
$glmnet.fit
Call: glmnet(x = model.matrix(~. - 1, data = testdata[n, -c(1, 4)]), y = testdata$y[n], family = "gaussian")
Df %Dev Lambda
[1,] 0 0.000000 0.0509100
[2,] 1 0.005743 0.0463900
[3,] 1 0.010510 0.0422700
[4,] 1 0.014470 0.0385100
[5,] 1 0.017760 0.0350900
[6,] 1 0.020480 0.0319700
[7,] 1 0.022750 0.0291300
[8,] 1 0.024630 0.0265400
[9,] 1 0.026190 0.0241900
[10,] 1 0.027490 0.0220400
[11,] 1 0.028560 0.0200800
[12,] 2 0.029800 0.0183000
[13,] 2 0.031120 0.0166700
[14,] 2 0.032220 0.0151900
[15,] 2 0.033130 0.0138400
[16,] 2 0.033890 0.0126100
[17,] 2 0.034520 0.0114900
[18,] 2 0.035040 0.0104700
[19,] 2 0.035480 0.0095400
[20,] 2 0.035840 0.0086920
[21,] 2 0.036140 0.0079200
[22,] 2 0.036390 0.0072160
[23,] 2 0.036590 0.0065750
[24,] 2 0.036760 0.0059910
[25,] 2 0.036910 0.0054590
[26,] 2 0.037020 0.0049740
[27,] 2 0.037120 0.0045320
[28,] 2 0.037200 0.0041290
[29,] 2 0.037270 0.0037630
[30,] 2 0.037330 0.0034280
[31,] 2 0.037370 0.0031240
[32,] 2 0.037410 0.0028460
[33,] 2 0.037440 0.0025930
[34,] 2 0.037470 0.0023630
[35,] 2 0.037490 0.0021530
[36,] 2 0.037510 0.0019620
[37,] 2 0.037530 0.0017880
[38,] 2 0.037540 0.0016290
[39,] 2 0.037550 0.0014840
[40,] 2 0.037560 0.0013520
[41,] 2 0.037560 0.0012320
[42,] 2 0.037570 0.0011230
[43,] 2 0.037580 0.0010230
[44,] 2 0.037580 0.0009320
[45,] 2 0.037580 0.0008492
[46,] 2 0.037590 0.0007738
[47,] 2 0.037590 0.0007050
[48,] 2 0.037590 0.0006424
[49,] 2 0.037590 0.0005853
[50,] 2 0.037590 0.0005333
[51,] 2 0.037590 0.0004860
[52,] 2 0.037600 0.0004428
[53,] 2 0.037600 0.0004035
[54,] 2 0.037600 0.0003676
[55,] 2 0.037600 0.0003350
[56,] 2 0.037600 0.0003052
[57,] 2 0.037600 0.0002781
$lambda.min
[1] 0.05090986
$lambda.1se
[1] 0.05090986
attr(,"class")
[1] "cv.glmnet"