✓ 1.9 GiB [main*]> microbenchmark(
test_4.1.0 = {
dat1 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31) |>
{\(i) within(i, value <- seq_len(nrow(i))) }();
dat1 |>
split(~ month + year) |>
lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
{\(i) do.call(rbind, i) }()},
test_4.0.5 = {
dat2 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31)
dat2 <- within(dat2, value <- seq_len(nrow(dat2)))
do.call(rbind, lapply(
split(dat2, dat2[, c("month", "year")]),
function(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))
))},
## https://github.com/nmatzke/BioGeoBEARS
#test_BioGeoBEARS.alt = {
# dat <- expand.grid.alt(year = 2015:2021, month = 1:12, day = 1:31) |>
# {\(i) within(i, value <- seq_len(nrow(i))) }();
# dat |>
# split(~ month + year) |>
# lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
# {\(i) do.call(rbind, i) }()},
## https://github.com/nmatzke/BioGeoBEARS
#test_BioGeoBEARS.jc = {
# dat <- expand.grid.jc(year = 2015:2021, month = 1:12, day = 1:31) |>
# {\(i) within(i, value <- seq_len(nrow(i))) }();
# dat |>
# split(~ month + year) |>
# lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
# {\(i) do.call(rbind, i) }()},
test_dplyr = {
dat3 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31) %>%
mutate(value = 1:nrow(.))
dat3 %>% ddply(., .(year, month), summarise, mean_value = mean(tail(value, 5)))
#},
## https://rdrr.io/cran/noncompliance/man/expand.grid.DT.html
## https://atrebas.github.io/post/2019-03-03-datatable-dplyr
#test_exg.DT = {
#dat4 <- noncompliance::expand.grid.DT(2015:2021, 1:12, 1:31)
#setnames(dat4, old = c('seq1', 'seq2', 'seq3'), new = c('year', 'month', 'day'))
#dat4[, value := 1:.N][]
## https://s3.amazonaws.com/assets.datacamp.com/blog_assets/datatable_Cheat_Sheet_R.pdf
#dat4[order(year, month, day), .(mean_value = mean((value[.N-5:.N]))), by = c('year', 'month')]
## https://stackoverflow.com/questions/36526141/summarize-data-table-by-group
## https://stackoverflow.com/a/16325932/3806250
## https://stackoverflow.com/a/34753260/3806250
## https://stackoverflow.com/a/53994503/3806250
## https://stackoverflow.com/a/50093928/3806250
## https://s3.amazonaws.com/assets.datacamp.com/blog_assets/datatable_Cheat_Sheet_R.pdf
#dat4[dat4[, .I[(.N-4):.N], by = c('year', 'month')]$V1]
#dat4[dat4[, .I[tail(seq_len(.N),5)], by = c('year', 'month')]$V1]
#dat4[, {
#value = value
#value = value[order(day)]
#.SD[, .(mean_value = mean(tail(value, 5))),
# by=.(year, month)]}]
#dat4[(.N-5):.N,
# .(mean_value = mean(value)),
# by = c('year', 'month')]
#dat4[, mean(value[(.N-5):.N]), by = c('year', 'month')]
#dat4[, mean(tail(value, 5)), by = c('year', 'month')]
#dat4[, .(mean_value = mean(tail(value, 5))), by = c('year', 'month')]
#dat4[, .(mean_value = mean(tail(value[order(day)], 5))), by = c('year', 'month')]
#dat4[, by = c('year', 'month'),
# lapply(tail(.SD, 5), mean),
# .SDcols = c('value')]
#dat4[, keyby = .(year, month),
# .(mean_value = mean(tail(value, 5)))]
## https://stackoverflow.com/questions/33702470/aggregate-sum-and-mean-in-r-with-ddply
#dat4[, lapply(.SD[c(.N-5, .N)], mean), by = c('year', 'month')]
#},
## https://stackoverflow.com/questions/12948686/why-is-expand-grid-faster-than-data-table-s-cj
#test_CJ = {
#dat5 <- CJ(2015:2021, 1:12, 1:31)
#setnames(dat4, old = c('seq1', 'seq2', 'seq3'), new = c('year', 'month', 'day'))
#dat4[, value := 1:.N][]
}
)
Unit: milliseconds
expr min lq mean median uq max neval
test_4.1.0 7.129 7.647 8.293 7.915 8.145 19.88 100
test_4.0.5 7.087 7.609 8.492 7.866 8.254 21.05 100
test_dplyr 15.888 16.695 17.533 17.089 17.528 28.48 100
这data.table
怎么使用?搞了一整天都错,晕 😅 ~
✓ 1.9 GiB [main*]> microbenchmark(
eg = expand.grid(1800:2050, 1:12, 1:31),
egDT = noncompliance::expand.grid.DT(expand.grid.DT(1800:2050, 1:12, 1:31)),
CJ = data.table::CJ(1800:2050, 1:12, 1:31),
eg.L = expand.grid(1800:2050L, 1:12, 1:31),
egDT.L = noncompliance::expand.grid.DT(expand.grid.DT(1800:2050L, 1:12, 1:31)),
CJ.L = data.table::CJ(1800:2050L, 1:12, 1:31),
eg.LL = expand.grid(1800L:2050L, 1:12, 1:31),
egDT.LL = noncompliance::expand.grid.DT(expand.grid.DT(1800L:2050L, 1:12, 1:31)),
CJ.LL = data.table::CJ(1800L:2050L, 1:12, 1:31))
Unit: microseconds
expr min lq mean median uq max neval
eg 2682.4 2939.9 3171.6 3002.4 3096.1 13861 100
egDT 4455.3 4652.5 5500.6 4785.3 5015.9 20647 100
CJ 459.9 531.6 4260.9 594.2 681.8 357043 100
eg.L 2737.2 2938.1 3345.6 3020.6 3191.0 14063 100
egDT.L 4378.7 4662.0 5497.7 4836.1 5238.5 16041 100
CJ.L 459.8 538.6 731.5 586.2 670.0 9731 100
eg.LL 2725.1 2949.2 3288.2 3022.1 3176.0 13908 100
egDT.LL 4463.2 4681.4 5278.5 4841.8 5090.6 16971 100
CJ.LL 464.8 554.1 695.0 612.4 679.5 5130 100