来个base版:

dat <- expand.grid(year = 2015:2021, month = 1:5, day = 10:19)
dat$value <- seq_len(nrow(dat))

do.call(rbind, with(dat, by(
   dat[order(year, month, day), ],  interaction(year, month),
   function(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value, 5))))
)))

    chuxinyuan
    增加一行能好,但是baseR功力太弱,不知道怎么改的更好看

    dat <- dat[order(dat$year, dat$month, dat$day),]

      更正base版:

      dat <- expand.grid(year = 2015:2021, month = 1:5, day = 10:19)
      dat$value <- seq_len(nrow(dat))
       
      dat <- dat[order(dat$year, dat$month, dat$day), ]
      do.call(rbind, by(dat, interaction(dat$month, dat$year),
         function(i) c(year = i$year[1], month = i$month[1], value_mean = mean(tail(i$value, 5)))
      ))

      chuxinyuan barnett874 多谢二位的指正。

      dplyr解决方法

      library(tidyverse)
      #构造数据
      data <- data.frame(
        stringsAsFactors = FALSE,
        date = c("2020-4-5","2020-4-6",
                 "2020-4-7","2020-4-8","2020-4-9","2020-4-10","2020-5-15",
                 "2020-5-16","2020-5-17","2020-5-18","2020-5-19",
                 "2020-5-20","2020-5-21"),
        value = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L)
      )
      data$date = as.Date(data$date)
      
      # 解决方案
      data %>% 
        slice_max(date, n = 5) %>% # 选中后5天
        summarise(mean = mean(value)) # 计算value的平均值

      R 4.1.0 版

      dat <- expand.grid(year = 2015:2021, month = 1:5, day = 10:19) |> 
        {\(i) within(i, value <- seq_len(nrow(i))) }() 
      
      dat |> 
        split(~ month + year) |>
        lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
        {\(i) do.call(rbind, i) }()

        wangbinzjcc wangbinzjcc

        ✓ [main*]> system.time({
             dat <- expand.grid(year = 2015:2021, month = 1:5, day = 10:19) |> 
                 {\(i) within(i, value <- seq_len(nrow(i))) }();
             
             dat |> 
                 split(~ month + year) |>
                 lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
                 {\(i) do.call(rbind, i) }()  
         })
           user  system elapsed 
          0.009   0.000   0.009 
        ✓ [main*]> 
        ✓ [main*]> system.time({
             dat <- expand.grid(year = 2015:2021, month = 1:5, day = 10:19);
             dat$value <- seq_len(nrow(dat));
             
             dat <- dat[order(dat$year, dat$month, dat$day), ];
             do.call(rbind, by(dat, interaction(dat$month, dat$year),
                               function(i) c(year = i$year[1], month = i$month[1], value_mean = mean(tail(i$value, 5)))
             ))
         })
           user  system elapsed 
          0.034   0.093   0.125 
          test_4.1.0 <- function(){
            # R 4.1.0
            system.time({
              
              dat <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31) |> 
                {\(i) within(i, value <- seq_len(nrow(i))) }()
              
              dat |> 
                split(~ month + year) |>
                lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |>
                {\(i) do.call(rbind, i) }()  
              
              })
          }
          
          times_4.1.0 <- replicate(10, test_4.1.0()[1:3])
          times_4.1.0
          #>           [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
          #> user.self    0    0    0    0    0 0.02 0.01 0.02    0  0.01
          #> sys.self     0    0    0    0    0 0.00 0.00 0.00    0  0.00
          #> elapsed      0    0    0    0    0 0.01 0.01 0.02    0  0.01
          
          
          
          
          
          test_4.0.5 <- function(){
            # R 4.0.5
            system.time({
              
              dat <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31)
              dat <- within(dat, value <- seq_len(nrow(dat)))
              
              do.call(rbind, lapply(
                split(dat, dat[, c("month", "year")]), 
                function(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))
                ))
              
            })
          }
          
          times_4.0.5 <- replicate(10, test_4.0.5()[1:3])
          times_4.0.5
          #>           [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10]
          #> user.self    0    0    0 0.01    0 0.01    0    0    0     0
          #> sys.self     0    0    0 0.00    0 0.00    0    0    0     0
          #> elapsed      0    0    0 0.01    0 0.01    0    0    0     0

          <sup>Created on 2021-05-21 by the reprex package (v2.0.0)</sup>

          运行速度上没有区别。

            wangbinzjcc ryo 多测算几次求平均这个思路没错,但是数据量太少了,说明不了问题。

            重新开个帖子讨论这个问题吧!楼主的问题早就干净利索的解决了。

              chuxinyuan 是的,正规测速不是这么测的。system.time() 可以用来粗测,但对运行时间短的代码仅仅跑 10 遍求平均是不太靠谱的。可以考虑 microbenchmark::microbenchmark() 等工具。

              ✓ 1.9 GiB [main*]> microbenchmark(
                   test_4.1.0 = {
                       dat1 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31) |> 
                           {\(i) within(i, value <- seq_len(nrow(i))) }(); 
                       dat1 |> 
                           split(~ month + year) |> 
                           lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |> 
                           {\(i) do.call(rbind, i) }()}, 
                   test_4.0.5 = {
                       dat2 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31)
                       dat2 <- within(dat2, value <- seq_len(nrow(dat2)))
                       
                       do.call(rbind, lapply(
                           split(dat2, dat2[, c("month", "year")]), 
                           function(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))
                       ))}, 
                   ## https://github.com/nmatzke/BioGeoBEARS
                   #test_BioGeoBEARS.alt = {
                   #  dat <- expand.grid.alt(year = 2015:2021, month = 1:12, day = 1:31) |> 
                   #    {\(i) within(i, value <- seq_len(nrow(i))) }(); 
                   #  dat |> 
                   #    split(~ month + year) |> 
                   #    lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |> 
                   #    {\(i) do.call(rbind, i) }()}, 
                   ## https://github.com/nmatzke/BioGeoBEARS
                   #test_BioGeoBEARS.jc = {
                   #  dat <- expand.grid.jc(year = 2015:2021, month = 1:12, day = 1:31) |> 
                   #    {\(i) within(i, value <- seq_len(nrow(i))) }(); 
                   #  dat |> 
                   #    split(~ month + year) |> 
                   #    lapply(\(i) with(i, c(year = year[1], month = month[1], value_mean = mean(tail(value[order(day)], 5))))) |> 
                   #    {\(i) do.call(rbind, i) }()}, 
                   
                   test_dplyr = {
                       dat3 <- expand.grid(year = 2015:2021, month = 1:12, day = 1:31) %>% 
                           mutate(value = 1:nrow(.))
                       dat3 %>% ddply(., .(year, month), summarise, mean_value = mean(tail(value, 5)))
                       #}, 
                       ## https://rdrr.io/cran/noncompliance/man/expand.grid.DT.html
                       ## https://atrebas.github.io/post/2019-03-03-datatable-dplyr
                       #test_exg.DT = {
                       #dat4 <- noncompliance::expand.grid.DT(2015:2021, 1:12, 1:31)
                       #setnames(dat4, old = c('seq1', 'seq2', 'seq3'), new = c('year', 'month', 'day'))
                       #dat4[, value := 1:.N][]
                      
                       ## https://s3.amazonaws.com/assets.datacamp.com/blog_assets/datatable_Cheat_Sheet_R.pdf
                       #dat4[order(year, month, day), .(mean_value = mean((value[.N-5:.N]))), by = c('year', 'month')]
                       
                       ## https://stackoverflow.com/questions/36526141/summarize-data-table-by-group
                       ## https://stackoverflow.com/a/16325932/3806250
                       ## https://stackoverflow.com/a/34753260/3806250
                       ## https://stackoverflow.com/a/53994503/3806250
                       ## https://stackoverflow.com/a/50093928/3806250
                       ## https://s3.amazonaws.com/assets.datacamp.com/blog_assets/datatable_Cheat_Sheet_R.pdf
                       #dat4[dat4[, .I[(.N-4):.N], by = c('year', 'month')]$V1]
                       #dat4[dat4[, .I[tail(seq_len(.N),5)], by = c('year', 'month')]$V1]
                           
                       #dat4[, {
                       #value = value
                       #value = value[order(day)]
                       #.SD[, .(mean_value = mean(tail(value, 5))), 
                       #    by=.(year, month)]}]
                       
                       #dat4[(.N-5):.N, 
                       #   .(mean_value = mean(value)), 
                       #   by = c('year', 'month')]
                       
                       #dat4[, mean(value[(.N-5):.N]), by = c('year', 'month')]
                       
                       #dat4[, mean(tail(value, 5)), by = c('year', 'month')]
                       
                       #dat4[, .(mean_value = mean(tail(value, 5))), by = c('year', 'month')]
                       
                       #dat4[, .(mean_value = mean(tail(value[order(day)], 5))), by = c('year', 'month')]
              
                       #dat4[, by = c('year', 'month'),
                       #   lapply(tail(.SD, 5), mean),
                      #    .SDcols = c('value')]
              
                       #dat4[, keyby = .(year, month),
                       #     .(mean_value = mean(tail(value, 5)))]
                       
                       ## https://stackoverflow.com/questions/33702470/aggregate-sum-and-mean-in-r-with-ddply
                       #dat4[, lapply(.SD[c(.N-5, .N)], mean), by = c('year', 'month')]
                       #},
                       ## https://stackoverflow.com/questions/12948686/why-is-expand-grid-faster-than-data-table-s-cj
                       #test_CJ = {
                       #dat5 <- CJ(2015:2021, 1:12, 1:31)
                       #setnames(dat4, old = c('seq1', 'seq2', 'seq3'), new = c('year', 'month', 'day'))
                       #dat4[, value := 1:.N][]
                   }
               )
              Unit: milliseconds
                     expr    min     lq   mean median     uq   max neval
               test_4.1.0  7.129  7.647  8.293  7.915  8.145 19.88   100
               test_4.0.5  7.087  7.609  8.492  7.866  8.254 21.05   100
               test_dplyr 15.888 16.695 17.533 17.089 17.528 28.48   100

              data.table怎么使用?搞了一整天都错,晕 😅

              ✓ 1.9 GiB [main*]> microbenchmark(
                   eg = expand.grid(1800:2050, 1:12, 1:31),
                   egDT = noncompliance::expand.grid.DT(expand.grid.DT(1800:2050, 1:12, 1:31)),
                   CJ = data.table::CJ(1800:2050, 1:12, 1:31),
                   eg.L = expand.grid(1800:2050L, 1:12, 1:31),
                   egDT.L = noncompliance::expand.grid.DT(expand.grid.DT(1800:2050L, 1:12, 1:31)),
                   CJ.L = data.table::CJ(1800:2050L, 1:12, 1:31),
                   eg.LL = expand.grid(1800L:2050L, 1:12, 1:31),
                   egDT.LL = noncompliance::expand.grid.DT(expand.grid.DT(1800L:2050L, 1:12, 1:31)),
                   CJ.LL = data.table::CJ(1800L:2050L, 1:12, 1:31))
              Unit: microseconds
                  expr    min     lq   mean median     uq    max neval
                    eg 2682.4 2939.9 3171.6 3002.4 3096.1  13861   100
                  egDT 4455.3 4652.5 5500.6 4785.3 5015.9  20647   100
                    CJ  459.9  531.6 4260.9  594.2  681.8 357043   100
                  eg.L 2737.2 2938.1 3345.6 3020.6 3191.0  14063   100
                egDT.L 4378.7 4662.0 5497.7 4836.1 5238.5  16041   100
                  CJ.L  459.8  538.6  731.5  586.2  670.0   9731   100
                 eg.LL 2725.1 2949.2 3288.2 3022.1 3176.0  13908   100
               egDT.LL 4463.2 4681.4 5278.5 4841.8 5090.6  16971   100
                 CJ.LL  464.8  554.1  695.0  612.4  679.5   5130   100