• R语言
  • 怎样把一段 tidy 版本的代码改写成 data.table 版本的?

如题。

第一段代码

library(sparkline)
library(dplyr)
library(DT)

table2 <- data.frame(
  column1 = c(rep(c('坂田银时', '神乐', '志村新八', '定春' ), 10)),
  column2 = sample(1:4, 40, replace = T),
  column3 = sample(10:20, 40, replace = T))

table2.group <- table2 %>%
  group_by(column1) %>%
  summarise(column2_mean = mean(column2))

table2.group$sparkline1 <- table2$column2 %>%
  split(table2$column1) %>%
  map(~ sparkline(.x, type = "line")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

table2.group$sparkline2 <- table2$column3 %>%
  split(table2$column1) %>%
  map(~ sparkline(.x, type = "bar")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

DT::datatable(table2.group, escape = FALSE) %>% spk_add_deps()

第二段代码

library(DT)
library(tibble) #使用tibble类型的数据
library(purrr) #使用 map_chr 函数

table1 <-
  tibble(
    column1 = c('坂田银时', '神乐', '志村新八', '定春'), # 第一列
    column2 = c(100, 10000, 10, 100), # 第二列
    column3 = c(1:4), # 第三列
    sparkline = list(
      v1 = x,      # 第四列第一行
      v2 = abs(x), # 第四列第二行
      v3 = x,      # 第四列第三行
      v4 = abs(x)  # 第四列第四行
    )
  )

table1$sparkline <- table1$sparkline %>%
  map( ~ sparkline(.x, type = "box")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

DT::datatable(table1, escape = FALSE) %>% spk_add_deps()

环境信息如下:

> sessionInfo()
R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.utf8  LC_CTYPE=Chinese (Simplified)_China.utf8    LC_MONETARY=Chinese (Simplified)_China.utf8
[4] LC_NUMERIC=C                                LC_TIME=Chinese (Simplified)_China.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_1.0.8       purrr_0.3.4       tibble_3.1.6      DT_0.23           sparkline_2.0     formattable_0.2.1

loaded via a namespace (and not attached):
 [1] bslib_0.3.1       compiler_4.2.0    pillar_1.7.0      jquerylib_0.1.4   tools_4.2.0       digest_0.6.28     jsonlite_1.8.0   
 [8] evaluate_0.14     lifecycle_1.0.1   pkgconfig_2.0.3   rlang_1.0.2       DBI_1.1.2         cli_3.2.0         rstudioapi_0.13  
[15] crosstalk_1.2.0   yaml_2.2.1        xfun_0.26         fastmap_1.1.0     knitr_1.36        generics_0.1.2    htmlwidgets_1.5.4
[22] sass_0.4.0        vctrs_0.3.8       tidyselect_1.1.2  glue_1.6.2        R6_2.5.1          fansi_1.0.2       rmarkdown_2.11   
[29] magrittr_2.0.2    scales_1.1.1      ellipsis_0.3.2    htmltools_0.5.2   assertthat_0.2.1  colorspace_2.0-2  utf8_1.2.2       
[36] munsell_0.5.0     crayon_1.5.0  

    yuanfan
    我知道的是dtplyr可以把dplyr的代码转化成data.table版本的,但是只支持dplyr和部分tidyr的函数的转化
    比如说第一段代码,先把purrr::map的写法改写成dplyr的写法,然后加上dtplyr::lazy_dt

    library(sparkline)
    library(dplyr, warn.conflicts = F)
    library(dtplyr)
    
    table2 <- tibble(
      column1 = c(rep(c('坂田银时', '神乐', '志村新八', '定春' ), 10)),
      column2 = sample(1:4, 40, replace = T),
      column3 = sample(10:20, 40, replace = T)
    )
    
    # dplyr version
    table2.group <- table2 %>%
      group_by(column1) %>%
      summarise(
        column2_mean = mean(column2),
        sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
        sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
      ) %>% 
      ungroup()
    
    # start with lazy_dt
    lazy_dt(table2) %>%
      group_by(column1) %>%
      summarise(
        column2_mean = mean(column2),
        sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
        sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
      ) %>% 
      ungroup()
    #> Source: local data table [4 x 4]
    #> Call:   `_DT1`[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, 
    #>     type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, 
    #>     type = "bar")))), keyby = .(column1)]
    #> 
    #>   column1  column2_mean sparkline1                   sparkline2                 
    #>   <chr>           <dbl> <chr>                        <chr>                      
    #> 1 坂田银时          2.8 "<span id=\"htmlwidget-5b49… "<span id=\"htmlwidget-430…
    #> 2 定春              2.4 "<span id=\"htmlwidget-bbb6… "<span id=\"htmlwidget-b68…
    #> 3 志村新八          2.6 "<span id=\"htmlwidget-bd77… "<span id=\"htmlwidget-4d1…
    #> 4 神乐              2.4 "<span id=\"htmlwidget-a271… "<span id=\"htmlwidget-413…
    #> 
    #> # Use as.data.table()/as.data.frame()/as_tibble() to access results

    这里的Call就是转化后的data.table代码,把_DT1替换成data.table(table2)就可以了

    library(data.table, warn.conflicts = F)
    
    data.table(table2)[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))), keyby = .(column1)]

      又翻了下dtplyrdtplyr:::dt_call可以把Call直接提取出来

      a <- lazy_dt(table2) %>%
        group_by(column1) %>%
        summarise(
          column2_mean = mean(column2),
          sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
          sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
        ) %>% 
        ungroup()
      
      dtplyr:::dt_call(a)
      #> `_DT2`[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, 
      #>     type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, 
      #>     type = "bar")))), keyby = .(column1)]

      这里_DT1变成了_DT2应该是每次使用dtplyr的时候都会新生成一个临时的data.table,编号会增加

      以第一段代码为例,我们看看其中用到的函数 map(),其帮助文档标题如下:

      Apply a function to each element of a list or atomic vector

      这不就是 lapply() 的意思吗? 完全可以干掉多余的 dplyr / purrr

      library(sparkline)
      library(DT)
      
      table2 <- data.frame(
        column1 = c(rep(c("坂田银时", "神乐", "志村新八", "定春"), 10)),
        column2 = sample(1:4, 40, replace = T),
        column3 = sample(10:20, 40, replace = T)
      )
      
      library(data.table)
      
      table2 <- as.data.table(table2)
      
      table2.group <- table2[, .(column2_mean = mean(column2)), by = "column1"]
      
      table2.group$sparkline1 <- lapply(
        lapply(
          split(table2$column2, table2$column1), sparkline,
          type = "line"
        ),
        function(x) as.character(htmltools::as.tags(x))
      )
      
      table2.group$sparkline2 <- lapply(
        lapply(
          split(table2$column3, table2$column1), sparkline,
          type = "bar"
        ),
        function(x) as.character(htmltools::as.tags(x))
      )
      
      DT::datatable(table2.group, escape = FALSE) |>
        spk_add_deps()

      meeeeeeeeo
      谢谢你,这个办法管用。
      这个函数真可爱啊,我最近发现另一个略相似的函数,plotly 包里有个ggplotly()函数可以把 ggplot2 画的静态图直接转换成动态图,如下。

      library(plotly)
      library(ggplot2)
      
      mtcars$cyl <- as.factor(mtcars$cyl)
      
      p <-
        ggplot(data = mtcars, aes(x = wt, y = mpg, color = cyl)) + geom_point()
      plotly::ggplotly(p)

        yuanfan
        这种“翻译”函数是挺好的,但是用的时候要格外小心,毕竟能翻译的部分是有限的,超出范围的内容,翻译的时候很可能会丢失掉