怎样把一段 tidy 版本的代码改写成 data.table 版本的？

yuanfan

如题。

第一段代码

library(sparkline)
library(dplyr)
library(DT)

table2 <- data.frame(
  column1 = c(rep(c('坂田银时', '神乐', '志村新八', '定春' ), 10)),
  column2 = sample(1:4, 40, replace = T),
  column3 = sample(10:20, 40, replace = T))

table2.group <- table2 %>%
  group_by(column1) %>%
  summarise(column2_mean = mean(column2))

table2.group$sparkline1 <- table2$column2 %>%
  split(table2$column1) %>%
  map(~ sparkline(.x, type = "line")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

table2.group$sparkline2 <- table2$column3 %>%
  split(table2$column1) %>%
  map(~ sparkline(.x, type = "bar")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

DT::datatable(table2.group, escape = FALSE) %>% spk_add_deps()

第二段代码

library(DT)
library(tibble) #使用tibble类型的数据
library(purrr) #使用 map_chr 函数

table1 <-
  tibble(
    column1 = c('坂田银时', '神乐', '志村新八', '定春'), # 第一列
    column2 = c(100, 10000, 10, 100), # 第二列
    column3 = c(1:4), # 第三列
    sparkline = list(
      v1 = x,      # 第四列第一行
      v2 = abs(x), # 第四列第二行
      v3 = x,      # 第四列第三行
      v4 = abs(x)  # 第四列第四行
    )
  )

table1$sparkline <- table1$sparkline %>%
  map( ~ sparkline(.x, type = "box")) %>%
  map(htmltools::as.tags) %>%
  map_chr(as.character)

DT::datatable(table1, escape = FALSE) %>% spk_add_deps()

环境信息如下：

> sessionInfo()
R version 4.2.0 (2022-04-22 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Simplified)_China.utf8  LC_CTYPE=Chinese (Simplified)_China.utf8    LC_MONETARY=Chinese (Simplified)_China.utf8
[4] LC_NUMERIC=C                                LC_TIME=Chinese (Simplified)_China.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] dplyr_1.0.8       purrr_0.3.4       tibble_3.1.6      DT_0.23           sparkline_2.0     formattable_0.2.1

loaded via a namespace (and not attached):
 [1] bslib_0.3.1       compiler_4.2.0    pillar_1.7.0      jquerylib_0.1.4   tools_4.2.0       digest_0.6.28     jsonlite_1.8.0   
 [8] evaluate_0.14     lifecycle_1.0.1   pkgconfig_2.0.3   rlang_1.0.2       DBI_1.1.2         cli_3.2.0         rstudioapi_0.13  
[15] crosstalk_1.2.0   yaml_2.2.1        xfun_0.26         fastmap_1.1.0     knitr_1.36        generics_0.1.2    htmlwidgets_1.5.4
[22] sass_0.4.0        vctrs_0.3.8       tidyselect_1.1.2  glue_1.6.2        R6_2.5.1          fansi_1.0.2       rmarkdown_2.11   
[29] magrittr_2.0.2    scales_1.1.1      ellipsis_0.3.2    htmltools_0.5.2   assertthat_0.2.1  colorspace_2.0-2  utf8_1.2.2       
[36] munsell_0.5.0     crayon_1.5.0

meeeeeeeeo

yuanfan
我知道的是dtplyr可以把dplyr的代码转化成data.table版本的，但是只支持dplyr和部分tidyr的函数的转化
比如说第一段代码，先把purrr::map的写法改写成dplyr的写法，然后加上dtplyr::lazy_dt

library(sparkline)
library(dplyr, warn.conflicts = F)
library(dtplyr)

table2 <- tibble(
  column1 = c(rep(c('坂田银时', '神乐', '志村新八', '定春' ), 10)),
  column2 = sample(1:4, 40, replace = T),
  column3 = sample(10:20, 40, replace = T)
)

# dplyr version
table2.group <- table2 %>%
  group_by(column1) %>%
  summarise(
    column2_mean = mean(column2),
    sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
    sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
  ) %>% 
  ungroup()

# start with lazy_dt
lazy_dt(table2) %>%
  group_by(column1) %>%
  summarise(
    column2_mean = mean(column2),
    sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
    sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
  ) %>% 
  ungroup()
#> Source: local data table [4 x 4]
#> Call:   `_DT1`[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, 
#>     type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, 
#>     type = "bar")))), keyby = .(column1)]
#> 
#>   column1  column2_mean sparkline1                   sparkline2                 
#>   <chr>           <dbl> <chr>                        <chr>                      
#> 1 坂田银时          2.8 "<span id=\"htmlwidget-5b49… "<span id=\"htmlwidget-430…
#> 2 定春              2.4 "<span id=\"htmlwidget-bbb6… "<span id=\"htmlwidget-b68…
#> 3 志村新八          2.6 "<span id=\"htmlwidget-bd77… "<span id=\"htmlwidget-4d1…
#> 4 神乐              2.4 "<span id=\"htmlwidget-a271… "<span id=\"htmlwidget-413…
#> 
#> # Use as.data.table()/as.data.frame()/as_tibble() to access results

这里的Call就是转化后的data.table代码，把_DT1替换成data.table(table2)就可以了

library(data.table, warn.conflicts = F)

data.table(table2)[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))), keyby = .(column1)]

yuanfan

meeeeeeeeo
谢谢你，这个办法管用。
这个函数真可爱啊，我最近发现另一个略相似的函数，plotly 包里有个ggplotly()函数可以把 ggplot2 画的静态图直接转换成动态图，如下。

library(plotly)
library(ggplot2)

mtcars$cyl <- as.factor(mtcars$cyl)

p <-
  ggplot(data = mtcars, aes(x = wt, y = mpg, color = cyl)) + geom_point()
plotly::ggplotly(p)

meeeeeeeeo

又翻了下dtplyr，dtplyr:::dt_call可以把Call直接提取出来

a <- lazy_dt(table2) %>%
  group_by(column1) %>%
  summarise(
    column2_mean = mean(column2),
    sparkline1 = as.character(htmltools::as.tags(sparkline(column2, type = "line"))),
    sparkline2 = as.character(htmltools::as.tags(sparkline(column2, type = "bar")))
  ) %>% 
  ungroup()

dtplyr:::dt_call(a)
#> `_DT2`[, .(column2_mean = mean(column2), sparkline1 = as.character(htmltools::as.tags(sparkline(column2, 
#>     type = "line"))), sparkline2 = as.character(htmltools::as.tags(sparkline(column2, 
#>     type = "bar")))), keyby = .(column1)]

这里_DT1变成了_DT2应该是每次使用dtplyr的时候都会新生成一个临时的data.table，编号会增加

Cloud2016

以第一段代码为例，我们看看其中用到的函数 map()，其帮助文档标题如下：

Apply a function to each element of a list or atomic vector

这不就是 lapply() 的意思吗? 完全可以干掉多余的 dplyr / purrr

library(sparkline)
library(DT)

table2 <- data.frame(
  column1 = c(rep(c("坂田银时", "神乐", "志村新八", "定春"), 10)),
  column2 = sample(1:4, 40, replace = T),
  column3 = sample(10:20, 40, replace = T)
)

library(data.table)

table2 <- as.data.table(table2)

table2.group <- table2[, .(column2_mean = mean(column2)), by = "column1"]

table2.group$sparkline1 <- lapply(
  lapply(
    split(table2$column2, table2$column1), sparkline,
    type = "line"
  ),
  function(x) as.character(htmltools::as.tags(x))
)

table2.group$sparkline2 <- lapply(
  lapply(
    split(table2$column3, table2$column1), sparkline,
    type = "bar"
  ),
  function(x) as.character(htmltools::as.tags(x))
)

DT::datatable(table2.group, escape = FALSE) |>
  spk_add_deps()

meeeeeeeeo

yuanfan
这种“翻译”函数是挺好的，但是用的时候要格外小心，毕竟能翻译的部分是有限的，超出范围的内容，翻译的时候很可能会丢失掉