刚刚发现了个实用的 splitstackshape-package 程序包,按照该链接中的代码运行并比较了下效率;僕也把根据自己以往的写法编写出来的代码(O5)一并拿来比较,结果连马屁都追不上(<https://www.youtube.com/watch?v=izJNIOQzaZU>,哈哈,实力悬殊啊!凤牙和超级阿斯拉达比赛,僕这种“陪太子读书”的赛车手只能垫底而已……怪不得僕的科研总是那么费时……)。
> system.time({
+ O1 <- mybigdf %>%
+ Reshape(
+ id.vars = 'id',
+ var.stubs = c('varA', 'varB', 'varC'),
+ sep = '_',
+ convert = TRUE)
+ O1 <- O1[order(O1$id, O1$time), ]
+ })
user system elapsed
5.06 0.00 5.69
>
> system.time({
+ O2 <- mybigdf %>% merged.stack(
+ id.vars = 'id',
+ var.stubs=c('varA', 'varB', 'varC'),
+ sep = '_',
+ convert = TRUE)
+ })
user system elapsed
3.40 0.34 4.45
>
> system.time({
+ O3 <- mybigdf %>% Stacked(
+ id.vars = 'id',
+ var.stubs = c('varA', 'varB', 'varC'),
+ sep = '_',
+ convert = TRUE)
+ })
user system elapsed
1.11 0.17 1.65
>
> DT <- data.table(mybigdf)
> system.time({
+ O4 <- DT %>% merged.stack(
+ id.vars = 'id',
+ var.stubs = c('varA', 'varB', 'varC'),
+ sep = '_',
+ convert = TRUE)
+ })
user system elapsed
3.33 0.30 4.19
>
> system.time({
+ O5 <- mybigdf %>%
+ mutate(.id = 1:nrow(.)) %>%
+ gather(id, var, varA_1:varC_3) %>%
+ separate(id, c('id', 'time'), convert = TRUE) %>%
+ spread(id, var)
+ })
user system elapsed
24.00 17.88 100.67