问题

有一组数据:

data <- structure(list(longten = c(37.45, 42, 1300.6, 288.8, 157.05, 
                                   487.4), tollten = c(0, 211.45, 1247.2, 0, 0, 798.4), equipten = c(0, 
                                                                                                     0, 0, 0, 0, 0), cardten = c(110, 125, 2150, 0, 0, 570), wireten = c(0, 
                                                                                                                                                                         380.35, 0, 0, 0, 0), longmon = c(3.7, 4.4, 18.15, 9.45, 6.3, 
                                                                                                                                                                                                          11.8), tollmon = c(0, 20.75, 18, 0, 0, 19.25), equipmon = c(0, 
                                                                                                                                                                                                                                                                      0, 0, 0, 0, 0), cardmon = c(7.5, 15.25, 30.25, 0, 0, 13.5), wiremon = c(0, 
                                                                                                                                                                                                                                                                                                                                              35.7, 0, 0, 0, 0)), row.names = c(NA, -6L), class = c("tbl_df", 
                                                                                                                                                                                                                                                                                                                                                                                                    "tbl", "data.frame"))

它们是由*mon*ten两两对应的,想把这个数据框变形为:

| ten | mon | type |
| :-----: | :----: | :----: |
| 单元格 | 单元格 | long |
| 单元格 | 单元格 | toll |
...

笨办法

这是我用的笨办法,分别依赖于操作方式的一致才使得最后的cbind()可以对应,但是我感觉这个方法不太好。

data_mon <- select(data, contains('mon')) %>% 
  pivot_longer(cols = contains('mon'),
               names_to = 'type', names_pattern = '(.*)mon',
               values_to = 'mon')

data_ten <- select(data, contains('ten')) %>% 
  pivot_longer(cols = contains('ten'),
               names_to = 'type', names_pattern = '(.*)ten',
               values_to = 'ten')

mon_ten <- cbind(data_mon, ten = data_ten$ten)

> mon_ten
    type   mon     ten
1   long  3.70   37.45
2   toll  0.00    0.00
3  equip  0.00    0.00
4   card  7.50  110.00
5   wire  0.00    0.00
6   long  4.40   42.00
7   toll 20.75  211.45
8  equip  0.00    0.00
9   card 15.25  125.00
10  wire 35.70  380.35
11  long 18.15 1300.60
12  toll 18.00 1247.20
13 equip  0.00    0.00
14  card 30.25 2150.00
15  wire  0.00    0.00
16  long  9.45  288.80
17  toll  0.00    0.00
18 equip  0.00    0.00
19  card  0.00    0.00
20  wire  0.00    0.00
21  long  6.30  157.05
22  toll  0.00    0.00
23 equip  0.00    0.00
24  card  0.00    0.00
25  wire  0.00    0.00
26  long 11.80  487.40
27  toll 19.25  798.40
28 equip  0.00    0.00
29  card 13.50  570.00
30  wire  0.00    0.00

请问大家有无更加贴切的方法吗?

我发现经常做各种数据操作还是挺有用的,你这个问题我碰到过类似的。

看你的数据处理的思路:

  • 核心思路也是宽表变长表,用pivot_longer做没问题_
  • 原始data数据隐含了一个行编号的列,否则在数据转化之后,用你的ten,mon,type无法确定在原表中的数据。所以我第一个操作是加了个id列。
  • 你觉得的苯办法,是因为最终需要的ten | mon | type并不是长表,而是宽表。可以看我下面先建了一个ten_mon辅助列, 转成 id |ten_mon| type| value 四列的长表,再转成id | ten | mon | type的三列

library(tidyr)
library(dplyr)

data <-
  structure(
    list(
      longten = c(37.45, 42, 1300.6, 288.8, 157.05,
                  487.4),
      tollten = c(0, 211.45, 1247.2, 0, 0, 798.4),
      equipten = c(0,
                   0, 0, 0, 0, 0),
      cardten = c(110, 125, 2150, 0, 0, 570),
      wireten = c(0,
                  380.35, 0, 0, 0, 0),
      longmon = c(3.7, 4.4, 18.15, 9.45, 6.3,
                  11.8),
      tollmon = c(0, 20.75, 18, 0, 0, 19.25),
      equipmon = c(0,
                   0, 0, 0, 0, 0),
      cardmon = c(7.5, 15.25, 30.25, 0, 0, 13.5),
      wiremon = c(0,
                  35.7, 0, 0, 0, 0)
    ),
    row.names = c(NA,-6L),
    class = c("tbl_df",
              "tbl", "data.frame")
  )

data  %>% 
  mutate(id = 1:nrow(.)) %>% 
  pivot_longer(cols = 1:10, names_to = "typeall", values_to ="value") %>% 
  mutate(ten_mon = gsub("(.*)(ten|mon)","\\2",typeall),
          type= gsub("(.*)(ten|mon)","\\1",typeall)) %>% 
  
   pivot_wider(id_cols = c(id,type), names_from = ten_mon,values_from=value)
#> # A tibble: 30 x 4
#>       id type    ten   mon
#>    <int> <chr> <dbl> <dbl>
#>  1     1 long   37.4   3.7
#>  2     1 toll    0     0  
#>  3     1 equip   0     0  
#>  4     1 card  110     7.5
#>  5     1 wire    0     0  
#>  6     2 long   42     4.4
#>  7     2 toll  211.   20.8
#>  8     2 equip   0     0  
#>  9     2 card  125    15.2
#> 10     2 wire  380.   35.7
#> # … with 20 more rows

<sup>Created on 2020-12-05 by the reprex package (v0.3.0.9000)</sup>

    啊哈,用data.table实现了一下楼上的做法

    data_dt = as.data.table(data)
    data_dt[, id := 1:.N
            ][, melt(.SD, 'id')
              ][, ':='( name = gsub("(.*)(ten|mon)", "\\2", variable), 
                        type = gsub("(.*)(ten|mon)", "\\1", variable))
                ][, dcast(.SD, id + type ~  name)]