导入 data.table
library(data.table)
创建example data
set.seed(42)
dt <- data.table(
A1 = sample(1:100,10),
A2 = sample(1:100,10),
A3 = sample(1:100,10),
B1 = sample(1:100,10),
B2 = sample(1:100,10),
B3 = sample(1:100,10),
C = sample(1:100,10)
)
dt
## A1 A2 A3 B1 B2 B3 C
## 1: 49 37 34 58 99 2 84
## 2: 65 20 92 8 88 58 9
## 3: 25 26 3 36 87 10 35
## 4: 74 3 58 68 49 40 93
## 5: 18 41 42 86 26 5 16
## 6: 100 89 24 18 6 33 92
## 7: 47 27 30 92 95 49 69
## 8: 24 36 43 69 2 73 95
## 9: 71 5 15 4 3 29 2
## 10: 89 84 22 50 21 76 82
Filter多列
所有列都要大于20
# 当有NA时注意使用 na.rm = TRUE
dt[rowMeans(dt>20)==1]
## A1 A2 A3 B1 B2 B3 C
## 1: 47 27 30 92 95 49 69
## 2: 89 84 22 50 21 76 82
任意列 > 90
# 当有NA时注意使用 na.rm = TRUE
dt[rowSums(dt>90)>0]
## A1 A2 A3 B1 B2 B3 C
## 1: 49 37 34 58 99 2 84
## 2: 65 20 92 8 88 58 9
## 3: 74 3 58 68 49 40 93
## 4: 100 89 24 18 6 33 92
## 5: 47 27 30 92 95 49 69
## 6: 24 36 43 69 2 73 95
所有列(除了C)> C
dt[dt[,Reduce("&",lapply(.SD,">",C)),.SDcols = !c("C")]]
## A1 A2 A3 B1 B2 B3 C
## 1: 71 5 15 4 3 29 2
任意列(除了C)> C
dt[dt[,Reduce("|",lapply(.SD,">",C)),.SDcols = !c("C")]]
## A1 A2 A3 B1 B2 B3 C
## 1: 49 37 34 58 99 2 84
## 2: 65 20 92 8 88 58 9
## 3: 25 26 3 36 87 10 35
## 4: 18 41 42 86 26 5 16
## 5: 100 89 24 18 6 33 92
## 6: 47 27 30 92 95 49 69
## 7: 71 5 15 4 3 29 2
## 8: 89 84 22 50 21 76 82
cbind
Base R
dt2 <- copy(dt)
# base R
cbind(dt,dt2)
## A1 A2 A3 B1 B2 B3 C A1 A2 A3 B1 B2 B3 C
## 1: 49 37 34 58 99 2 84 49 37 34 58 99 2 84
## 2: 65 20 92 8 88 58 9 65 20 92 8 88 58 9
## 3: 25 26 3 36 87 10 35 25 26 3 36 87 10 35
## 4: 74 3 58 68 49 40 93 74 3 58 68 49 40 93
## 5: 18 41 42 86 26 5 16 18 41 42 86 26 5 16
## 6: 100 89 24 18 6 33 92 100 89 24 18 6 33 92
## 7: 47 27 30 92 95 49 69 47 27 30 92 95 49 69
## 8: 24 36 43 69 2 73 95 24 36 43 69 2 73 95
## 9: 71 5 15 4 3 29 2 71 5 15 4 3 29 2
## 10: 89 84 22 50 21 76 82 89 84 22 50 21 76 82
data.table way
setDT(unlist(list(dt,dt2),recursive = FALSE),check.names = TRUE)[]
## A1 A2 A3 B1 B2 B3 C A1.1 A2.1 A3.1 B1.1 B2.1 B3.1 C.1
## 1: 49 37 34 58 99 2 84 49 37 34 58 99 2 84
## 2: 65 20 92 8 88 58 9 65 20 92 8 88 58 9
## 3: 25 26 3 36 87 10 35 25 26 3 36 87 10 35
## 4: 74 3 58 68 49 40 93 74 3 58 68 49 40 93
## 5: 18 41 42 86 26 5 16 18 41 42 86 26 5 16
## 6: 100 89 24 18 6 33 92 100 89 24 18 6 33 92
## 7: 47 27 30 92 95 49 69 47 27 30 92 95 49 69
## 8: 24 36 43 69 2 73 95 24 36 43 69 2 73 95
## 9: 71 5 15 4 3 29 2 71 5 15 4 3 29 2
## 10: 89 84 22 50 21 76 82 89 84 22 50 21 76 82
生成多列
使用set
#注意区别于lapply使用的场景
for (i in 1:3) {
set(dt2,j = paste0("diff","_",i),value = dt[[paste0("A",i)]] - dt[[paste0("B",i)]])
}
dt2
## A1 A2 A3 B1 B2 B3 C diff_1 diff_2 diff_3
## 1: 49 37 34 58 99 2 84 -9 -62 32
## 2: 65 20 92 8 88 58 9 57 -68 34
## 3: 25 26 3 36 87 10 35 -11 -61 -7
## 4: 74 3 58 68 49 40 93 6 -46 18
## 5: 18 41 42 86 26 5 16 -68 15 37
## 6: 100 89 24 18 6 33 92 82 83 -9
## 7: 47 27 30 92 95 49 69 -45 -68 -19
## 8: 24 36 43 69 2 73 95 -45 34 -30
## 9: 71 5 15 4 3 29 2 67 2 -14
## 10: 89 84 22 50 21 76 82 39 63 -54
使用Map
A_cols <- grep("A",names(dt),value = TRUE)
B_cols <- grep("B",names(dt),value = TRUE)
new_diff_cols <- paste0("diff","_",1:3)
dt2[,(new_diff_cols) := Map("-",mget(A_cols),mget(B_cols))][]
## A1 A2 A3 B1 B2 B3 C diff_1 diff_2 diff_3
## 1: 49 37 34 58 99 2 84 -9 -62 32
## 2: 65 20 92 8 88 58 9 57 -68 34
## 3: 25 26 3 36 87 10 35 -11 -61 -7
## 4: 74 3 58 68 49 40 93 6 -46 18
## 5: 18 41 42 86 26 5 16 -68 15 37
## 6: 100 89 24 18 6 33 92 82 83 -9
## 7: 47 27 30 92 95 49 69 -45 -68 -19
## 8: 24 36 43 69 2 73 95 -45 34 -30
## 9: 71 5 15 4 3 29 2 67 2 -14
## 10: 89 84 22 50 21 76 82 39 63 -54