在 R 中,根据另一个数据框中的字符串,对列表中的多个数据框取子集
•浏览 1
Subsetting multiple dataframes within list in R based on strings in another dataframe
我想根据另一个数据框中保存的字符串,对列表中包含的多个数据框取子集。
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}
我希望我的所有数据框最终看起来像这样:
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}
我将不得不对大约 50 个数据集执行此操作,并且想知道是否有一种方法可以编写一个循环来一次对所有数据集执行此操作。
我尝试过使用 lapply 和 for 循环,但到目前为止都没有成功。我刚开始在 R 中使用列表,任何帮助都不胜感激!
这是我第一次在 Stack Overflow 上发帖,如果帖子有不妥之处,请告诉我。
一种做法是使用 Map:先去掉 df.4 各列中的 NA 值,再从 list.df 中对应的数据框里选取相应的列。
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}
使用 purrr::map2 也可以得到同样的结果:
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}
我们可以使用 complete.cases 和 Map
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}
或使用 pmap
# Example data: a named list of three data frames with ten rows each.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: the i-th column lists the names of the columns to keep from
# the i-th element of list.df; NA pads the shorter selections.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS")
)

# One-off subset of a single data frame: keep the columns of df.1 whose
# names appear in df.4$df.1.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

# Base R, all data frames at once: Map() pairs each element of list.df with
# the df.4 column in the same position; NAs are dropped and the remaining
# strings select columns by name.
subset_by_name <- Map(
  function(x, y) x[as.character(na.omit(y))],
  list.df, df.4
)
subset_by_name
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# .....

# The same with purrr::map2. purrr is optional here, so the call is skipped
# when the package is not installed.
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))]))
}

# complete.cases() variant. complete.cases(y) is a logical mask over the
# rows of df.4; used directly as a column index it would pick columns by
# position (LM/LS and XY) instead of by name. Index y with the mask first so
# the surviving strings select the columns.
subset_by_mask <- Map(
  function(x, y) x[as.character(y[complete.cases(y)])],
  list.df, df.4
)
subset_by_mask
# $df.1
#    LM PL
# 1   1  1
# 2   2  2
# 3   3  3
# 4   4  4
# 5   5  5
# 6   6  6
# 7   7  7
# 8   8  8
# 9   9  9
# 10 10 10
# $df.2
#    FE
# 1   4
# 2   5
# 3   6
# 4   7
# 5   8
# 6   9
# 7  10
# 8  11
# 9  12
# 10 13
# $df.3
#    IL KU TS
# 1   1  9  1
# 2   2 10  2
# 3   3 11  3
# 4   4 12  4
# 5   5 13  5
# 6   6 14  6
# 7   7 15  7
# 8   8 16  8
# 9   9 17  9
# 10 10 18 10

# The same with purrr::pmap (again only when purrr is available).
if (requireNamespace("purrr", quietly = TRUE)) {
  print(purrr::pmap(
    list(list.df, df.4),
    ~ .x[as.character(.y[complete.cases(.y)])]
  ))
}