基于另一个数据框中的字符串,对 R 列表中的多个数据框取子集

Subsetting multiple dataframes within list in R based on strings in another dataframe

我正在尝试根据另一个数据框中包含的字符串,对列表中的多个数据框按列取子集。

# Example data: a named list of data frames to be subset column-wise.
list.df <- list(
  df.1 = data.frame(LM = 1:10, LS = 1:10, PL = 1:10),
  df.2 = data.frame(XY = 1:10, FE = 4:13, OI = 1:10),
  df.3 = data.frame(IL = 1:10, KU = 9:18, TS = 1:10)
)

# Lookup table: each column is named after an element of list.df and lists the
# column names to keep from that element, padded with NA to a common length.
# stringsAsFactors = FALSE keeps the names as character on R < 4.0 as well.
df.4 <- data.frame(
  df.1 = c("LM", "PL", NA),
  df.2 = c("FE", NA, NA),
  df.3 = c("IL", "KU", "TS"),
  stringsAsFactors = FALSE
)

我希望我的所有数据框最终看起来像这样:

# Desired result, shown by hand for a single element: keep only the columns of
# list.df[["df.1"]] whose names are listed in the df.1 column of df.4.
# %in% never returns NA, so the NA padding in df.4 is harmless here.
df.1_sub <- subset(
  list.df[["df.1"]],
  select = colnames(list.df[["df.1"]]) %in% df.4$df.1
)

我将不得不对大约 50 个数据集执行此操作,并且想知道是否有一种方法可以编写一个循环来一次对所有数据集执行此操作。

我尝试过使用 lapply 和 for 循环,但到目前为止都没有成功。我刚开始在 R 中使用列表,如能得到任何帮助,不胜感激!

这是我第一次在 Stack Overflow 上发帖,如果我的帖子有不合适的地方,请告诉我。


使用 Map 的一种方法是:先去掉 df.4 每一列中的 NA 值,再从 list.df 的对应数据框中选取这些列

# Walk list.df and the columns of df.4 in parallel. For each pair, drop the NA
# padding from the name column and use what is left to select columns by name.
# as.character() also covers the case where df.4 holds factors (R < 4.0).
Map(function(x, y) x[as.character(na.omit(y))], list.df, df.4)

#$df.1
#   LM PL
#1   1  1
#2   2  2
#3   3  3
#4   4  4
#5   5  5
#6   6  6
#7   7  7
#8   8  8
#9   9  9
#10 10 10
#
#$df.2
#   FE
#1   4
#2   5
#3   6
#4   7
#5   8
#6   9
#7  10
#8  11
#9  12
#10 13
#
#$df.3
#   IL KU TS
#1   1  9  1
#2   2 10  2
#3   3 11  3
#.....

同样可以使用 purrr::map2

# purrr equivalent of the Map() approach: .x is a data frame from list.df and
# .y is the matching column of df.4. NA padding is dropped before the remaining
# names are used to select columns.
purrr::map2(list.df, df.4, ~ .x[na.omit(as.character(.y))])

我们也可以将 complete.cases 与 Map 结合使用

# complete.cases(y) is a logical mask over the *entries of y*, not over the
# columns of x. Using it directly as x[complete.cases(y)] selects columns by
# position (e.g. LM and LS instead of LM and PL). Apply the mask to y first,
# then select columns by the surviving names.
Map(function(x, y) x[as.character(y[complete.cases(y)])], list.df, df.4)

#$df.1
#   LM PL
#1   1  1
#2   2  2
#3   3  3
#4   4  4
#5   5  5
#6   6  6
#7   7  7
#8   8  8
#9   9  9
#10 10 10
#
#$df.2
#   FE
#1   4
#2   5
#3   6
#4   7
#5   8
#6   9
#7  10
#8  11
#9  12
#10 13
#
#$df.3
#   IL KU TS
#1   1  9  1
#2   2 10  2
#3   3 11  3
#4   4 12  4
#5   5 13  5
#6   6 14  6
#7   7 15  7
#8   8 16  8
#9   9 17  9
#10 10 18 10

或使用 pmap

library(purrr)

# pmap() iterates over list.df and the columns of df.4 in parallel; .x is the
# data frame and .y the matching column of names. The complete.cases() mask is
# applied to .y so that columns are selected by name, not by position.
pmap(list(list.df, df.4), ~ .x[as.character(.y[complete.cases(.y)])])

相关推荐

  • Spring部署设置openshift

    Springdeploymentsettingsopenshift我有一个问题让我抓狂了三天。我根据OpenShift帐户上的教程部署了spring-eap6-quickstart代码。我已配置调试选项,并且已将Eclipse工作区与OpehShift服务器同步-服务器上的一切工作正常,但在Eclipse中出现无法消除的错误。我有这个错误:cvc-complex-type.2.4.a:Invali…
    2025-04-161
  • 检查Java中正则表达式中模式的第n次出现

    CheckfornthoccurrenceofpatterninregularexpressioninJava本问题已经有最佳答案,请猛点这里访问。我想使用Java正则表达式检查输入字符串中特定模式的第n次出现。你能建议怎么做吗?这应该可以工作:MatchResultfindNthOccurance(intn,Patternp,CharSequencesrc){Matcherm=p.matcher…
    2025-04-161
  • 如何让 JTable 停留在已编辑的单元格上

    HowtohaveJTablestayingontheeditedcell如果有人编辑JTable的单元格内容并按Enter,则内容会被修改并且表格选择会移动到下一行。是否可以禁止JTable在单元格编辑后转到下一行?原因是我的程序使用ListSelectionListener在单元格选择上同步了其他一些小部件,并且我不想在编辑当前单元格后选择下一行。Enter的默认绑定是名为selectNext…
    2025-04-161
  • Weblogic 12c 部署

    Weblogic12cdeploy我正在尝试将我的应用程序从Tomcat迁移到Weblogic12.2.1.3.0。我能够毫无错误地部署应用程序,但我遇到了与持久性提供程序相关的运行时错误。这是堆栈跟踪:javax.validation.ValidationException:CalltoTraversableResolver.isReachable()threwanexceptionatorg.…
    2025-04-161
  • Resteasy Content-Type 默认值

    ResteasyContent-Typedefaults我正在使用Resteasy编写一个可以返回JSON和XML的应用程序,但可以选择默认为XML。这是我的方法:@GET@Path("/content")@Produces({MediaType.APPLICATION_XML,MediaType.APPLICATION_JSON})publicStringcontentListRequestXm…
    2025-04-161
  • 代码不会停止运行,在 Java 中

    thecodedoesn'tstoprunning,inJava我正在用Java解决项目Euler中的问题10,即"Thesumoftheprimesbelow10is2+3+5+7=17.Findthesumofalltheprimesbelowtwomillion."我的代码是packageprojecteuler_1;importjava.math.BigInteger;importjava…
    2025-04-161
  • Out of memory java heap space

    Outofmemoryjavaheapspace我正在尝试将大量文件从服务器发送到多个客户端。当我尝试发送大小为700mb的文件时,它显示了"OutOfMemoryjavaheapspace"错误。我正在使用Netbeans7.1.2版本。我还在属性中尝试了VMoption。但仍然发生同样的错误。我认为阅读整个文件存在一些问题。下面的代码最多可用于300mb。请给我一些建议。提前致谢publicc…
    2025-04-161
  • Log4j 记录到共享日志文件

    Log4jLoggingtoaSharedLogFile有没有办法将log4j日志记录事件写入也被其他应用程序写入的日志文件。其他应用程序可以是非Java应用程序。有什么缺点?锁定问题?格式化?Log4j有一个SocketAppender,它将向服务发送事件,您可以自己实现或使用与Log4j捆绑的简单实现。它还支持syslogd和Windows事件日志,这对于尝试将日志输出与来自非Java应用程序…
    2025-04-161