代码之家  ›  专栏  ›  技术社区  ›  Nuller

无法对嵌套 data.frame 中的列取子集

  •  2
  • Nuller  · 技术社区  · 3 年前

    我有一个 data.frame，其中包含多个字符列，但还包含一个 data.frame 类型的列，也就是说我的 data.frame 里面嵌套了另一个 data.frame。我的目标是把一个字符列与嵌套 data.frame 中的某一列一起取子集。但是，每当我试图按名称选取嵌套列时，都会提示该列不存在。该 data.frame 如下所示：

    # Reproducible example data built with structure(): a 3-row data.frame with
    # character columns (`$id`, Id, Name), a nested data.frame column (SumPeriod)
    # and a list column of one-row data.frames (Series).
    df = structure(
      list(
        `$id` = c("21", "22", "23"),
        Id = c("159347",
               "161863", "22646"),
        Name = c("159347", "161863", "22646"),
        # Nested data.frame column: 3 rows, aligned with the rows of df.
        SumPeriod = structure(
          list(
            AccPeriodBasTwrAtMarketPrice = c(0.0969367972082358, 0.537983489472227,-0.107066381156318),
            AccPeriodLocTwrAtMarketPrice = c(0.0969367972082358,
                                             0.537983489472227,-0.107066381156318),
            BopDate = c(
              "2022-02-28T00:00:00",
              "2022-02-28T00:00:00",
              "2022-02-28T00:00:00"
            ),
            BopBasHoldingValueAtMarketPrice = c(7592266.52,
                                                5135960.59, 7166815.5),
            BopBasInterestAccrual = c(0, 0, 0),
            EopDate = c(
              "2022-02-28T00:00:00",
              "2022-02-28T00:00:00",
              "2022-02-28T00:00:00"
            ),
            EopBasHoldingValueAtMarketPrice = c(7599626.22,
                                                5163591.21, 7159142.25),
            EopBasInterestAccrual = c(0, 0,
                                      0),
            AccPeriodBasTwrAtExposureValue = c(0.0969367972082358,
                                               0.537983489472227,-0.107066381156318),
            AccPeriodLocTwrAtExposureValue = c(0.0969367972082358,
                                               0.537983489472227,-0.107066381156318),
            AccBasIrr = c(0.0969367972082358,
                          0.537983489472227,-0.107066381156318),
            AccLocIrr = c(0.096936797208258,
                          0.537983489472227,-0.107066381156318),
            AccBasMwr = c(0.0484449181280957,
                          0.268270120259021,-0.0535618639528656),
            PeriodBasIrr = c(0.0969367972082358,
                             0.537983489472227,-0.107066381156318),
            PeriodLocIrr = c(0.096936797208258,
                             0.537983489472227,-0.107066381156318),
            PeriodBasTwrAtMarketPrice = c(0.0969367972082358,
                                          0.537983489472227,-0.107066381156318),
            PeriodLocTwrAtMarketPrice = c(0.0969367972082358,
                                          0.537983489472227,-0.107066381156318),
            PeriodBasTwrDeposit = c(0,
                                    0, 0),
            PeriodBasTwrWithdrawal = c(0, 0, 0),
            PeriodBasTwrDepositWithdrawal = c(0,
                                              0, 0),
            PeriodBasTwrDividendTax = c(0, 0, 0),
            PeriodBasTwr = c(7359.70000000112,
                             27630.6200000001,-7673.25),
            PeriodBasMwr = c(0.0484449181280957,
                             0.268270120259021,-0.0535618639528656),
            BenchmarkCalcType = c(
              "BenchmarkNotCalculated",
              "BenchmarkNotCalculated",
              "BenchmarkNotCalculated"
            ),
            EopBenchmarkName = c("",
                                 "", ""),
            AccBasBenchmarkReturnPct = c(0, 0, 0),
            PeriodBasBenchmarkReturnPct = c(0,
                                            0, 0)
          ),
          class = "data.frame",
          row.names = c(NA, 3L)
        ),
        # List column: three one-row data.frames, one list element per row of df,
        # each with the same column names as SumPeriod.
        Series = list(
          structure(
            list(
              AccPeriodBasTwrAtMarketPrice = 0.0969367972082358,
              AccPeriodLocTwrAtMarketPrice = 0.0969367972082358,
              BopDate = "2022-02-28T00:00:00",
              BopBasHoldingValueAtMarketPrice = 7592266.52,
              BopBasInterestAccrual = 0,
              EopDate = "2022-02-28T00:00:00",
              EopBasHoldingValueAtMarketPrice = 7599626.22,
              EopBasInterestAccrual = 0,
              AccPeriodBasTwrAtExposureValue = 0.0969367972082358,
              AccPeriodLocTwrAtExposureValue = 0.0969367972082358,
              AccBasIrr = 0,
              AccLocIrr = 0,
              AccBasMwr = 0.0968429207825055,
              PeriodBasIrr = 0,
              PeriodLocIrr = 0,
              PeriodBasTwrAtMarketPrice = 0.0969367972082358,
              PeriodLocTwrAtMarketPrice = 0.0969367972082358,
              PeriodBasTwrDeposit = 0,
              PeriodBasTwrWithdrawal = 0,
              PeriodBasTwrDepositWithdrawal = 0,
              PeriodBasTwrDividendTax = 0,
              PeriodBasTwr = 7359.70000000112,
              PeriodBasMwr = 0.0484449181280957,
              BenchmarkCalcType = "BenchmarkNotCalculated",
              EopBenchmarkName = "",
              AccBasBenchmarkReturnPct = 0,
              PeriodBasBenchmarkReturnPct = 0
            ),
            class = "data.frame",
            row.names = 1L
          ),
          structure(
            list(
              AccPeriodBasTwrAtMarketPrice = 0.537983489472227,
              AccPeriodLocTwrAtMarketPrice = 0.537983489472227,
              BopDate = "2022-02-28T00:00:00",
              BopBasHoldingValueAtMarketPrice = 5135960.59,
              BopBasInterestAccrual = 0,
              EopDate = "2022-02-28T00:00:00",
              EopBasHoldingValueAtMarketPrice = 5163591.21,
              EopBasInterestAccrual = 0,
              AccPeriodBasTwrAtExposureValue = 0.537983489472227,
              AccPeriodLocTwrAtExposureValue = 0.537983489472227,
              AccBasIrr = 0,
              AccLocIrr = 0,
              AccBasMwr = 0.535104714457055,
              PeriodBasIrr = 0,
              PeriodLocIrr = 0,
              PeriodBasTwrAtMarketPrice = 0.537983489472227,
              PeriodLocTwrAtMarketPrice = 0.537983489472227,
              PeriodBasTwrDeposit = 0,
              PeriodBasTwrWithdrawal = 0,
              PeriodBasTwrDepositWithdrawal = 0,
              PeriodBasTwrDividendTax = 0,
              PeriodBasTwr = 27630.6200000001,
              PeriodBasMwr = 0.26827012025902,
              BenchmarkCalcType = "BenchmarkNotCalculated",
              EopBenchmarkName = "",
              AccBasBenchmarkReturnPct = 0,
              PeriodBasBenchmarkReturnPct = 0
            ),
            class = "data.frame",
            row.names = 1L
          ),
          structure(
            list(
              AccPeriodBasTwrAtMarketPrice = -0.107066381156318,
              AccPeriodLocTwrAtMarketPrice = -0.107066381156318,
              BopDate = "2022-02-28T00:00:00",
              BopBasHoldingValueAtMarketPrice = 7166815.5,
              BopBasInterestAccrual = 0,
              EopDate = "2022-02-28T00:00:00",
              EopBasHoldingValueAtMarketPrice = 7159142.25,
              EopBasInterestAccrual = 0,
              AccPeriodBasTwrAtExposureValue = -0.107066381156318,
              AccPeriodLocTwrAtExposureValue = -0.107066381156318,
              AccBasIrr = 0,
              AccLocIrr = 0,
              AccBasMwr = -0.107181136120043,
              PeriodBasIrr = 0,
              PeriodLocIrr = 0,
              PeriodBasTwrAtMarketPrice = -0.107066381156318,
              PeriodLocTwrAtMarketPrice = -0.107066381156318,
              PeriodBasTwrDeposit = 0,
              PeriodBasTwrWithdrawal = 0,
              PeriodBasTwrDepositWithdrawal = 0,
              PeriodBasTwrDividendTax = 0,
              PeriodBasTwr = -7673.25,
              PeriodBasMwr = -0.0535618639528656,
              BenchmarkCalcType = "BenchmarkNotCalculated",
              EopBenchmarkName = "",
              AccBasBenchmarkReturnPct = 0,
              PeriodBasBenchmarkReturnPct = 0
            ),
            class = "data.frame",
            row.names = 1L
          )
        )
      ),
      class = "data.frame",
      row.names = c(NA,
                    3L)
    )
    

    在上面的 data.frame 中，我唯一感兴趣的列是“Id”和“EopBasHoldingValueAtMarketPrice”，后者是名为“SumPeriod”的嵌套 data.frame 中的一列。我试图通过以下方式实现这一目标：

    # Failing attempt: the second string is looked up as a literal column name,
    # and df has no column called "SumPeriod$EopBasHoldingValueAtMarketPrice",
    # so the call stops with "undefined columns selected".
    df_subset = subset(df, select = c("Id", "SumPeriod$EopBasHoldingValueAtMarketPrice"))
    

    但我得到了一个错误:

    Error in `[.data.frame`(x, r, vars, drop = drop) : undefined columns selected
    

    你们知道如何做到这一点吗?

    5 回复  |  直到 3 年前
        1
  •  2
  •   Stephan    3 年前

    SumPeriod 是 data.frame 中嵌套的 data.frame。既然你在标签中提到了 dplyr，可以用以下方法解决这个问题：

    library(dplyr)
    library(tidyr)
    # Convert to a tibble, expand the nested SumPeriod data frame into
    # top-level columns, then keep only the two columns of interest.
    # Written with the native pipe (requires R >= 4.1).
    df |>
      as_tibble() |>
      unnest(SumPeriod) |>
      select(Id, EopBasHoldingValueAtMarketPrice)
    

    输出为:

    # A tibble: 3 × 2
      Id     EopBasHoldingValueAtMarketPrice
      <chr>                            <dbl>
    1 159347                        7599626.
    2 161863                        5163591.
    3 22646                         7159142.
    
    
        2
  •  1
  •   user2554330    3 年前

    你的数据中并没有名为 "SumPeriod$EopBasHoldingValueAtMarketPrice" 的列，它只是一个从 SumPeriod 中提取列的表达式。Stephan 已经给出了一个很好的 dplyr 解决方案，下面是一个基础 R 的解决方案：

    # Step 1: keep Id together with the whole nested SumPeriod data frame.
    df1 <- subset(df, select = c("Id", "SumPeriod"))
    # Step 2: trim the nested data frame down to the single wanted column,
    # so the result is still a data frame nested inside df1.
    df1[["SumPeriod"]] <- subset(
      df1[["SumPeriod"]],
      select = "EopBasHoldingValueAtMarketPrice"
    )
    

    这将使结构保持为嵌套的数据帧。

        3
  •  1
  •   Wimpel    3 年前

    data.table 方法

    library(data.table)
    DT <- as.data.table(df)
    # Series is a list of one-row data.frames. vapply() extracts the single
    # value from each element and guarantees a plain numeric column; lapply()
    # would return a list, which data.table stores as a list column.
    DT[, .(Id,
           EopBasHoldingValueAtMarketPrice = vapply(
             Series,
             function(x) x[["EopBasHoldingValueAtMarketPrice"]],
             numeric(1)
           ))]
    #        Id EopBasHoldingValueAtMarketPrice
    # 1: 159347                         7599626
    # 2: 161863                         5163591
    # 3:  22646                         7159142
    
        4
  •  1
  •   shs    3 年前

    如果你习惯使用 dplyr，那就用 Stephan 的解决方案。如果需要基础 R 的解决方案，只需把所需变量从嵌套的 data.frame 赋值到父 data.frame，然后使用 subset()：

    library(tidyverse)
    # Copy the nested column up into the parent data frame under the same name,
    # so that subset() can select it like any ordinary column.
    df[["EopBasHoldingValueAtMarketPrice"]] <-
      df[["SumPeriod"]][["EopBasHoldingValueAtMarketPrice"]]
    df_subset <- subset(
      df,
      select = c("Id", "EopBasHoldingValueAtMarketPrice")
    )
    df_subset
    #>       Id EopBasHoldingValueAtMarketPrice
    #> 1 159347                         7599626
    #> 2 161863                         5163591
    #> 3  22646                         7159142
    

    由 reprex package (v2.0.1) 创建于 2022-03-28

        5
  •  1
  •   chrizzle    3 年前

    数据框里嵌套数据框有什么特别的原因吗？你可以把这些内容都放在一个普通的数据框里：

    # Flatten both nested structures of df into ordinary top-level columns.
    x <- df$SumPeriod
    # Use the exact column name `Series`: `df$Serie` only worked through partial
    # matching of `$`. do.call(rbind, ...) stacks every one-row data.frame in the
    # list, so this no longer assumes df has exactly three rows.
    y <- do.call(rbind, df$Series)

    # NOTE: x and y carry the same column names, so those names appear twice in
    # the combined data frame; selecting by name is expected to return the first
    # occurrence (the SumPeriod values).
    df <- cbind(df[, c("Id", "Name", "$id")], x, y)
    

    这样，你原先设想的取子集方式就能正常工作了：

    # Both names are now ordinary columns of df, so plain subset() works.
    df_subset <- subset(
      df,
      select = c("Id", "EopBasHoldingValueAtMarketPrice")
    )