代码之家  ›  专栏  ›  技术社区  ›  e.moran

R:基于两个数据框之间匹配条件的运算

  •  1
  • e.moran  · 技术社区  · 7 年前

    我有以下两个数据框:

    head(RH)
      160143 161143 161144 161145 161146 162145 162146 162147 163146 163147
    1   24.9   26.4   27.4   28.5   30.4   29.2   32.6   58.7   50.6   62.1
    2   10.6   29.4   29.3   29.5   30.3   29.7   33.0   68.2   53.2   82.3
    3   17.7   30.7   30.7   31.7   31.5   29.4   34.1   65.0   48.0   78.5
    4   39.2   38.6   41.0   37.5   29.0   31.1   36.4   56.4   89.7   83.9
    5   23.1   23.0   27.9   29.9   38.2   29.6   41.4   88.2   86.0   91.2
    6   27.7   28.1   38.5   40.7   50.8   43.3   56.7  106.6   72.5   94.2
    
    
    head(percentage)
          xy     perc
    1 160143 50.22337
    2 161143 29.69779
    3 107167 41.98815
    4 107168 66.68095
    5 107169 37.67827
    6 108167 29.69238
    

    结果应该是一个与 RH 维度相同的数据框(即:RH 中列名与 percentage$xy 匹配的列乘以对应的 perc)。

    3 回复  |  直到 7 年前
        1
  •  1
  •   pogibas    7 年前

    可以提取现有列的比例因子:

    # For every column of RH, look up its factor in `percentage` by matching
    # the column name against `xy`; columns without a match get NA.
    foo <- percentage[["perc"]][match(names(RH), percentage[["xy"]])]
    # [1] 50.22337 29.69779       NA       NA       NA       NA       NA       NA       NA       NA
    

    然后把其中的 NA 替换为 1(乘以 1 即保持该列原值不变):

    # Multiply each column of RH by its factor; columns whose factor is NA
    # (no match in `percentage`) are multiplied by 1 and stay unchanged.
    # sweep() over MARGIN = 2 applies one factor per column and, unlike
    # t(t(RH) * ...), which yields a matrix, keeps the result a data frame
    # with RH's dimensions and column names.
    sweep(RH, 2, ifelse(is.na(foo), 1, foo), "*")
    
        2
  •  1
  •   AntoniosK    7 年前

    我使用的数据与你发布的类似:

    # Example data mirroring the question. RH has one numeric column per id;
    # the column names are non-syntactic, hence the backticks.
    RH <- structure(
      list(
        `160143` = c(24.9, 10.6, 17.7, 39.2, 23.1, 27.7),
        `161143` = c(26.4, 29.4, 30.7, 38.6, 23, 28.1),
        `161144` = c(27.4, 29.3, 30.7, 41, 27.9, 38.5),
        `161145` = c(28.5, 29.5, 31.7, 37.5, 29.9, 40.7),
        `161146` = c(30.4, 30.3, 31.5, 29, 38.2, 50.8),
        `162145` = c(29.2, 29.7, 29.4, 31.1, 29.6, 43.3),
        `162146` = c(32.6, 33, 34.1, 36.4, 41.4, 56.7),
        `162147` = c(58.7, 68.2, 65, 56.4, 88.2, 106.6),
        `163146` = c(50.6, 53.2, 48, 89.7, 86, 72.5),
        `163147` = c(62.1, 82.3, 78.5, 83.9, 91.2, 94.2)
      ),
      class = "data.frame",
      row.names = as.character(1:6)
    )

    # Lookup table: `xy` holds the ids as character strings (the same type as
    # RH's column names), `perc` the factor belonging to each id.
    percentage <- structure(
      list(
        xy = c("160143", "161143", "107167", "107168", "107169", "108167"),
        perc = c(50.22337, 29.69779, 41.98815, 66.68095, 37.67827, 29.69238)
      ),
      row.names = as.character(1:6),
      class = "data.frame"
    )
    

    使用 tidyverse 的一种解法:

    library(tidyverse)

    # Reshape RH to long form (one row per original row/column pair), keep
    # only the columns that have a factor in `percentage`, scale them, and
    # reshape back to wide form. `row_id` remembers which original row each
    # value came from so that spread() can rebuild the rows.
    # Note: gather()/spread() are superseded by pivot_longer()/pivot_wider()
    # in current tidyr, but they still work.
    RH %>%
      mutate(row_id = row_number()) %>%
      gather(key = "xy", value = "value", -row_id) %>%
      # inner_join drops every column of RH that has no match in `percentage`
      inner_join(percentage, by = "xy") %>%
      mutate(value = value * perc) %>%
      select(row_id, xy, value) %>%
      spread(key = xy, value = value) %>%
      select(-row_id)
    
    #      160143    161143
    # 1 1250.5619  784.0217
    # 2  532.3677  873.1150
    # 3  888.9536  911.7222
    # 4 1968.7561 1146.3347
    # 5 1160.1598  683.0492
    # 6 1391.1873  834.5079
    

    注意:结果的列数比 RH 数据集少,因为在你发布的数据中只有这 2 列能与 percentage 匹配。

        3
  •  1
  •   user6261559    7 年前

    如果 OP 还需要保留原表中未匹配的列,我们只需要稍微修改一下用户 AntoniosK 的答案:

    # Same long/wide round trip as the inner_join variant, but left_join keeps
    # every column of RH: columns without a match get perc = NA, which is
    # treated as a factor of 1 so their values pass through unchanged.
    RH %>%
      mutate(row_id = row_number()) %>%
      gather(key = "column_name", value = "value", -row_id) %>%
      left_join(percentage, by = c("column_name" = "xy")) %>%
      mutate(new_value = value * ifelse(is.na(perc), 1, perc)) %>%
      select(row_id, column_name, new_value) %>%
      spread(key = column_name, value = new_value) %>%
      select(-row_id)
    
    #      160143    161143 161144 161145 161146 162145 162146 162147 163146 163147
    #1 1250.5619  784.0217   27.4   28.5   30.4   29.2   32.6   58.7   50.6   62.1
    #2  532.3677  873.1150   29.3   29.5   30.3   29.7   33.0   68.2   53.2   82.3
    #3  888.9536  911.7222   30.7   31.7   31.5   29.4   34.1   65.0   48.0   78.5
    #4 1968.7561 1146.3347   41.0   37.5   29.0   31.1   36.4   56.4   89.7   83.9
    #5 1160.1598  683.0492   27.9   29.9   38.2   29.6   41.4   88.2   86.0   91.2
    #6 1391.1873  834.5079   38.5   40.7   50.8   43.3   56.7  106.6   72.5   94.2
    

    (抱歉,我是新用户,无法对用户AntoniosK的回答发表评论)