代码之家  ›  专栏  ›  技术社区  ›  Starbucks

近似日期合并R-滚动联接/日期差

r
  •  0
  • Starbucks  · 技术社区  · 6 年前

    我试图将两个数据帧合并在一起,但需要近似地连接数据(按日期和键)。例如,在“2000-03-01”上,df2$X1和df2$X2具有需要加入df1的信息,但是df1中没有“2000-03-01”。 ,如何将df1上相关的df2$X1&X2数据近似到“2000”上- 02

    df1 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
    df1$Key <- LETTERS[1:2]
    df1$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-02-01", "2000-04-01", "2000-05-01", 
                         "2000-05-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
    
    print(df1)
    
    X1 X2 Key       Date
    1   4  2   A 2000-01-01
    2   3  2   B 2000-02-01
    3   2  4   A 2000-02-01
    4   3  5   B 2000-04-01
    5   1  3   A 2000-05-01
    6   2  0   B 2000-05-01
    7   1  1   A 2000-07-01
    8   5  3   B 2000-08-01
    9   2  0   A 2000-09-01
    10  5  4   B 2000-10-01
    
    df2 <- data.frame(replicate(2,sample(0:5,10,rep=TRUE)))
    df2$Key <- c('A', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'A', 'B')
    df2$Date <- as.Date(c("2000-01-01", "2000-02-01", "2000-03-01", "2000-04-01", "2000-05-01", 
                         "2000-06-01", "2000-07-01", "2000-08-01", "2000-09-01", "2000-10-01"))
    
    print(df2)
    
    X1 X2 Key       Date
    1   4  5   A 2000-01-01
    2   3  2   B 2000-02-01
    3   1  5   A 2000-03-01
    4   1  0   B 2000-04-01
    5   0  4   A 2000-05-01
    6   5  5   B 2000-06-01
    7   2  2   A 2000-07-01
    8   1  0   B 2000-08-01
    9   0  4   A 2000-09-01
    10  3  2   B 2000-10-01
    
    library(dplyr)
    join <- left_join(df1, df2, by = c("Key", "Date")); join
    
    X1.x X2.x Key       Date X1.y X2.y
    1     4    2   A 2000-01-01    4    5
    2     3    2   B 2000-02-01    3    2
    3     2    4   A 2000-02-01   NA   NA
    4     3    5   B 2000-04-01    1    0
    5     1    3   A 2000-05-01    0    4
    6     2    0   B 2000-05-01   NA   NA
    7     1    1   A 2000-07-01    2    2
    8     5    3   B 2000-08-01    1    0
    9     2    0   A 2000-09-01    0    4
    10    5    4   B 2000-10-01    3    2
    > 
    
    join <- left_join(df1, df2, by = c("Key", "Date")) %>%
      filter(abs(difftime(day,day,units = "days"))) <= as.difftime(2, format = "%d", unit = "days")
    
    
    library(data.table)
    
    as.data.table(df1)[as.data.table(df2), on = "Date", roll = "nearest"]
    
    X1 X2 Key       Date i.X1 i.X2 i.Key
    1:  4  2   A 2000-01-01    4    5     A
    2:  3  2   B 2000-02-01    3    2     B
    3:  2  4   A 2000-02-01    3    2     B
    4:  2  4   A 2000-03-01    1    5     A
    5:  3  5   B 2000-04-01    1    0     B
    6:  1  3   A 2000-05-01    0    4     A
    7:  2  0   B 2000-05-01    0    4     A
    8:  1  1   A 2000-06-01    5    5     B
    9:  1  1   A 2000-07-01    2    2     A
    10:  5  3   B 2000-08-01    1    0     B
    11:  2  0   A 2000-09-01    0    4     A
    12:  5  4   B 2000-10-01    3    2     B
    
    1 回复  |  直到 6 年前
        1
  •  0
  •   r2evans    6 年前

    这个怎么样?

    df2$FakeDate <- df2$Date[ findInterval(df1$Date, df2$Date) ]
    df2
    #    X1 X2 Key       Date   FakeDate
    # 1   4  3   A 2000-01-01 2000-01-01
    # 2   4  0   B 2000-02-01 2000-02-01
    # 3   2  5   A 2000-03-01 2000-02-01
    # 4   3  1   B 2000-04-01 2000-04-01
    # 5   2  5   A 2000-05-01 2000-05-01
    # 6   5  3   B 2000-06-01 2000-05-01
    # 7   5  0   A 2000-07-01 2000-07-01
    # 8   3  0   B 2000-08-01 2000-08-01
    # 9   4  5   A 2000-09-01 2000-09-01
    # 10  4  0   B 2000-10-01 2000-10-01
    df2$Date <- NULL # o/w merge will complain about 'Date' in both
    merge(df1, df2, by.x=c("Key","Date"), by.y=c("Key","FakeDate"))
    #    Key       Date X1.x X2.x X1.y X2.y
    # 1    A 2000-01-01    0    5    4    3
    # 2    A 2000-02-01    4    0    2    5
    # 3    A 2000-05-01    4    1    2    5
    # 4    A 2000-07-01    2    0    5    0
    # 5    A 2000-09-01    0    3    4    5
    # 6    B 2000-02-01    0    4    4    0
    # 7    B 2000-04-01    1    5    3    1
    # 8    B 2000-05-01    5    2    5    3
    # 9    B 2000-08-01    3    5    3    0
    # 10   B 2000-10-01    4    2    4    0