代码之家 › 专栏 › 技术社区 › te time

使用dplyr计算每组不同权重的加权平均值

tidyverse dplyr r

0

te time · 技术社区 · 1 年前

我正试图用 dplyr 把下面的代码改写成单个管道。我需要按年份计算两列的加权平均值,而每一年使用的权重各不相同。如何在不显式拆分数据集、再把结果合并回去的情况下做到这一点?谢谢

library(tidyverse)

# Seed the RNG so the example data below is reproducible.
# (The original `seet.seed = 1` only created a variable and seeded nothing.)
set.seed(1)

# Example data: 20 rows, each tagged with a year plus three random columns.
df <- data.frame(
  yr = sample(c(2022, 2023, 2024), size = 20, replace = TRUE),
  col_1 = rnorm(20),
  col_2 = rnorm(20),
  col_3 = rnorm(20)
)

# One data frame per year; split() names the elements "2022", "2023", "2024".
dfList <- split(df, df$yr)
selected_cols <- c("col_1", "col_2")

# Each year is looked up by name rather than by position, so a year that is
# absent from the sample fails loudly instead of letting another year's rows
# pick up the wrong weights.

# 2022: weights 0.40 / 0.60 for col_1 / col_2.
df2022 <- dfList[["2022"]] %>%
  rowwise() %>%
  mutate(
    mean_wt = weighted.mean(c_across(all_of(selected_cols)), c(0.40, 0.60))
  ) %>%
  ungroup()

# 2023: weights 0.70 / 0.30.
df2023 <- dfList[["2023"]] %>%
  rowwise() %>%
  mutate(
    mean_wt = weighted.mean(c_across(all_of(selected_cols)), c(0.70, 0.30))
  ) %>%
  ungroup()

# 2024: weights 0.50 / 0.50.
df2024 <- dfList[["2024"]] %>%
  rowwise() %>%
  mutate(
    mean_wt = weighted.mean(c_across(all_of(selected_cols)), c(0.50, 0.50))
  ) %>%
  ungroup()

# Stack the per-year results back into a single data frame (ordered by year).
newDF <- bind_rows(list(df2022, df2023, df2024))

1 回复 | 直到 1 年前

1

0

LMc 1 年前

library(dplyr)

# Per-year weights for (col_1, col_2), keyed by the year as a string.
yr_wt <- setNames(
  list(c(.4, .6), c(.7, .3), c(.5, .5)),
  c("2022", "2023", "2024")
)

# Row by row, look up the weights for that row's year and take the weighted
# mean of col_1:col_2; ungroup() drops the rowwise grouping afterwards.
df |>
  rowwise() |>
  mutate(
    mean_wt = weighted.mean(
      x = c_across(col_1:col_2),
      w = yr_wt[[as.character(yr)]]
    )
  ) |>
  ungroup()

即使在中等规模的数据框上,rowwise() 也可能非常慢。下面是一个更快的替代方案,同样使用 yr_wt 查找列表:

# Weighted mean of each row of the numeric matrix `m`, using one weight per
# column from `w`. A single matrix-vector product replaces the original
# rowSums(m %*% diag(w)), and dividing by sum(w) mirrors weighted.mean()'s
# normalisation, so the weights do not have to sum to 1.
row_weighted_mean <- function(m, w) {
  drop(m %*% w) / sum(w)
}

# Vectorised alternative to rowwise(): within each `yr` group, look up that
# year's weights in `yr_wt` and apply them to col_1:col_2 in one step.
df |>
  mutate(
    mean_wt = row_weighted_mean(
      as.matrix(pick(col_1:col_2)),
      # cur_group() is a one-row tibble of the current group's keys.
      yr_wt[[as.character(cur_group()$yr)]]
    ),
    .by = yr
  )