基于David的建议-使用以下类似的方法如何
dplyr::rename_at
?
library(dplyr)
## Get data
set.seed(667)
dta <- data.frame(id = 1:6,
R1213 = runif(6),
R1224 = runif(6, 1, 2),
R1255 = runif(6, 2, 3),
R1235 = runif(6, 3, 4)) %>%
as_tibble()
## Rename
dta <- dta %>%
rename_at(.vars = grep('^R1.[125].$', names(.)),
.funs = ~paste0("A.2017.", 1:length(.)))
dta
#> # A tibble: 6 x 5
#> id A.2017.1 A.2017.2 A.2017.3 R1235
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.196 1.74 2.51 3.49
#> 2 2 0.478 1.85 2.06 3.69
#> 3 3 0.780 1.32 2.21 3.26
#> 4 4 0.705 1.49 2.49 3.33
#> 5 5 0.942 1.59 2.66 3.58
#> 6 6 0.906 1.90 2.87 3.93
多模式矢量化解决方案
对于可用于多种模式和替换的完整解决方案,我们可以利用
purr::map2_dfc
如下所示。
library(dplyr)
library(purrr)
## Get data
set.seed(667)
dta <- data.frame(id = 1:6,
R1213 = runif(6),
R1224 = runif(6, 1, 2),
R1255 = runif(6, 2, 3),
R1235 = runif(6, 3, 4)) %>%
as_tibble()
## Define a function to keep a hold out data set, then rename iteratively for each pattern and replacement.
rename_multiple_years <- function(df, patterns,
replacements,
hold_out_var = "id") {
hold_out_df <- df %>%
select_at(.vars = hold_out_var)
rename_df <- map2_dfc(patterns, replacements, function(pattern, replacement) {
df %>%
rename_at(.vars = grep(pattern, names(.)),
.funs = ~paste0(replacement, 1:length(.))) %>%
select_at(.vars = grep(replacement, names(.)))
})
final_df <- bind_cols(hold_out_df, rename_df)
return(final_df)
}
## Call function on specified patterns and replacements
renamed_dta <- dta %>%
rename_multiple_years(patterns = c("^R1.[125].$", "^R1.[3].$"),
replacements = c("A.2017.", "A.2018."))
renamed_dta
#> # A tibble: 6 x 5
#> id A.2017.1 A.2017.2 A.2017.3 A.2018.1
#> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 1 0.196 1.74 2.51 3.49
#> 2 2 0.478 1.85 2.06 3.69
#> 3 3 0.780 1.32 2.21 3.26
#> 4 4 0.705 1.49 2.49 3.33
#> 5 5 0.942 1.59 2.66 3.58
#> 6 6 0.906 1.90 2.87 3.93
走向整洁的数据
现在变量已经重命名,您可能会发现以整洁的格式保存数据很有用。以下使用
tidyr::gather
可能有用。
library(tidyr)
library(dplyr)
#Use tidy dataframe gather all variables, split by "." and drop A column (or keep if a measurement id)
renamed_dta %>%
gather(key = "measure", value = "value", -id) %>%
separate(measure, c("A", "year", "measure"), "[[.]]") %>%
select(-A)
#> # A tibble: 24 x 4
#> id year measure value
#> <int> <chr> <chr> <dbl>
#> 1 1 2017 1 0.196
#> 2 2 2017 1 0.478
#> 3 3 2017 1 0.780
#> 4 4 2017 1 0.705
#> 5 5 2017 1 0.942
#> 6 6 2017 1 0.906
#> 7 1 2017 2 1.74
#> 8 2 2017 2 1.85
#> 9 3 2017 2 1.32
#> 10 4 2017 2 1.49
#> # ... with 14 more rows