代码之家 › 专栏 › 技术社区 › Faisal Elvasador

在r自定义函数中将变量传递给ggplot时出错

dplyr ggplot2 function r

Faisal Elvasador · 技术社区 · 11 月前

在我试图将变量传递给ggplot中的geom_point之前,一切都很顺利。这是我收到的错误消息:

Error in `geom_point()`:
! Problem while computing aesthetics.
ℹ Error occurred in the 2nd layer.
Caused by error:
! object 'proficiency' not found

以下是我的函数:

# Draw a box plot of `component` for each level of a grouping variable, with a
# Kruskal-Wallis test label and an effect-size annotation.
#
# Arguments:
#   data      - data frame; must contain a `level` column plus the columns
#               referenced through `component` and `variable`.
#   component - column name (string) plotted on the y axis.
#   variable  - one of "proficiency", "experience", "region", "tech_use";
#               "experience" is read from the `exp` column and "region" from
#               the `school` column.
#   group     - "all" (default), "high" or "low"; selects rows by `level`.
#
# The ggplot expression is the last expression of the body, so it is the
# value of the function. Any other `group` or `variable` value calls stop().
my_function <- function(data, component, variable, group = "all"){
  # Attaches dplyr; `%>%` and select() are used further down.
  library(dplyr)
#==== data based on group ===
# Keep every row, or only the rows whose `level` equals the requested group.
if(group == "all"){
  df <- data
}  else if(group == "high"){
  df <- data[data$level == "high",]
} else if(group == "low"){
  df <- data[data$level == "low",]
} else{
  stop("unrecognized level, which musg be 'all', 'high', or 'low'")
}
#==== selection based on independent variables ====
# For each supported `variable`: set the level order used on the x axis, filter
# the rows, choose the axis label, and turn the column name into a symbol.
if (variable == "proficiency"){
  order <- c("below average", "average", "above average")
  # Drop the "Saya tidak tahu" answers, which are not part of `order`.
  data_df <- df[df$proficiency != "Saya tidak tahu" , ] 
  x_axis_lab <- "English proficiency level"
  variable <- as.name(variable)
} else if(variable =="experience"){
  order <-  c("inexperienced", "experienced")
  data_df <- df
  x_axis_lab <- "Teaching experience"
  # "experience" is plotted from the `exp` column, not `experience`.
  variable <- as.name("exp")
} else if(variable =="region"){
  order <- c("remote", "rural", "urban")
  # Keep only the three school regions listed in `order`.
  data_df <- df[df$school == "remote" | df$school == "rural" | df$school == "urban", ]
  x_axis_lab <- "School region"
  # "region" is plotted from the `school` column.
  variable <- as.name("school")
} else if(variable == "tech_use"){
  order <- c("sometimes", "usually", "always")
  # Drop the "never" and "rarely" answers, which are not part of `order`.
  data_df <- df[df$tech_use != "never" & df$tech_use != "rarely", ]
  x_axis_lab <- "The frequency of technology use"
  variable <- as.name(variable)
} else {
  stop("dependent variable is invalid")
}
#====variable conversion ===
# Build a two-column data frame with the fixed names `component1` and
# `variable1` so the effect-size formula below can refer to them literally.
component <- as.name(component)
component2 <- data_df %>% select({{component}})
variable2 <- data_df %>% select({{variable}})
df2 <- data.frame(cbind(component2, variable2))
df3 <- setNames(df2, c("component1", "variable1"))
#====effect size===
# Kruskal-Wallis effect size, formatted as "Effect size = <value> (<magnitude>)"
# and wrapped in a text grob that is added to the plot with annotation_custom().
effect_size <- as.data.frame(df3 %>% rstatix::kruskal_effsize(component1 ~ variable1))
effectsize <- paste("Effect size = ", round(effect_size$effsize, 3), " (",effect_size$magnitude, ")", sep = "")
effsize <- grid::grobTree(grid::textGrob(effectsize, x=0.1,  y=0.945, hjust=-0.2,
                                         gp=grid::gpar(col="#1f1f1f", fontsize=11, family="sans", fontface = "plain")))
library(ggplot2)

  # Layer 1: box plots, x ordered by `order`. Layer 2: white points at the
  # per-group means computed with aggregate().
  ggplot(data_df, aes(x=factor({{variable}}, level = order), y = {{component}}, fill={{variable}})) +
    geom_boxplot() +
    # NOTE(review): `{{ }}` is used inside stats::aggregate() here, which does
    # not appear to support tidy-eval embracing; this second layer is where the
    # reported "object not found" error points. Consider building the means
    # with dplyr::summarise() instead - TODO confirm.
    geom_point(data = aggregate({{component}} ~ {{variable}}, data =data_df, mean),
                 aes(x = {{variable}}, y = {{component}}), color = "#FFF", size = 1.5) +
    scale_fill_brewer(palette="Dark2")+ theme_classic() + theme(legend.position = "none", axis.title.x = element_text(vjust = -1.1)) + labs(x = x_axis_lab) +
    ggpubr::stat_kruskal_test(label = "as_detailed_italic", label.y = 5.1, hjust=0.2)+
    annotation_custom(effsize)
}

我调用函数的代码:

my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")

我想知道我在geom_point中做错了什么。当我注释掉这两行时,图可以正常生成。

以下是我的数据(为了节省空间,我只包含了20条记录):

structure(list(ID = c("PST100", "PST101", "PST102", "PST103", 
"PST104", "PST105", "PST106", "PST107", "PST108", "PST109", "PST110", 
"PST111", "PST112", "PST113", "PST114", "PST115", "PST116", "PST117", 
"PST118", "PST119", "PST120"), record_id = c(158L, 160L, 161L, 
162L, 163L, 166L, 167L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 
183L, 185L, 186L, 187L, 189L, 190L, 191L), gender = c("female", 
"female", "female", "male", "female", "female", "female", "female", 
"female", "female", "female", "female", "male", "female", "female", 
"female", "female", "male", "female", "female", "female"), age = c(22, 
23, 43, 36, 47, 24, 22, 21, 23, 21, 21, 23, 22, 22, 20, 22, 20, 
21, 21, 21, 21), school = c("urban", "urban", "urban", "remote", 
"urban", "urban", "remote", "remote", "urban", "rural", "urban", 
"rural", "urban", "rural", "urban", "rural", "all", "rural", 
"rural", "rural", "non_remote"), proficiency = c("average", "average", 
"average", "below average", "above average", "average", "average", 
"average", "average", "above average", "above average", "average", 
"average", "below average", "average", "below average", "above average", 
"average", "average", "below average", "average"), tech_use = c("always", 
"rarely", "always", "sometimes", "usually", "usually", "always", 
"sometimes", "always", "never", "always", "sometimes", "usually", 
"sometimes", "usually", "sometimes", "always", "sometimes", "sometimes", 
"usually", "usually"), experience = c("level_2", "level_1", "level_6", 
"level_6", "level_6", "level_1", "level_2", "level_6", "level_2", 
"level_2", "level_2", "level_1", "level_2", "level_1", "level_2", 
"level_1", "level_1", "level_3", "level_5", "level_1", "level_2"
), exp = c("inexperienced", "inexperienced", "experienced", "experienced", 
"experienced", "inexperienced", "inexperienced", "experienced", 
"inexperienced", "inexperienced", "inexperienced", "inexperienced", 
"inexperienced", "inexperienced", "inexperienced", "inexperienced", 
"inexperienced", "inexperienced", "experienced", "inexperienced", 
"inexperienced"), level = c("low", "low", "low", "high", "high", 
"low", "high", "low", "high", "low", "high", "low", "high", "low", 
"high", "low", "high", "low", "low", "low", "low"), CK1 = c(3L, 
4L, 3L, 4L, 4L, 3L, 4L, 4L, 5L, 5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 
2L, 4L, 3L, 4L), CK2 = c(3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 
5L, 4L, 3L, 5L, 4L, 5L, 4L, 5L, 3L, 4L, 3L, 3L), CK3 = c(4L, 
4L, 3L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 4L, 4L, 5L, 4L, 5L, 
2L, 3L, 3L, 4L), CK4 = c(4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L, 4L, 
4L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 3L, 3L, 2L, 4L), PK6 = c(5L, 
4L, 3L, 4L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 5L, 
3L, 5L, 2L, 4L), PK7 = c(5L, 4L, 3L, 4L, 5L, 4L, 5L, 4L, 5L, 
4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), PK8 = c(5L, 
4L, 2L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 
3L, 4L, 3L, 3L), PK9 = c(4L, 4L, 3L, 5L, 5L, 4L, 5L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PK10 = c(4L, 
4L, 3L, 4L, 5L, 3L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 
3L, 3L, 3L, 4L), PK11 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 3L, 3L, 3L), PK12 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
3L, 4L, 4L, 4L), PCK13 = c(4L, 3L, 3L, 5L, 5L, 3L, 5L, 3L, 4L, 
3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 4L, 3L), PCK14 = c(4L, 
3L, 3L, 5L, 5L, 2L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 4L, 
3L, 4L, 3L, 4L), PCK15 = c(3L, 3L, 3L, 5L, 5L, 3L, 4L, 4L, 4L, 
5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PCK16 = c(3L, 
3L, 3L, 4L, 5L, 3L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 
3L, 4L, 4L, 3L), TK17 = c(4L, 3L, 3L, 5L, 5L, 4L, 5L, 4L, 5L, 
5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 2L, 4L, 4L, 5L), TK19 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TK20 = c(4L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK21 = c(4L, 
3L, 4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TK22 = c(4L, 3L, 3L, 4L, 5L, 2L, 5L, 5L, 5L, 
5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK23 = c(4L, 
3L, 3L, 4L, 5L, 2L, 5L, 5L, 4L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
2L, 4L, 4L, 5L), TCK25 = c(4L, 4L, 3L, 4L, 5L, 3L, 5L, 4L, 5L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 5L), TCK26 = c(4L, 
4L, 2L, 4L, 4L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 
3L, 4L, 3L, 5L), TCK27 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 5L, 5L, 
3L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), TCK28 = c(3L, 
3L, 3L, 4L, 3L, 3L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 
3L, 4L, 3L, 3L), TCK29 = c(4L, 3L, 3L, 4L, 5L, 3L, 5L, 5L, 5L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L), TCK30 = c(4L, 
3L, 3L, 5L, 5L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 4L, 4L, 4L, 4L, 5L, 
3L, 4L, 4L, 3L), TCK31 = c(3L, 3L, 2L, 5L, 4L, 3L, 5L, 3L, 5L, 
3L, 5L, 4L, 3L, 4L, 4L, 3L, 5L, 2L, 4L, 4L, 3L), TPK32 = c(4L, 
3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
3L, 4L, 3L, 4L), TPK33 = c(3L, 3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 2L, 4L, 3L, 4L), TPK34 = c(4L, 
3L, 2L, 5L, 5L, 3L, 5L, 3L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
2L, 4L, 3L, 4L), TPK35 = c(4L, 4L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 3L, 4L, 2L, 4L), TPK36 = c(4L, 
4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 
2L, 4L, 2L, 3L), TPK37 = c(3L, 3L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
4L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 4L), TPK38 = c(4L, 
3L, 2L, 4L, 5L, 3L, 5L, 3L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 
3L, 4L, 3L, 3L), TPCK39 = c(3L, 3L, 2L, 4L, 4L, 3L, 5L, 3L, 4L, 
3L, 5L, 4L, 3L, 3L, 4L, 3L, 5L, 3L, 4L, 3L, 3L), TPCK40 = c(4L, 
3L, 3L, 4L, 5L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 3L, 4L, 4L, 4L, 4L, 
3L, 4L, 3L, 3L), TPCK41 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 4L, 
5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 3L, 4L, 3L, 4L), TPCK42 = c(4L, 
3L, 3L, 4L, 4L, 3L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 
3L, 4L, 3L, 4L), TPCK43 = c(3L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 5L, 
5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L), TPCK44 = c(3L, 
3L, 3L, 4L, 4L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 
2L, 4L, 3L, 4L), TPCK45 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 
4L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 3L), CK = c(3.5, 
4, 3, 4, 4, 2.75, 4, 4, 4.5, 4.75, 4, 3, 4.5, 4, 4.75, 3.75, 
4.5, 2.5, 3.5, 2.75, 3.75), PK = c(4.42857142857143, 3.85714285714286, 
2.71428571428571, 4.28571428571429, 5, 3.57142857142857, 4.85714285714286, 
3.71428571428571, 4.42857142857143, 4, 4, 3.57142857142857, 3.57142857142857, 
4, 4.14285714285714, 3.28571428571429, 4.14285714285714, 2.57142857142857, 
3.85714285714286, 3, 3.71428571428571), PCK = c(3.5, 3, 3, 4.75, 
5, 2.75, 4.25, 3.25, 3.75, 3.75, 4, 3.75, 4, 4.25, 4, 3.25, 4, 
2.5, 4, 3.5, 3.5), TK = c(4, 3, 3.33333333333333, 4.5, 5, 3.33333333333333, 
5, 4.83333333333333, 4.83333333333333, 5, 5, 3.5, 5, 4, 5, 4, 
4, 2.33333333333333, 4, 4, 5), TCK = c(3.57142857142857, 3.42857142857143, 
2.57142857142857, 4.28571428571429, 4.28571428571429, 3, 4.85714285714286, 
4.28571428571429, 5, 3, 5, 3.57142857142857, 4.57142857142857, 
4, 4, 3.57142857142857, 4.28571428571429, 2.42857142857143, 4, 
3.42857142857143, 3.71428571428571), TPK = c(3.71428571428571, 
3.28571428571429, 2, 4.57142857142857, 5, 3, 5, 3, 4.28571428571429, 
3.71428571428571, 5, 4, 4.57142857142857, 4, 4, 3.71428571428571, 
4.85714285714286, 2.42857142857143, 4, 2.57142857142857, 3.71428571428571
), TPCK = c(3.42857142857143, 3.42857142857143, 2.42857142857143, 
4, 4.42857142857143, 3, 4.85714285714286, 3.57142857142857, 4.14285714285714, 
4.57142857142857, 5, 3.85714285714286, 4, 3.71428571428571, 4, 
3.57142857142857, 4.28571428571429, 2.71428571428571, 4, 2.85714285714286, 
3.42857142857143), TPACK = c(3.76190476190476, 3.42857142857143, 
2.66666666666667, 4.33333333333333, 4.69047619047619, 3.0952380952381, 
4.76190476190476, 3.80952380952381, 4.45238095238095, 4.07142857142857, 
4.64285714285714, 3.64285714285714, 4.30952380952381, 3.97619047619048, 
4.23809523809524, 3.5952380952381, 4.30952380952381, 2.5, 3.92857142857143, 
3.14285714285714, 3.83333333333333)), row.names = 100:120, class = "data.frame")

1 回复 | 直到 11 月前

stefan 11 月前

我无法重现你的错误。相反,我得到了一个错误

`Error in model.frame.default(formula = { : object is not a matrix`。

不管怎样,问题在于你不能在 aggregate 中使用双花括号 {{ 。相反,我建议使用 dplyr::summarise 。除此之外,我稍微简化了代码;由于你是以字符串形式传入变量名的,我改用了 .data 代词。

library(ggplot2)
library(dplyr)

#' Box plot of a score column across the levels of a grouping variable
#'
#' Draws one box per level of the chosen grouping variable, overlays the
#' per-group means as white points, and annotates the plot with a
#' Kruskal-Wallis test label (ggpubr) and the Kruskal-Wallis effect size
#' (rstatix).
#'
#' @param data A data frame containing a `level` column, the column named by
#'   `component`, and the grouping column selected through `variable`.
#' @param component Name (single string) of the column plotted on the y axis;
#'   its mean is computed per group.
#' @param variable One of "proficiency", "experience", "region" or
#'   "tech_use". "experience" is read from the `exp` column and "region"
#'   from the `school` column.
#' @param group Which rows to use: "all" (default), or only the rows whose
#'   `level` is "high" or "low".
#' @return A ggplot object.
my_function <- function(data, component, variable, group = "all") {
  # ==== data based on group ===
  # Validate before touching `data` so a bad argument fails with a clear
  # message instead of an error from `if` or from subsetting.
  valid_groups <- c("all", "high", "low")
  if (!is.character(group) || length(group) != 1L ||
    !group %in% valid_groups) {
    stop(
      "unrecognized level, which must be 'all', 'high', or 'low'",
      call. = FALSE
    )
  }
  df <- if (group == "all") data else data[data$level == group, ]

  # ==== selection based on independent variables ====
  if (!is.character(variable) || length(variable) != 1L ||
    is.na(variable)) {
    stop("independent variable is invalid", call. = FALSE)
  }
  # One spec per supported variable: the column actually plotted, the order
  # of its levels on the x axis, the filtered rows, and the axis label.
  # switch() evaluates only the matching arm, so only that filter is run.
  spec <- switch(variable,
    proficiency = list(
      column = "proficiency",
      levels = c("below average", "average", "above average"),
      data = df[df$proficiency != "Saya tidak tahu", ],
      label = "English proficiency level"
    ),
    experience = list(
      column = "exp",
      levels = c("inexperienced", "experienced"),
      data = df,
      label = "Teaching experience"
    ),
    region = list(
      column = "school",
      levels = c("remote", "rural", "urban"),
      data = df[df$school %in% c("remote", "rural", "urban"), ],
      label = "School region"
    ),
    tech_use = list(
      column = "tech_use",
      levels = c("sometimes", "usually", "always"),
      data = df[!df$tech_use %in% c("never", "rarely"), ],
      label = "The frequency of technology use"
    ),
    stop(
      "independent variable is invalid: must be 'proficiency', ",
      "'experience', 'region', or 'tech_use'",
      call. = FALSE
    )
  )
  column <- spec$column
  # Named `level_order` rather than `order` to avoid shadowing base::order().
  level_order <- spec$levels
  data_df <- spec$data

  # ==== effect size ===
  # Rename the two columns to fixed names so the formula can be written
  # literally.
  effect_size <- data_df %>%
    select(all_of(c(component1 = component, variable1 = column))) %>%
    rstatix::kruskal_effsize(component1 ~ variable1)

  effect_label <- paste0(
    "Effect size = ",
    round(effect_size$effsize, 3), " (", effect_size$magnitude, ")"
  )
  effect_grob <- grid::textGrob(effect_label,
    x = 0.1, y = 0.945, hjust = -0.2,
    gp = grid::gpar(
      col = "#1f1f1f", fontsize = 11,
      family = "sans", fontface = "plain"
    )
  )

  # ==== plot ===
  # Per-group means for the overlay points; the summary keeps the original
  # column names so the same `.data[[...]]` aesthetics work in both layers.
  group_means <- summarise(
    data_df,
    "{component}" := mean(.data[[component]]),
    .by = all_of(column)
  )

  ggplot(data_df, aes(
    # `levels` spelled out in full: no reliance on partial argument matching.
    x = factor(.data[[column]], levels = level_order),
    y = .data[[component]], fill = .data[[column]]
  )) +
    geom_boxplot() +
    geom_point(
      data = group_means,
      aes(x = .data[[column]], y = .data[[component]]),
      color = "#FFF", size = 1.5
    ) +
    scale_fill_brewer(palette = "Dark2") +
    theme_classic() +
    theme(
      legend.position = "none",
      axis.title.x = element_text(vjust = 1)
    ) +
    labs(x = spec$label) +
    ggpubr::stat_kruskal_test(
      label = "as_detailed_italic",
      label.y = 5.1, hjust = 0.2
    ) +
    annotation_custom(effect_grob)
}

my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")