代码之家  ›  专栏  ›  技术社区  ›  Faisal Elvasador

在 R 自定义函数中将变量传递给 ggplot 时出错

  •  0
  • Faisal Elvasador  · 技术社区  · 11 月前

    在我试图将变量传递给ggplot中的geom_point之前,一切都很顺利。这是我收到的错误消息:

    Error in `geom_point()`:
    ! Problem while computing aesthetics.
    ℹ Error occurred in the 2nd layer.
    Caused by error:
    ! object 'proficiency' not found
    

    以下是我的函数:

    # Draws a boxplot of `component` grouped by `variable` for the rows of `data`
    # selected by `group`, overlays a point layer intended to show group means,
    # and annotates the plot with a Kruskal-Wallis test label and an effect size.
    #
    # Args:
    #   data:      data frame; the code reads its `level` column, the column
    #              selected through `variable`, and the column named `component`.
    #   component: column name (string) used for the y axis.
    #   variable:  one of "proficiency", "experience", "region", "tech_use".
    #   group:     "all", "high" or "low"; the latter two keep the rows whose
    #              `level` equals that value.
    #
    # Returns the ggplot object built by the final expression. Stops with an
    # error when `group` or `variable` is not one of the values listed above.
    #
    # NOTE(review): as posted, this version fails in the geom_point() layer;
    # see the note placed directly above that layer.
    my_function <- function(data, component, variable, group = "all"){
      # Attaches dplyr (needed for %>% and select()) each time the function runs.
      library(dplyr)
    #==== data based on group ===
    if(group == "all"){
      df <- data
    }  else if(group == "high"){
      df <- data[data$level == "high",]
    } else if(group == "low"){
      df <- data[data$level == "low",]
    } else{
      stop("unrecognized level, which musg be 'all', 'high', or 'low'")
    }
    #==== selection based on independent variables ====
    # Each branch sets: `order` (level order for the x axis; shadows base::order
    # inside this function), `data_df` (rows kept for the plot), `x_axis_lab`,
    # and rebinds `variable` from a string to a symbol naming the actual column.
    if (variable == "proficiency"){
      order <- c("below average", "average", "above average")
      data_df <- df[df$proficiency != "Saya tidak tahu" , ] 
      x_axis_lab <- "English proficiency level"
      variable <- as.name(variable)
    } else if(variable =="experience"){
      order <-  c("inexperienced", "experienced")
      data_df <- df
      x_axis_lab <- "Teaching experience"
      # "experience" is plotted from the two-level `exp` column.
      variable <- as.name("exp")
    } else if(variable =="region"){
      order <- c("remote", "rural", "urban")
      data_df <- df[df$school == "remote" | df$school == "rural" | df$school == "urban", ]
      x_axis_lab <- "School region"
      # "region" is plotted from the `school` column.
      variable <- as.name("school")
    } else if(variable == "tech_use"){
      order <- c("sometimes", "usually", "always")
      data_df <- df[df$tech_use != "never" & df$tech_use != "rarely", ]
      x_axis_lab <- "The frequency of technology use"
      variable <- as.name(variable)
    } else {
      stop("dependent variable is invalid")
    }
    #====variable conversion ===
    # Build a two-column data frame with the fixed names component1/variable1 so
    # the effect-size formula below does not need the dynamic column names.
    component <- as.name(component)
    component2 <- data_df %>% select({{component}})
    variable2 <- data_df %>% select({{variable}})
    df2 <- data.frame(cbind(component2, variable2))
    df3 <- setNames(df2, c("component1", "variable1"))
    #====effect size===
    effect_size <- as.data.frame(df3 %>% rstatix::kruskal_effsize(component1 ~ variable1))
    effectsize <- paste("Effect size = ", round(effect_size$effsize, 3), " (",effect_size$magnitude, ")", sep = "")
    # Text grob holding the effect-size label, added to the plot by annotation_custom().
    effsize <- grid::grobTree(grid::textGrob(effectsize, x=0.1,  y=0.945, hjust=-0.2,
                                             gp=grid::gpar(col="#1f1f1f", fontsize=11, family="sans", fontface = "plain")))
    library(ggplot2)
    
      ggplot(data_df, aes(x=factor({{variable}}, level = order), y = {{component}}, fill={{variable}})) +
        geom_boxplot() +
        # NOTE(review): this is the layer the question is about. The `data =`
        # argument puts `{{ }}` inside the formula passed to aggregate(); per the
        # answer further down, `{{` cannot be used inside aggregate(), and the
        # suggested replacement is dplyr::summarise().
        geom_point(data = aggregate({{component}} ~ {{variable}}, data =data_df, mean),
                     aes(x = {{variable}}, y = {{component}}), color = "#FFF", size = 1.5) +
        scale_fill_brewer(palette="Dark2")+ theme_classic() + theme(legend.position = "none", axis.title.x = element_text(vjust = -1.1)) + labs(x = x_axis_lab) +
        ggpubr::stat_kruskal_test(label = "as_detailed_italic", label.y = 5.1, hjust=0.2)+
        annotation_custom(effsize)
    }
    

    我调用函数的代码:

    # Example call: TPACK plotted by English proficiency, restricted to the rows
    # whose `level` is "low". `df` is presumably the data frame shown further down.
    my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")
    

    我想知道我在 geom_point 中做错了什么。当我把这两行注释掉时,图形可以正常生成。

    以下是我的数据(为了节省空间,我只包含了20条记录):

    structure(list(ID = c("PST100", "PST101", "PST102", "PST103", 
    "PST104", "PST105", "PST106", "PST107", "PST108", "PST109", "PST110", 
    "PST111", "PST112", "PST113", "PST114", "PST115", "PST116", "PST117", 
    "PST118", "PST119", "PST120"), record_id = c(158L, 160L, 161L, 
    162L, 163L, 166L, 167L, 169L, 170L, 171L, 172L, 173L, 174L, 175L, 
    183L, 185L, 186L, 187L, 189L, 190L, 191L), gender = c("female", 
    "female", "female", "male", "female", "female", "female", "female", 
    "female", "female", "female", "female", "male", "female", "female", 
    "female", "female", "male", "female", "female", "female"), age = c(22, 
    23, 43, 36, 47, 24, 22, 21, 23, 21, 21, 23, 22, 22, 20, 22, 20, 
    21, 21, 21, 21), school = c("urban", "urban", "urban", "remote", 
    "urban", "urban", "remote", "remote", "urban", "rural", "urban", 
    "rural", "urban", "rural", "urban", "rural", "all", "rural", 
    "rural", "rural", "non_remote"), proficiency = c("average", "average", 
    "average", "below average", "above average", "average", "average", 
    "average", "average", "above average", "above average", "average", 
    "average", "below average", "average", "below average", "above average", 
    "average", "average", "below average", "average"), tech_use = c("always", 
    "rarely", "always", "sometimes", "usually", "usually", "always", 
    "sometimes", "always", "never", "always", "sometimes", "usually", 
    "sometimes", "usually", "sometimes", "always", "sometimes", "sometimes", 
    "usually", "usually"), experience = c("level_2", "level_1", "level_6", 
    "level_6", "level_6", "level_1", "level_2", "level_6", "level_2", 
    "level_2", "level_2", "level_1", "level_2", "level_1", "level_2", 
    "level_1", "level_1", "level_3", "level_5", "level_1", "level_2"
    ), exp = c("inexperienced", "inexperienced", "experienced", "experienced", 
    "experienced", "inexperienced", "inexperienced", "experienced", 
    "inexperienced", "inexperienced", "inexperienced", "inexperienced", 
    "inexperienced", "inexperienced", "inexperienced", "inexperienced", 
    "inexperienced", "inexperienced", "experienced", "inexperienced", 
    "inexperienced"), level = c("low", "low", "low", "high", "high", 
    "low", "high", "low", "high", "low", "high", "low", "high", "low", 
    "high", "low", "high", "low", "low", "low", "low"), CK1 = c(3L, 
    4L, 3L, 4L, 4L, 3L, 4L, 4L, 5L, 5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 
    2L, 4L, 3L, 4L), CK2 = c(3L, 4L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 
    5L, 4L, 3L, 5L, 4L, 5L, 4L, 5L, 3L, 4L, 3L, 3L), CK3 = c(4L, 
    4L, 3L, 4L, 4L, 3L, 4L, 5L, 5L, 5L, 4L, 3L, 4L, 4L, 5L, 4L, 5L, 
    2L, 3L, 3L, 4L), CK4 = c(4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L, 4L, 
    4L, 4L, 3L, 4L, 4L, 5L, 3L, 4L, 3L, 3L, 2L, 4L), PK6 = c(5L, 
    4L, 3L, 4L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 3L, 3L, 4L, 4L, 3L, 5L, 
    3L, 5L, 2L, 4L), PK7 = c(5L, 4L, 3L, 4L, 5L, 4L, 5L, 4L, 5L, 
    4L, 4L, 3L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), PK8 = c(5L, 
    4L, 2L, 4L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 4L, 3L, 4L, 
    3L, 4L, 3L, 3L), PK9 = c(4L, 4L, 3L, 5L, 5L, 4L, 5L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PK10 = c(4L, 
    4L, 3L, 4L, 5L, 3L, 4L, 4L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 3L, 4L, 
    3L, 3L, 3L, 4L), PK11 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
    4L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 3L, 3L, 3L), PK12 = c(4L, 
    3L, 3L, 5L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    3L, 4L, 4L, 4L), PCK13 = c(4L, 3L, 3L, 5L, 5L, 3L, 5L, 3L, 4L, 
    3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 2L, 4L, 4L, 3L), PCK14 = c(4L, 
    3L, 3L, 5L, 5L, 2L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 4L, 3L, 4L, 
    3L, 4L, 3L, 4L), PCK15 = c(3L, 3L, 3L, 5L, 5L, 3L, 4L, 4L, 4L, 
    5L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 4L), PCK16 = c(3L, 
    3L, 3L, 4L, 5L, 3L, 3L, 2L, 3L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 4L, 
    3L, 4L, 4L, 3L), TK17 = c(4L, 3L, 3L, 5L, 5L, 4L, 5L, 4L, 5L, 
    5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 2L, 4L, 4L, 5L), TK19 = c(4L, 
    3L, 3L, 5L, 5L, 3L, 5L, 5L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
    2L, 4L, 4L, 5L), TK20 = c(4L, 3L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK21 = c(4L, 
    3L, 4L, 4L, 5L, 4L, 5L, 5L, 5L, 5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 
    2L, 4L, 4L, 5L), TK22 = c(4L, 3L, 3L, 4L, 5L, 2L, 5L, 5L, 5L, 
    5L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 4L, 5L), TK23 = c(4L, 
    3L, 3L, 4L, 5L, 2L, 5L, 5L, 4L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 
    2L, 4L, 4L, 5L), TCK25 = c(4L, 4L, 3L, 4L, 5L, 3L, 5L, 4L, 5L, 
    3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 3L, 5L), TCK26 = c(4L, 
    4L, 2L, 4L, 4L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 5L, 4L, 4L, 4L, 4L, 
    3L, 4L, 3L, 5L), TCK27 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 5L, 5L, 
    3L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 3L, 4L), TCK28 = c(3L, 
    3L, 3L, 4L, 3L, 3L, 5L, 5L, 5L, 3L, 5L, 4L, 5L, 4L, 4L, 3L, 4L, 
    3L, 4L, 3L, 3L), TCK29 = c(4L, 3L, 3L, 4L, 5L, 3L, 5L, 5L, 5L, 
    3L, 5L, 4L, 5L, 4L, 4L, 4L, 4L, 2L, 4L, 4L, 3L), TCK30 = c(4L, 
    3L, 3L, 5L, 5L, 3L, 5L, 4L, 5L, 3L, 5L, 3L, 4L, 4L, 4L, 4L, 5L, 
    3L, 4L, 4L, 3L), TCK31 = c(3L, 3L, 2L, 5L, 4L, 3L, 5L, 3L, 5L, 
    3L, 5L, 4L, 3L, 4L, 4L, 3L, 5L, 2L, 4L, 4L, 3L), TPK32 = c(4L, 
    3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
    3L, 4L, 3L, 4L), TPK33 = c(3L, 3L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
    3L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 2L, 4L, 3L, 4L), TPK34 = c(4L, 
    3L, 2L, 5L, 5L, 3L, 5L, 3L, 5L, 4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 
    2L, 4L, 3L, 4L), TPK35 = c(4L, 4L, 2L, 5L, 5L, 3L, 5L, 3L, 4L, 
    4L, 5L, 4L, 5L, 4L, 4L, 4L, 5L, 3L, 4L, 2L, 4L), TPK36 = c(4L, 
    4L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 3L, 5L, 4L, 4L, 4L, 4L, 4L, 5L, 
    2L, 4L, 2L, 3L), TPK37 = c(3L, 3L, 2L, 4L, 5L, 3L, 5L, 3L, 4L, 
    4L, 5L, 4L, 4L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 4L), TPK38 = c(4L, 
    3L, 2L, 4L, 5L, 3L, 5L, 3L, 5L, 5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 
    3L, 4L, 3L, 3L), TPCK39 = c(3L, 3L, 2L, 4L, 4L, 3L, 5L, 3L, 4L, 
    3L, 5L, 4L, 3L, 3L, 4L, 3L, 5L, 3L, 4L, 3L, 3L), TPCK40 = c(4L, 
    3L, 3L, 4L, 5L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 3L, 4L, 4L, 4L, 4L, 
    3L, 4L, 3L, 3L), TPCK41 = c(4L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 4L, 
    5L, 5L, 4L, 4L, 4L, 4L, 3L, 5L, 3L, 4L, 3L, 4L), TPCK42 = c(4L, 
    3L, 3L, 4L, 4L, 3L, 5L, 4L, 4L, 5L, 5L, 4L, 5L, 3L, 4L, 4L, 4L, 
    3L, 4L, 3L, 4L), TPCK43 = c(3L, 4L, 2L, 4L, 5L, 3L, 5L, 4L, 5L, 
    5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L, 3L, 3L), TPCK44 = c(3L, 
    3L, 3L, 4L, 4L, 3L, 5L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 
    2L, 4L, 3L, 4L), TPCK45 = c(3L, 4L, 2L, 4L, 4L, 3L, 4L, 4L, 4L, 
    4L, 5L, 3L, 5L, 4L, 4L, 3L, 4L, 2L, 4L, 2L, 3L), CK = c(3.5, 
    4, 3, 4, 4, 2.75, 4, 4, 4.5, 4.75, 4, 3, 4.5, 4, 4.75, 3.75, 
    4.5, 2.5, 3.5, 2.75, 3.75), PK = c(4.42857142857143, 3.85714285714286, 
    2.71428571428571, 4.28571428571429, 5, 3.57142857142857, 4.85714285714286, 
    3.71428571428571, 4.42857142857143, 4, 4, 3.57142857142857, 3.57142857142857, 
    4, 4.14285714285714, 3.28571428571429, 4.14285714285714, 2.57142857142857, 
    3.85714285714286, 3, 3.71428571428571), PCK = c(3.5, 3, 3, 4.75, 
    5, 2.75, 4.25, 3.25, 3.75, 3.75, 4, 3.75, 4, 4.25, 4, 3.25, 4, 
    2.5, 4, 3.5, 3.5), TK = c(4, 3, 3.33333333333333, 4.5, 5, 3.33333333333333, 
    5, 4.83333333333333, 4.83333333333333, 5, 5, 3.5, 5, 4, 5, 4, 
    4, 2.33333333333333, 4, 4, 5), TCK = c(3.57142857142857, 3.42857142857143, 
    2.57142857142857, 4.28571428571429, 4.28571428571429, 3, 4.85714285714286, 
    4.28571428571429, 5, 3, 5, 3.57142857142857, 4.57142857142857, 
    4, 4, 3.57142857142857, 4.28571428571429, 2.42857142857143, 4, 
    3.42857142857143, 3.71428571428571), TPK = c(3.71428571428571, 
    3.28571428571429, 2, 4.57142857142857, 5, 3, 5, 3, 4.28571428571429, 
    3.71428571428571, 5, 4, 4.57142857142857, 4, 4, 3.71428571428571, 
    4.85714285714286, 2.42857142857143, 4, 2.57142857142857, 3.71428571428571
    ), TPCK = c(3.42857142857143, 3.42857142857143, 2.42857142857143, 
    4, 4.42857142857143, 3, 4.85714285714286, 3.57142857142857, 4.14285714285714, 
    4.57142857142857, 5, 3.85714285714286, 4, 3.71428571428571, 4, 
    3.57142857142857, 4.28571428571429, 2.71428571428571, 4, 2.85714285714286, 
    3.42857142857143), TPACK = c(3.76190476190476, 3.42857142857143, 
    2.66666666666667, 4.33333333333333, 4.69047619047619, 3.0952380952381, 
    4.76190476190476, 3.80952380952381, 4.45238095238095, 4.07142857142857, 
    4.64285714285714, 3.64285714285714, 4.30952380952381, 3.97619047619048, 
    4.23809523809524, 3.5952380952381, 4.30952380952381, 2.5, 3.92857142857143, 
    3.14285714285714, 3.83333333333333)), row.names = 100:120, class = "data.frame")
    
    
    1 回复  |  直到 11 月前
        1
  •  3
  •   stefan    11 月前

    我无法重现你的错误。相反,我得到的是另一个错误:

    Error in model.frame.default(formula = { : object is not a matrix

    不管怎样,问题在于你不能在 aggregate 中使用 curly-curly 运算符 {{ 。我建议改用 dplyr::summarise 。此外,我稍微简化了你的代码;由于你是以字符串形式传递变量名,我改用了 .data 代词。

    library(ggplot2)
    library(dplyr)
    
    # Draw a boxplot of `component` across the levels of `variable`, overlay the
    # per-level means as white points, and annotate the plot with a
    # Kruskal-Wallis test label and the Kruskal effect size.
    #
    # Relies on ggplot2 and dplyr being attached (see the library() calls above
    # this definition); rstatix, ggpubr and grid are called through `::`.
    #
    # Args:
    #   data:      data frame with a `level` column, the column named by
    #              `component`, and the column selected through `variable`.
    #   component: name (string) of the numeric column shown on the y axis.
    #   variable:  one of "proficiency", "experience" (uses column `exp`),
    #              "region" (uses column `school`) or "tech_use".
    #   group:     "all", "high" or "low"; the latter two keep only the rows
    #              whose `level` equals that value.
    #
    # Returns a ggplot object. Stops with an error when `group` or `variable`
    # is not one of the values listed above.
    my_function <- function(data, component, variable, group = "all") {
      # ==== data based on group ===
      # `%in%` never yields NA, so rows with a missing `level` are dropped
      # instead of becoming all-NA rows, which is what `==` would produce.
      if (group == "all") {
        df <- data
      } else if (group %in% c("high", "low")) {
        df <- data[data$level %in% group, ]
      } else {
        stop("unrecognized level, which must be 'all', 'high', or 'low'")
      }

      # ==== selection based on independent variables ====
      # Each branch fixes the x-axis level order, the rows kept for plotting,
      # the axis label and, where it differs from the argument, the real
      # column name behind `variable`.
      if (variable == "proficiency") {
        level_order <- c("below average", "average", "above average")
        data_df <- df[!df$proficiency %in% "Saya tidak tahu", ]
        x_axis_lab <- "English proficiency level"
      } else if (variable == "experience") {
        level_order <- c("inexperienced", "experienced")
        data_df <- df
        x_axis_lab <- "Teaching experience"
        variable <- "exp"
      } else if (variable == "region") {
        level_order <- c("remote", "rural", "urban")
        data_df <- df[df$school %in% level_order, ]
        x_axis_lab <- "School region"
        variable <- "school"
      } else if (variable == "tech_use") {
        level_order <- c("sometimes", "usually", "always")
        data_df <- df[!df$tech_use %in% c("never", "rarely"), ]
        x_axis_lab <- "The frequency of technology use"
      } else {
        stop(
          "unrecognized variable, which must be 'proficiency', 'experience', ",
          "'region', or 'tech_use'"
        )
      }

      # ==== effect size ===
      # Rename the two columns to fixed names so the formula can be written
      # literally regardless of which columns were requested.
      effect_size <- data_df %>%
        select(all_of(c(component1 = component, variable1 = variable))) %>%
        rstatix::kruskal_effsize(component1 ~ variable1)

      effectsize <- paste0(
        "Effect size = ",
        round(effect_size$effsize, 3), " (", effect_size$magnitude, ")"
      )
      effsize <- grid::textGrob(effectsize,
        x = 0.1, y = 0.945, hjust = -0.2,
        gp = grid::gpar(
          col = "#1f1f1f", fontsize = 11,
          family = "sans", fontface = "plain"
        )
      )

      # ==== group means for the point layer ===
      # `{{ }}` cannot be used inside aggregate(); summarise() with the `.data`
      # pronoun handles column names supplied as strings. `na.rm = TRUE` keeps
      # a mean point for groups that contain some missing scores, matching the
      # boxplot layer, which also ignores missing values.
      group_means <- summarise(
        data_df,
        "{component}" := mean(.data[[component]], na.rm = TRUE),
        .by = all_of(variable)
      )

      ggplot(data_df, aes(
        x = factor(.data[[variable]], levels = level_order),
        y = .data[[component]], fill = .data[[variable]]
      )) +
        geom_boxplot() +
        geom_point(
          data = group_means,
          aes(x = .data[[variable]], y = .data[[component]]),
          color = "#FFF", size = 1.5
        ) +
        scale_fill_brewer(palette = "Dark2") +
        theme_classic() +
        theme(
          legend.position = "none",
          axis.title.x = element_text(vjust = 1)
        ) +
        labs(x = x_axis_lab) +
        ggpubr::stat_kruskal_test(
          label = "as_detailed_italic",
          label.y = 5.1, hjust = 0.2
        ) +
        annotation_custom(effsize)
    }
    
    # Example call: TPACK plotted by English proficiency, restricted to the rows
    # whose `level` is "low". `df` is presumably the asker's data frame.
    my_function(data = df, component = "TPACK", variable = "proficiency", group = "low")
    

    enter image description here