代码之家  ›  专栏  ›  技术社区  ›  Homer Jay Simpson

ggplot2 `geom_label()` 中的警告消息

  •  0
  • Homer Jay Simpson  · 技术社区  · 9 月前

    我在R中有一个数据帧,经过一些数据转换、计算和绘图后:

    library(tidyverse)
    library(ggstats)
    library(patchwork)
    library(tibble)
    library(tidyverse)
    library(ggplot2)
    library(ggstats)
    # Answer options of the Likert scale, from most negative to most positive.
    # The "\n" inside the two extreme options is part of the level string itself.
    likert_levels <- c(
      "Very \n Dissatisfied",
      "Dissatisfied",
      "Neutral",
      "Satisfied",
      "Very \n Satisfied"
    )

    # Fill colour per answer option; the names must match the level strings
    # exactly because scale_fill_manual() looks colours up by name.
    custom_colors <- c(
      "Very \n Dissatisfied" = "#ed2e1c",
      "Dissatisfied" = "#e09c95",
      "Neutral" = "#85c1e9",
      "Satisfied" = "#7FF98B",
      "Very \n Satisfied" = "#04B431"
    )

    # Category labels "A" .. "T" and the number of simulated rows.
    var_levels <- LETTERS[1:20]
    n <- 500

    # Simulated survey data: one random category per row plus three randomly
    # drawn answers. Only val2 is sampled from a pool that includes NA.
    df <- tibble(
      var = sample(var_levels, n, replace = TRUE),
      val1 = sample(likert_levels, n, replace = TRUE),
      val2 = sample(c(likert_levels, NA), n, replace = TRUE),
      val3 = sample(likert_levels, n, replace = TRUE)
    )
    
    # Stack val1..val3 into a single answer column and drop rows with NA.
    df2 <- df %>%
      pivot_longer(!var, names_to = "Categories", values_to = "likert_values") %>%
      select(-Categories) %>%
      tidyr::drop_na()

    # Number of rows per category (column `n`).
    df_bar <- df %>%
      count(var)

    # Number of answers per category and answer option, and the share of each
    # option within its category expressed in percent.
    df_likert <- df2 %>%
      count(var, likert_values, name = "count") %>%
      mutate(percentage = (count / sum(count)) * 100, .by = var)
    
    
    # Add the per-category row count, spread the percentages into one column per
    # answer option, and order the columns as: var, n, then the five options
    # from "Very \n Dissatisfied" to "Very \n Satisfied".
    df <- df_likert %>%
      left_join(df_bar, by = "var") %>%
      select(-count) %>%
      pivot_wider(names_from = likert_values, values_from = percentage) %>%
      dplyr::relocate(
        var,
        n,
        `Very \n Dissatisfied`,
        Dissatisfied,
        Neutral,
        Satisfied,
        `Very \n Satisfied`
      )
    
    
    # Names of the answer columns: everything after the first two columns of df.
    levels <- names(df)[-(1:2)]

    # Long format: one row per category and answer option, with the option
    # stored as a factor whose level order follows the column order of df.
    df_long <- df %>%
      select(-n) %>%
      pivot_longer(!var, names_to = "Likert", values_to = "Percentage") %>%
      mutate(Likert = factor(Likert, levels))

    # Per category, the summed percentage of the first two options ("lower")
    # and of the last two options ("higher"), reshaped to one row per side.
    df_tot <- df_long %>%
      summarise(
        prop_lower = sum(Percentage[Likert %in% levels[1:2]]),
        prop_higher = sum(Percentage[Likert %in% levels[4:5]]),
        .by = var
      ) %>%
      pivot_longer(
        !var,
        names_prefix = "prop_",
        names_to = "where",
        values_to = "Percentage"
      )

    # Order the categories by their "higher" percentage (the "lower" rows are
    # masked with NA and ignored via na.rm) and keep the first ten of them.
    var_ordered <- levels(
      with(
        df_tot,
        reorder(
          var,
          ifelse(where == "higher", Percentage, NA),
          na.rm = TRUE
        )
      )
    )[1:10]

    # Restrict the long table to the selected categories.
    df_long <- df_long %>%
      filter(var %in% var_ordered)
    
    # Likert plot
    # Bars are positioned with position_likert(); geom_text() prints the
    # percentage of every segment and geom_label() prints the lower / higher
    # totals from df_tot at x = -0.8 and x = 0.8.
    # Note: the label layer is given every row of df_tot, while the y scale is
    # limited to the categories in var_ordered.
    likert_plot <-
      ggplot(df_long, aes(x = Percentage, y = var, fill = Likert)) +
      geom_col(position = position_likert(reverse = FALSE)) +
      geom_text(
        aes(
          label = label_percent_abs(
            hide_below = .01,
            accuracy = 1,
            scale = 1
          )(Percentage)
        ),
        position = position_likert(vjust = 0.5, reverse = FALSE),
        size = 3.5,
        fontface = "bold"
      ) +
      geom_label(
        data = df_tot,
        aes(
          label = label_percent_abs(
            hide_below = .01,
            accuracy = 1,
            scale = 1
          )(Percentage),
          x = ifelse(where == "lower", -.8, .8),
          fill = NULL
        ),
        size = 3.5,
        fontface = "bold",
        label.size = 0.2,
        show.legend = FALSE
      ) +
      scale_x_continuous(labels = label_percent_abs()) +
      scale_y_discrete(limits = var_ordered) +
      scale_fill_manual(values = custom_colors) +
      labs(
        title = "Likert Responses by Category",
        x = "Category",
        y = "Percentage",
        fill = "Likert Scale"
      ) +
      # The second labs() call blanks the axis and legend titles set above;
      # only the plot title remains.
      labs(x = NULL, y = NULL, fill = NULL) +
      theme_bw() +
      theme(panel.border = element_rect(color = "black")) +
      coord_cartesian(clip = "off")
    
    
    
    # Keep only the categories that are shown in the Likert plot.
    df <- df %>%
      filter(var %in% var_ordered)

    # Horizontal bar plot
    # One grey bar per category showing the row count `n`, with the count
    # printed as a borderless, unfilled label at the middle of the bar.
    bar_plot <- ggplot(df, aes(x = n, y = var)) +
      geom_col(fill = "lightgrey") +
      geom_label(
        aes(
          # accuracy = 1 prints the exact count; accuracy = 2 would round every
          # count to the nearest multiple of 2 and mislabel odd counts.
          label = label_number_abs(hide_below = .05, accuracy = 1)(n)
        ),
        size = 3.5,
        position = position_stack(vjust = 0.5),
        hjust = 1,
        fill = NA,
        label.size = 0,
        color = "black"
      ) +
      # Same category order as the Likert plot so the rows line up.
      scale_y_discrete(limits = var_ordered) +
      scale_x_continuous(
        labels = label_percent_abs(),
        expand = c(0, .15)
      ) +
      theme_light() +
      theme(
        legend.position = "bottom",
        panel.grid.major.y = element_blank(),
        panel.border = element_rect(color = "black"),
        axis.text.x = element_blank() # Hides x-axis numbers
      ) +
      labs(x = NULL, y = NULL, fill = NULL)
    
    # Print plots
    
    # Put the Likert plot and the bar plot side by side with a 4:1 width ratio
    # and move the legend to the bottom in every panel. The argument is spelled
    # out as `widths` instead of relying on partial matching of `width`.
    likert_plot + bar_plot +
      plot_layout(widths = c(4, 1)) &
      theme(legend.position = "bottom")
    
    

    我收到:

    enter image description here

    但在控制台中,我收到一条警告消息:

    Warning message:
    Removed 20 rows containing missing values or values outside the scale range (`geom_label()`). 
    

    我为什么会收到这个警告?是关于NA的吗?我怎样才能阻止这种情况?

    1 回复  |  直到 9 月前
        1
  •  1
  •   stefan    9 月前

    问题在于 df_tot 包含 var 的所有类别,但由于你设置了 limits = var_ordered,所有不在 var_ordered 中的类别都超出了刻度范围,因而被删除,也就是说,这些类别属于"超出刻度范围的值"。

    要消除这个警告,可以在 geom_label 中使用 data = df_tot |> filter(var %in% var_ordered),使其只包含 var_ordered 中的类别。

    library(tidyverse)
    library(ggstats)
    library(patchwork)
    
    # Same Likert plot as in the question, except that the label layer only
    # receives the df_tot rows whose category is in var_ordered, so no label
    # falls outside the limits of the y scale.
    likert_plot <-
      ggplot(df_long, aes(x = Percentage, y = var, fill = Likert)) +
      geom_col(position = position_likert(reverse = FALSE)) +
      geom_text(
        aes(
          label = label_percent_abs(
            hide_below = .01,
            accuracy = 1,
            scale = 1
          )(Percentage)
        ),
        position = position_likert(vjust = 0.5, reverse = FALSE),
        size = 3.5,
        fontface = "bold"
      ) +
      geom_label(
        data = filter(df_tot, var %in% var_ordered),
        aes(
          label = label_percent_abs(
            hide_below = .01,
            accuracy = 1,
            scale = 1
          )(Percentage),
          x = ifelse(where == "lower", -.8, .8),
          fill = NULL
        ),
        size = 3.5,
        fontface = "bold",
        label.size = 0.2,
        show.legend = FALSE
      ) +
      scale_x_continuous(labels = label_percent_abs()) +
      scale_y_discrete(limits = var_ordered) +
      scale_fill_manual(values = custom_colors) +
      labs(
        title = "Likert Responses by Category",
        x = "Category",
        y = "Percentage",
        fill = "Likert Scale"
      ) +
      # The second labs() call blanks the axis and legend titles set above;
      # only the plot title remains.
      labs(x = NULL, y = NULL, fill = NULL) +
      theme_bw() +
      theme(panel.border = element_rect(color = "black")) +
      coord_cartesian(clip = "off")
    

    ...

    # Print plots
    
    # Put the Likert plot and the bar plot side by side with a 4:1 width ratio
    # and move the legend to the bottom in every panel. The argument is spelled
    # out as `widths` instead of relying on partial matching of `width`.
    likert_plot + bar_plot +
      plot_layout(widths = c(4, 1)) &
      theme(legend.position = "bottom")
    

    推荐文章