代码之家  ›  专栏  ›  技术社区  ›  antecessor

stat_summary 中的均值线错误地连接了 X 轴上的因子组

  •  0
  • antecessor  · 技术社区  · 7 年前

    先给出一些示例数据,以便复现问题:

    # Example data set for the plots below: a data.frame with two columns,
    #   group - character group code ("AUS", "ERE", "ERH", "NEA" or "PAR")
    #   num   - numeric measurement; contains NA for some rows
    # row.names is given in the compact form c(NA, 200L), i.e. 200 rows are declared.
    # NOTE(review): the layout looks like pasted dput() output - confirm before
    # editing values by hand.
    data <- structure(list(group = c("AUS", "AUS", "AUS", "AUS", "AUS", 
                                       "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                                       "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                                       "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                                       "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", 
                                       "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", 
                                       "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", "ERE", "ERH", "ERH", 
                                       "ERH", "ERH", "ERH", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", 
                                       "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "NEA", "ERH", 
                                       "ERH", "ERH", "ERH", "ERH", "ERH", "ERH", "ERH", "ERH", "PAR", 
                                       "PAR", "PAR", "PAR", "PAR", "PAR", "PAR"), num = c(165, 186, 95, 
                                                                                         168, 170, 180, 126, 131, 77, 116, 158, 100, 134, 119, 132, 117, 
                                                                                         110, 121, 107, 170, 110, 111, 112, 113, 114, 115, 116, 135, 92, 
                                                                                         103, 112, 117, 123, 146, 130, 138, 82, 62, 57, 75, 82, 114, 127, 
                                                                                         138, 92, 94, 96, 110, 90, 92, 93, 95, 97, 99, 101, 106, 100, 
                                                                                         95, 110, 97, NA, 153, 147, 149, 167, 167, 186, 150, 151, 198, 
                                                                                         143, 154, 119, 158, 145, 172, 177, 139, 136, 168, 128, 137, 137, 
                                                                                         138, 152, 127, 116, 156, 114, 157, 145, 114, 143, 151, 144, 121, 
                                                                                         142, 138, 122, 139, 140, 139, 161, 134, 144, 148, 145, 156, 146, 
                                                                                         146, 138, 133, 135, 90, 119, 141, 98, 74, 75, 81, 84, 94, 102, 
                                                                                         102, 112, 158, 162, 184, 202, 118, NA, NA, 133, NA, NA, NA, 122, 
                                                                                         NA, NA, 115, 81, 85, 99, 130, 137, 92, 92, 93, 94, 96, 100, 105, 
                                                                                         105, 107, 109, 119, 134, 138, 138, 143, 147, 159, 105, 107, 109, 
                                                                                         119, 70, 74, 78, 79, 82, 84, 86, 91, 95, 96, 103, 106, 107, 114, 
                                                                                         128, 128, 140, 154, NA, NA, NA, NA, 192, NA, NA, NA, NA, 113, 
                                                                                         NA, NA, NA, NA, NA, 91)), .Names = c("group", "num"), row.names = c(NA, 
                                                                                                                                                              200L), class = "data.frame")
    

    由于我需要X轴上的因子的特定顺序,所以我定义了对象 order :

    # Group codes in the left-to-right sequence wanted on the x axis.
    # The variable shares its name with base::order(); function calls such as
    # order(x) still resolve to the base function.
    order <- c(
      "AUS", "PAR", "ERH", "ERE", "NEA"
    )
    

    现在,我运行 ggplot 函数:

    library(ggplot2)
    # Per-group violins and jittered points of `num`, overlaid with the group
    # means (dark red points) and a thin dashed dark red line joining the means.
    ggplot(data, aes(x = group)) +
      # Restrict and arrange the x axis according to the `order` vector.
      scale_x_discrete(limits = order) +
      geom_violin(
        aes(y = num, colour = group, alpha = 0.3, fill = group),
        size = 0.3
      ) +
      geom_jitter(
        aes(y = num, colour = group, fill = group, alpha = 0.5),
        position = position_jitter(width = 0.1),
        size = 1
      ) +
      # group = 1 puts every mean into one series so a single line is drawn.
      stat_summary(
        aes(y = num, group = 1),
        fun.y = mean,
        colour = "darkred",
        geom = "line",
        group = 1,
        lwd = 0.3,
        lty = 2
      ) +
      stat_summary(
        aes(y = num),
        fun.y = mean,
        colour = "darkred",
        geom = "point"
      )
    

    我得到了这张图: enter image description here

    正如你所看到的,各组的均值点绘制正确,但均值连线却不对。如何让这条线按照 X 轴上显示的顺序依次连接各组?

    1 回复  |  直到 7 年前
        1
  •  3
  •   pogibas    7 年前

    如果您不介意修改数据,可以把 data$group 转换为因子(factor),并按所需顺序指定其水平(levels):

    # Convert `group` to a factor whose levels are listed in the wanted x-axis
    # order; ggplot2 arranges a discrete axis by factor levels.
    data$group <- factor(
      data$group,
      levels = c("AUS", "PAR", "ERH", "ERE", "NEA")
    )
    
    library(ggplot2)
    # Aesthetics shared by every layer are declared once in ggplot(); constant
    # settings (alpha, size, colour) are passed as layer parameters, not in aes().
    ggplot(data, aes(group, num, colour = group, fill = group)) +
      geom_violin(alpha = 0.3, size = 0.3) +
      geom_jitter(
        alpha = 0.5,
        position = position_jitter(width = 0.1),
        size = 1
      ) +
      # group = 1 joins all group means with one dashed line.
      stat_summary(
        fun.y = mean,
        colour = "darkred",
        geom = "line",
        group = 1,
        lwd = 0.3,
        lty = 2
      ) +
      stat_summary(fun.y = mean, colour = "darkred", geom = "point")
    

    enter image description here