代码之家  ›  专栏  ›  技术社区  ›  Joke O.

为 for 循环添加条件,以便使用 R 中的 classInt 为行数少于 n 个类的区域生成类间隔

  •  0
  • Joke O.  · 技术社区  · 11 月前

    我在 for 循环中使用 R 的 classInt 库划分类区间时遇到了问题:有些子集不满足划分区间的条件。如果数据帧子集中的行数小于为类区间定义的类数(n),该如何加入条件来自动指定类?在我的例子中,n=3。下面是一个可以在 R 中创建的示例数据帧。

    library(classInt)
    library(rlist)
    library(dplyr)
    
    ## Build the example data frame: one row per (Country, Area) observation
    Country <- c(
      rep(c("Australia", "Italy", "Peru", "China"), times = 3),
      "Nigeria", "Australia", "Italy", "Peru", "China"
    )
    Time <- c(
      21, 18, 17, 10,  # Area A
      10, 15, 27, 0,   # Area B
      2, 4, 5, 7,      # Area C
      4, 8, 9, 10, 5   # Area D (Nigeria appears only here)
    )
    Area <- rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
    DF <- data.frame(Country = Country, Time = Time, Area = Area)
    
    This should produce this dataframe:
    
    
          Country Time Area
     1  Australia   21    A
     2      Italy   18    A
     3       Peru   17    A
     4      China   10    A
     5  Australia   10    B
     6      Italy   15    B
     7       Peru   27    B
     8      China    0    B
     9  Australia    2    C
     10     Italy    4    C
     11      Peru    5    C
     12     China    7    C
     13   Nigeria    4    D
     14 Australia    8    D
     15     Italy    9    D
     16      Peru   10    D
     17     China    5    D
    
    
    ## Split by Country
    NewXL <- split(DF, DF$Country)

    ## Generate the ranges and category/classes for each country
    NewXL2 <- list()
    for (i in 1:length(NewXL)) {
      AB <- NewXL[[i]]

      # Probe whether 3 Fisher classes can be computed for this country.
      # If classIntervals() raises an error, the country is skipped and
      # nothing is stored for it in NewXL2.
      skip_to_next <- tryCatch(
        {
          Classes <- classIntervals(
            AB$Time,
            n = 3,
            cutlabels = FALSE,
            style = "fisher",
            factor = FALSE,
            warnSmallN = FALSE,
            warnLargeN = FALSE
          )
          FALSE
        },
        error = function(e) TRUE
      )
      if (skip_to_next) {
        next
      }

      ## Classify
      # Range_Abs: interval of each Time value, as a factor
      # Class_Abs: class of each Time value, not as a factor
      AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = TRUE)
      AB$Class_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = FALSE)

      NewXL2[[i]] <- AB
    }
    

    结果是一个包含5个国家的列表,其中尼日利亚为空(NULL),因为它只有一行(创建间隔最好至少有3行)。有没有办法在 for 循环中加入条件,让我可以为循环中只有一行的任何数据帧指定一个类和范围最小值?在这种情况下,尼日利亚只有一行,所以我希望自动将第3类(最大值)分配给这一行,范围为[0,4)。下面是循环的输出。

      NewXL2
      [[1]]
           Country Time Area Range_Abs Class_Abs
      1  Australia   21    A [15.5,21]         3
      5  Australia   10    B  [5,15.5)         2
      9  Australia    2    C     [2,5)         1
      14 Australia    8    D  [5,15.5)         2
    
      [[2]]
         Country Time Area Range_Abs Class_Abs
      4    China   10    A  [8.5,10]         3
      8    China    0    B   [0,2.5)         1
      12   China    7    C [2.5,8.5)         2
      17   China    5    D [2.5,8.5)         2
    
      [[3]]
         Country Time Area Range_Abs Class_Abs
      2    Italy   18    A   [12,18]         3
      6    Italy   15    B   [12,18]         3
      10   Italy    4    C   [4,6.5)         1
      15   Italy    9    D  [6.5,12)         2
    
      [[4]]
      NULL
    
      [[5]]
         Country Time Area Range_Abs Class_Abs
      3     Peru   17    A [13.5,22)         2
      7     Peru   27    B   [22,27]         3
      11    Peru    5    C  [5,13.5)         1
      16    Peru   10    D  [5,13.5)         1
    

    这是我希望尼日利亚数据帧在循环后的样子:

    [[4]]
        Country Time Area Range_Abs Class_Abs
     13 Nigeria   10    D     [0,4)         3  
    
     # Merge the per-country data frames in NewXL2 into one long data frame
     # that carries the class-interval columns (list.rbind() is provided by
     # the rlist package loaded at the top of the script).
     NewXL2b <- list.rbind(NewXL2)  
    
    1 回复  |  直到 11 月前
        1
  •  2
  •   the-mad-statter    11 月前

    您可以在循环中使用 if / else if / else:

    library(classInt)
    
    # Example data: one row per country/area observation
    data <- data.frame(
      country = c(
        "Australia", "Italy", "Peru", "China",
        "Australia", "Italy", "Peru", "China",
        "Australia", "Italy", "Peru", "China",
        "Nigeria", "Australia", "Italy", "Peru", "China"
      ),
      time = c(21, 18, 17, 10, 10, 15, 27, 0, 2, 4, 5, 7, 4, 8, 9, 10, 5),
      area = rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
    )

    # One data frame per country, named by country
    split_data <- split(data, f = data$country)
    
    # Number of classes requested from classInt for countries with enough rows.
    # The same value is used for the feasibility probe and for the actual
    # classification so that both exercise the same configuration.
    n_classes <- 3

    # One slot per country, in the order of split_data. A country whose
    # intervals cannot be computed keeps a NULL slot.
    result <- vector("list", length(split_data))

    for (i in seq_along(split_data)) {
      country_data <- split_data[[i]]
      n_rows <- nrow(country_data)

      if (n_rows %in% c(1, 2)) {
        # Too few rows to ask classInt for intervals: treat every distinct
        # time as its own level. unique() matters for two rows with the same
        # time, because factor() rejects duplicated levels.
        time_levels <- sort(unique(country_data$time))

        if (n_rows == 1) {
          # Add a made-up lower level strictly below the given time so that
          # the given time is the second level. This stays -1 for any
          # non-negative time and moves further down for times <= -1.
          time_levels <- c(min(-1, time_levels - 1), time_levels)
        }

        country_data$range_abs <- factor(
          country_data$time,
          levels = time_levels,
          labels = sprintf("[%s]", time_levels)
        )
        country_data$class_abs <- as.numeric(country_data$range_abs)
      } else {
        # Probe first: if classIntervals() cannot build the breaks, report it
        # and leave this country's slot empty instead of aborting the loop.
        intervals_ok <- tryCatch(
          {
            classIntervals(
              country_data$time,
              n = n_classes,
              cutlabels = FALSE,
              style = "fisher",
              factor = FALSE,
              warnSmallN = FALSE,
              warnLargeN = FALSE
            )
            TRUE
          },
          error = function(e) {
            warning(
              "Skipping ", names(split_data)[i], ": ", conditionMessage(e),
              call. = FALSE
            )
            FALSE
          }
        )

        if (!intervals_ok) {
          next
        }

        # range_abs: interval of each time, as a factor
        # class_abs: class of each time, not as a factor
        country_data$range_abs <- classify_intervals(
          country_data$time, n_classes, "fisher", factor = TRUE
        )
        country_data$class_abs <- classify_intervals(
          country_data$time, n_classes, "fisher", factor = FALSE
        )
      }

      result[[i]] <- country_data
    }

    result
    #> [[1]]
    #>      country time area range_abs class_abs
    #> 1  Australia   21    A [15.5,21]         3
    #> 5  Australia   10    B  [5,15.5)         2
    #> 9  Australia    2    C     [2,5)         1
    #> 14 Australia    8    D  [5,15.5)         2
    #> 
    #> [[2]]
    #>    country time area range_abs class_abs
    #> 4    China   10    A  [8.5,10]         3
    #> 8    China    0    B   [0,2.5)         1
    #> 12   China    7    C [2.5,8.5)         2
    #> 17   China    5    D [2.5,8.5)         2
    #> 
    #> [[3]]
    #>    country time area range_abs class_abs
    #> 2    Italy   18    A   [12,18]         3
    #> 6    Italy   15    B   [12,18]         3
    #> 10   Italy    4    C   [4,6.5)         1
    #> 15   Italy    9    D  [6.5,12)         2
    #> 
    #> [[4]]
    #>    country time area range_abs class_abs
    #> 13 Nigeria    4    D       [4]         2
    #> 
    #> [[5]]
    #>    country time area range_abs class_abs
    #> 3     Peru   17    A [13.5,22)         2
    #> 7     Peru   27    B   [22,27]         3
    #> 11    Peru    5    C  [5,13.5)         1
    #> 16    Peru   10    D  [5,13.5)         1
    

    创建于 2024-07-03,使用 reprex v2.1.0.9000

    Reprex 文件托管于 GitHub