代码之家 › 专栏 › 技术社区 › Joke O.

在 for 循环中添加条件:使用 R 的 classInt 为行数少于类数 n 的分组生成类区间

intervals grouping for-loop loops r

Joke O. · 技术社区 · 11 月前

我在 for 循环中使用 R 的 classInt 包划分类区间时遇到了问题:有些子集不满足划分条件,因而无法被分配类别。如果数据帧某个子集的行数小于为类区间设定的类数(n),该如何在循环中添加条件,自动为其指定类别?在我的例子中,n=3。下面是一个可以在 R 中直接创建的示例数据帧:

library(classInt)
library(rlist)
library(dplyr)

## Build the example data: one observation per country/area pair
Country <- c(
  "Australia", "Italy", "Peru", "China",
  "Australia", "Italy", "Peru", "China",
  "Australia", "Italy", "Peru", "China",
  "Nigeria", "Australia", "Italy", "Peru", "China"
)
Time <- c(
  21, 18, 17, 10,
  10, 15, 27, 0,
  2, 4, 5, 7,
  4, 8, 9, 10, 5
)
# Areas A-C hold four observations each, area D holds five
Area <- rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
DF <- data.frame(Country = Country, Time = Time, Area = Area)

This should produce this dataframe:


      Country Time Area
 1  Australia   21    A
 2      Italy   18    A
 3       Peru   17    A
 4      China   10    A
 5  Australia   10    B
 6      Italy   15    B
 7       Peru   27    B
 8      China    0    B
 9  Australia    2    C
 10     Italy    4    C
 11      Peru    5    C
 12     China    7    C
 13   Nigeria    4    D
 14 Australia    8    D
 15     Italy    9    D
 16      Peru   10    D
 17     China    5    D


## Split the data into one data frame per country
NewXL <- split(DF, DF$Country)

## Generate the ranges and category/classes for each country.
## A country for which classIntervals() raises an error is skipped, so its
## position in NewXL2 is never assigned.
NewXL2 <- list()
for (i in seq_along(NewXL)) {
  AB <- NewXL[[i]]

  # Probe: try to build the Fisher class intervals first. The handler returns
  # NULL on error, which replaces the former `<<-` flag variable.
  Classes <- tryCatch(
    classIntervals(
      AB$Time,
      n = 3,
      cutlabels = FALSE,
      style = "fisher",
      factor = FALSE,
      warnSmallN = FALSE,
      warnLargeN = FALSE
    ),
    error = function(e) NULL
  )
  if (is.null(Classes)) {
    next
  }

  ## Classify
  # Range_Abs: interval label of each Time value (factor)
  AB$Range_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = TRUE)
  # Class_Abs: numeric class index of each Time value
  AB$Class_Abs <- classify_intervals(AB$Time, 3, "fisher", factor = FALSE)

  NewXL2[[i]] <- AB
}

结果是一个包含 5 个国家的列表,其中尼日利亚对应的元素为空(NULL),因为它只有一行数据(创建区间最好至少有 3 行)。有没有办法在循环中添加代码,为只有一行的数据帧手动指定类别和区间下限?在本例中,尼日利亚只有一行,所以我希望自动把第 3 类(最大类)分配给这一行,区间为 [0,4)。下面是循环当前的输出:

  NewXL2
  [[1]]
       Country Time Area Range_Abs Class_Abs
  1  Australia   21    A [15.5,21]         3
  5  Australia   10    B  [5,15.5)         2
  9  Australia    2    C     [2,5)         1
  14 Australia    8    D  [5,15.5)         2

  [[2]]
     Country Time Area Range_Abs Class_Abs
  4    China   10    A  [8.5,10]         3
  8    China    0    B   [0,2.5)         1
  12   China    7    C [2.5,8.5)         2
  17   China    5    D [2.5,8.5)         2

  [[3]]
     Country Time Area Range_Abs Class_Abs
  2    Italy   18    A   [12,18]         3
  6    Italy   15    B   [12,18]         3
  10   Italy    4    C   [4,6.5)         1
  15   Italy    9    D  [6.5,12)         2

  [[4]]
  NULL

  [[5]]
     Country Time Area Range_Abs Class_Abs
  3     Peru   17    A [13.5,22)         2
  7     Peru   27    B   [22,27]         3
  11    Peru    5    C  [5,13.5)         1
  16    Peru   10    D  [5,13.5)         1

我希望循环结束后尼日利亚的数据帧是这样的:

[[4]]
    Country Time Area Range_Abs Class_Abs
 13 Nigeria    4    D     [0,4)         3

 # Row-bind the per-country data frames stored in NewXL2 into one long
 # data frame that carries the class-interval columns
 NewXL2b <- list.rbind(NewXL2)

1 回复 | 直到 11 月前

the-mad-statter 11 月前

您可以在循环中使用 if / else if / else:

library(classInt)

# Example data: one observation per country/area pair.
data <- data.frame(
  country = c(
    "Australia", "Italy", "Peru", "China",
    "Australia", "Italy", "Peru", "China",
    "Australia", "Italy", "Peru", "China",
    "Nigeria", "Australia", "Italy", "Peru", "China"
  ),
  time = c(
    21, 18, 17, 10,
    10, 15, 27, 0,
    2, 4, 5, 7,
    4, 8, 9, 10, 5
  ),
  # Areas A-C hold four observations each, area D holds five.
  area = rep(c("A", "B", "C", "D"), times = c(4, 4, 4, 5))
)

# One data frame per country, named by country.
split_data <- split(data, f = data$country)

# Filled by the loop with the classified per-country data frames.
result <- list()

# Number of classes requested from classInt. The same value is used for the
# feasibility probe and for the classification so the two cannot disagree.
n_classes <- 3

for (i in seq_along(split_data)) {
  # `group` rather than `split`, so base::split() is not shadowed
  group <- split_data[[i]]

  if (nrow(group) %in% c(1L, 2L)) {
    # Too few rows to fit class intervals: every distinct time becomes its
    # own level. unique() keeps factor() from failing on two equal times.
    lvls <- sort(unique(group$time))
    if (length(lvls) == 1) {
      # add a made-up lower level just below the given time so that the
      # given time is the second level (also works for times <= -1)
      lvls <- c(lvls - 1, lvls)
    }
    lbls <- sprintf("[%s]", lvls)
    group$range_abs <- factor(group$time, levels = lvls, labels = lbls)
    group$class_abs <- as.numeric(group$range_abs)
  } else {
    # Probe: check that the Fisher intervals can be built for this group.
    # On error the group is reported and skipped instead of stopping the loop.
    fit <- tryCatch(
      classIntervals(
        group$time,
        n = n_classes,
        cutlabels = FALSE,
        style = "fisher",
        factor = FALSE,
        warnSmallN = FALSE,
        warnLargeN = FALSE
      ),
      error = function(e) {
        warning(
          "Skipping group '", names(split_data)[i], "': ",
          conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )

    if (is.null(fit)) {
      next
    }

    # range_abs: interval label (factor); class_abs: numeric class index
    group$range_abs <- classify_intervals(
      group$time, n_classes, "fisher", factor = TRUE
    )
    group$class_abs <- classify_intervals(
      group$time, n_classes, "fisher", factor = FALSE
    )
  }

  result[[i]] <- group
}

# Print the list of classified per-country data frames
result
#> [[1]]
#>      country time area range_abs class_abs
#> 1  Australia   21    A [15.5,21]         3
#> 5  Australia   10    B  [5,15.5)         2
#> 9  Australia    2    C     [2,5)         1
#> 14 Australia    8    D  [5,15.5)         2
#> 
#> [[2]]
#>    country time area range_abs class_abs
#> 4    China   10    A  [8.5,10]         3
#> 8    China    0    B   [0,2.5)         1
#> 12   China    7    C [2.5,8.5)         2
#> 17   China    5    D [2.5,8.5)         2
#> 
#> [[3]]
#>    country time area range_abs class_abs
#> 2    Italy   18    A   [12,18]         3
#> 6    Italy   15    B   [12,18]         3
#> 10   Italy    4    C   [4,6.5)         1
#> 15   Italy    9    D  [6.5,12)         2
#> 
#> [[4]]
#>    country time area range_abs class_abs
#> 13 Nigeria    4    D       [4]         2
#> 
#> [[5]]
#>    country time area range_abs class_abs
#> 3     Peru   17    A [13.5,22)         2
#> 7     Peru   27    B   [22,27]         3
#> 11    Peru    5    C  [5,13.5)         1
#> 16    Peru   10    D  [5,13.5)         1

创建于 2024-07-03,使用 reprex v2.1.0.9000

Reprex 文件托管于 GitHub