R cut Function


The cut() function divides the range of a numeric vector into intervals and returns a factor whose levels are those intervals.

cut(x, breaks, labels = NULL,
    include.lowest = FALSE, right = TRUE, dig.lab = 3,
    ordered_result = FALSE, ...)

• x: numeric vector
• breaks: either a numeric vector of two or more unique break points, or a single number (at least 2) giving the number of intervals to cut x into.
• labels: level labels, character vector.
...

> x <- stats::rnorm(100)
> x
  [1] -0.154103462  0.271704132 -0.234160855  0.764474679  0.438237645
  [6] -0.763854668  1.303402711  0.051660328  1.064258570  0.079144697
 [11] -0.704381407  2.239763673 -0.749203152  0.601148921 -0.174814689
 [16]  0.100238929  0.670921777 -0.351881772 -1.452691553  0.774250401
 [21]  0.985238459 -0.159947063  0.456925349  0.062732203 -0.139094156
 [26] -0.021987877 -0.369758710 -0.623015605  0.818971164  1.024360342
 [31] -1.180039385 -1.126115746 -1.331609773  0.261068252  0.306040509
 [36]  0.186887898  0.039764640  0.618133561  0.808466877  1.530479825
 [41] -0.326594787 -0.525549355 -0.038649831 -0.320394434 -0.116615568
 [46] -0.928403864  1.284014444  0.559523194  0.511753047 -0.093609863
 [51] -1.199423552 -0.358438485 -1.421215594 -0.199430722 -1.285244671
 [56] -0.344308069  0.202383513 -1.044830704  0.009940864 -1.083693166
 [61]  0.985718206  0.942167477  0.077569581  1.456191918 -1.385394960
 [66] -0.174887806 -0.869293103  1.051227075 -0.726361522  0.082628666
 [71]  1.275779587  0.258221666 -0.629207453 -0.589352154 -0.818233970
 [76]  0.028423636 -0.491220068  0.796916741 -1.407925480  0.765093431
 [81] -0.263630781  0.854937357  0.592710059 -0.095388956 -1.064601796
 [86]  0.691149856  0.822038961  0.666786287 -1.062610036 -2.833961199
 [91]  1.570993774 -0.876630726 -0.343492831 -0.480549452  1.494723381
 [96] -2.025528709  0.949853574 -0.917568904 -1.103676434  0.728284402


Divide the data into unit-width intervals, with break points at the integers from -5 to 5:
> c <- cut(x,breaks=-5:5)
> c
  [1] (-1,0]  (0,1]   (-1,0]  (0,1]   (0,1]   (-1,0]  (1,2]   (0,1]   (1,2]  
 [10] (0,1]   (-1,0]  (2,3]   (-1,0]  (0,1]   (-1,0]  (0,1]   (0,1]   (-1,0] 
 [19] (-2,-1] (0,1]   (0,1]   (-1,0]  (0,1]   (0,1]   (-1,0]  (-1,0]  (-1,0] 
 [28] (-1,0]  (0,1]   (1,2]   (-2,-1] (-2,-1] (-2,-1] (0,1]   (0,1]   (0,1]  
 [37] (0,1]   (0,1]   (0,1]   (1,2]   (-1,0]  (-1,0]  (-1,0]  (-1,0]  (-1,0] 
 [46] (-1,0]  (1,2]   (0,1]   (0,1]   (-1,0]  (-2,-1] (-1,0]  (-2,-1] (-1,0] 
 [55] (-2,-1] (-1,0]  (0,1]   (-2,-1] (0,1]   (-2,-1] (0,1]   (0,1]   (0,1]  
 [64] (1,2]   (-2,-1] (-1,0]  (-1,0]  (1,2]   (-1,0]  (0,1]   (1,2]   (0,1]  
 [73] (-1,0]  (-1,0]  (-1,0]  (0,1]   (-1,0]  (0,1]   (-2,-1] (0,1]   (-1,0] 
 [82] (0,1]   (0,1]   (-1,0]  (-2,-1] (0,1]   (0,1]   (0,1]   (-2,-1] (-3,-2]
 [91] (1,2]   (-1,0]  (-1,0]  (-1,0]  (1,2]   (-3,-2] (0,1]   (-1,0]  (-2,-1]
[100] (0,1]  
10 Levels: (-5,-4] (-4,-3] (-3,-2] (-2,-1] (-1,0] (0,1] (1,2] (2,3] ... (4,5]


Check the data distribution in different ranges:
> summary(c) #or table(c)
c
(-5,-4] (-4,-3] (-3,-2] (-2,-1]  (-1,0]   (0,1]   (1,2]   (2,3]   (3,4]   (4,5] 
      0       0       2      14      35      38      10       1       0       0 

The numbers are divided into 10 levels, each of width 1, because the break points are the integers from -5 to 5. Some levels are empty. Instead of listing the break points, let's specify only the number of levels:
> x <- stats::rnorm(100) #random numbers, different every time
> c <- cut(x,breaks=10,dig.lab=2)
> summary(c)
    (-2,-1.6]   (-1.6,-1.1]  (-1.1,-0.69] (-0.69,-0.24]  (-0.24,0.21] 
            5             5            13            20            18 
  (0.21,0.65]    (0.65,1.1]     (1.1,1.5]       (1.5,2]       (2,2.4] 
           12            14             6             3             4 

Label all the levels:
> x <- stats::rnorm(100) #random numbers, different every time
> c <- cut(x,breaks=10,dig.lab=2,labels=1:10)
> summary(c)
 1  2  3  4  5  6  7  8  9 10 
 5  5 13 20 18 12 14  6  3  4

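Try again, this time with explicit, unevenly spaced break points. Values outside (-2, 2] become NA and are omitted by table(), which is why the counts can sum to less than 100: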
> x <- stats::rnorm(100) #random numbers, different every time
> c <- cut(x,breaks=c(-2,0,1,2))
> table(c)
c
(-2,0]  (0,1]  (1,2] 
    52     32     11