cut(x, breaks, labels = NULL, include.lowest = FALSE, right = TRUE, dig.lab = 3, ordered_result = FALSE, ...)
• x
: numeric vector
• breaks
: break points, number or numeric vector.
• labels
: level labels, character vector.
• include.lowest
: logical; whether an x value equal to the lowest (or highest, for right = FALSE) break point is included
• right
: logical; if TRUE the intervals are closed on the right (and open on the left), if FALSE they are closed on the left (and open on the right)
> x <- stats::rnorm(100) > x
[1] -0.154103462 0.271704132 -0.234160855 0.764474679 0.438237645 [6] -0.763854668 1.303402711 0.051660328 1.064258570 0.079144697 [11] -0.704381407 2.239763673 -0.749203152 0.601148921 -0.174814689 [16] 0.100238929 0.670921777 -0.351881772 -1.452691553 0.774250401 [21] 0.985238459 -0.159947063 0.456925349 0.062732203 -0.139094156 [26] -0.021987877 -0.369758710 -0.623015605 0.818971164 1.024360342 [31] -1.180039385 -1.126115746 -1.331609773 0.261068252 0.306040509 [36] 0.186887898 0.039764640 0.618133561 0.808466877 1.530479825 [41] -0.326594787 -0.525549355 -0.038649831 -0.320394434 -0.116615568 [46] -0.928403864 1.284014444 0.559523194 0.511753047 -0.093609863 [51] -1.199423552 -0.358438485 -1.421215594 -0.199430722 -1.285244671 [56] -0.344308069 0.202383513 -1.044830704 0.009940864 -1.083693166 [61] 0.985718206 0.942167477 0.077569581 1.456191918 -1.385394960 [66] -0.174887806 -0.869293103 1.051227075 -0.726361522 0.082628666 [71] 1.275779587 0.258221666 -0.629207453 -0.589352154 -0.818233970 [76] 0.028423636 -0.491220068 0.796916741 -1.407925480 0.765093431 [81] -0.263630781 0.854937357 0.592710059 -0.095388956 -1.064601796 [86] 0.691149856 0.822038961 0.666786287 -1.062610036 -2.833961199 [91] 1.570993774 -0.876630726 -0.343492831 -0.480549452 1.494723381 [96] -2.025528709 0.949853574 -0.917568904 -1.103676434 0.728284402
Divide the data into ranges -5 ~ 5:
> c <- cut(x,breaks=-5:5) > c
[1] (-1,0] (0,1] (-1,0] (0,1] (0,1] (-1,0] (1,2] (0,1] (1,2] [10] (0,1] (-1,0] (2,3] (-1,0] (0,1] (-1,0] (0,1] (0,1] (-1,0] [19] (-2,-1] (0,1] (0,1] (-1,0] (0,1] (0,1] (-1,0] (-1,0] (-1,0] [28] (-1,0] (0,1] (1,2] (-2,-1] (-2,-1] (-2,-1] (0,1] (0,1] (0,1] [37] (0,1] (0,1] (0,1] (1,2] (-1,0] (-1,0] (-1,0] (-1,0] (-1,0] [46] (-1,0] (1,2] (0,1] (0,1] (-1,0] (-2,-1] (-1,0] (-2,-1] (-1,0] [55] (-2,-1] (-1,0] (0,1] (-2,-1] (0,1] (-2,-1] (0,1] (0,1] (0,1] [64] (1,2] (-2,-1] (-1,0] (-1,0] (1,2] (-1,0] (0,1] (1,2] (0,1] [73] (-1,0] (-1,0] (-1,0] (0,1] (-1,0] (0,1] (-2,-1] (0,1] (-1,0] [82] (0,1] (0,1] (-1,0] (-2,-1] (0,1] (0,1] (0,1] (-2,-1] (-3,-2] [91] (1,2] (-1,0] (-1,0] (-1,0] (1,2] (-3,-2] (0,1] (-1,0] (-2,-1] [100] (0,1] 10 Levels: (-5,-4] (-4,-3] (-3,-2] (-2,-1] (-1,0] (0,1] (1,2] (2,3] ... (4,5]
Check the data distribution in different ranges:
> summary(c) #or table(c)
c (-5,-4] (-4,-3] (-3,-2] (-2,-1] (-1,0] (0,1] (1,2] (2,3] (3,4] (4,5] 0 0 2 14 35 38 10 1 0 0
Because breaks = -5:5 supplies 11 break points spaced 1 apart, the numbers are divided into 10 levels of width 1.
Some levels are empty.
Let's try specifying only the number of levels (the range of x is then split into that many equal-width intervals):
> x <- stats::rnorm(100)#random numbers, different every time > c <- cut(x,breaks=10,dig.lab=2) > summary(c)
(-2,-1.6] (-1.6,-1.1] (-1.1,-0.69] (-0.69,-0.24] (-0.24,0.21] 5 5 13 20 18 (0.21,0.65] (0.65,1.1] (1.1,1.5] (1.5,2] (2,2.4] 12 14 6 3 4
Label all the levels:
> x <- stats::rnorm(100) > c <- cut(x,breaks=10,dig.lab=2,labels=1:10) > summary(c)
1 2 3 4 5 6 7 8 9 10 5 5 13 20 18 12 14 6 3 4
Try again, divide into different ranges (break points):
> x <- stats::rnorm(100) > c <- cut(x,breaks=c(-2,0,1,2)) > table(c)
c (-2,0] (0,1] (1,2] 52 32 11
The right argument indicates whether the intervals should be closed on the right and open on the left
(right=TRUE, the default) or vice versa (right=FALSE). With the default include.lowest=FALSE, a value equal to the
lowest break point (or the highest break point, for right=FALSE) is not included in any interval and becomes <NA>.
Let's first generate data. For example, let's flip coins.
Each run has 100 flips (size), each flip has 50% chance of head (probability of success),
and size * probability is the expected number of successes in a run; altogether there are 40 runs.
> x <- rbinom(40,100,0.5) > x[1] 53 53 51 52 58 54 54 53 43 60 56 52 57 55 52 57 52 44 54 44 51 51 45 49 48 57 48 [28] 45 52 51 53 55 46 48 47 45 48 50 46 47 > summary(x)Min. 1st Qu. Median Mean 3rd Qu. Max. 43.00 47.75 51.50 50.90 54.00 60.00 > cut(x, breaks=c(43, 48, 52,58, 60))#the minimal number 43 is <NA> by default include.lowest=F [1] (52,58] (52,58] (48,52] (48,52] (52,58] (52,58] (52,58] (52,58] <NA> (58,60] [11] (52,58] (48,52] (52,58] (52,58] (48,52] (52,58] (48,52] (43,48] (52,58] (43,48] [21] (48,52] (48,52] (43,48] (48,52] (43,48] (52,58] (43,48] (43,48] (48,52] (48,52] [31] (52,58] (52,58] (43,48] (43,48] (43,48] (43,48] (43,48] (48,52] (43,48] (43,48] Levels: (43,48] (48,52] (52,58] (58,60] > cut(x, breaks=c(43, 48, 52, 58, 60), include.lowest=T)[1] (52,58] (52,58] (48,52] (48,52] (52,58] (52,58] (52,58] (52,58] [43,48] (58,60] [11] (52,58] (48,52] (52,58] (52,58] (48,52] (52,58] (48,52] [43,48] (52,58] [43,48] [21] (48,52] (48,52] [43,48] (48,52] [43,48] (52,58] [43,48] [43,48] (48,52] (48,52] [31] (52,58] (52,58] [43,48] [43,48] [43,48] [43,48] [43,48] (48,52] [43,48] [43,48] Levels: [43,48] (48,52] (52,58] (58,60] > cut(x, breaks=c(43, 48, 52,58, 60),right=FALSE)#the max number 60 is <NA> by right=FALSE [1] [52,58) [52,58) [48,52) [52,58) [58,60) [52,58) [52,58) [52,58) [43,48) <NA> [11] [52,58) [52,58) [52,58) [52,58) [52,58) [52,58) [52,58) [43,48) [52,58) [43,48) [21] [48,52) [48,52) [43,48) [48,52) [48,52) [52,58) [48,52) [43,48) [52,58) [48,52) [31] [52,58) [52,58) [43,48) [48,52) [43,48) [43,48) [48,52) [48,52) [43,48) [43,48) Levels: [43,48) [48,52) [52,58) [58,60) > summary(cut(x, breaks=c(43, 48, 52, 58, 60), include.lowest=T))[43,48] (48,52] (52,58] (58,60] 14 11 14 1