plyr arrange(), colwise(), count(), desc()

| category RStudy  | tag R  plyr 

Data Used

library(plyr)
myCars = cbind(vehicle = row.names(mtcars), mtcars)
row.names(myCars) = NULL
myCars
vehicle  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1            Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
2        Mazda RX4 Wag 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
3           Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
4       Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
5    Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
6              Valiant 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
7           Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
8            Merc 240D 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
9             Merc 230 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
10            Merc 280 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
11           Merc 280C 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
12          Merc 450SE 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
13          Merc 450SL 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
14         Merc 450SLC 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
15  Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
16 Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
17   Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
18            Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
19         Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
20      Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
21       Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
22    Dodge Challenger 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
23         AMC Javelin 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
24          Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
25    Pontiac Firebird 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
26           Fiat X1-9 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
27       Porsche 914-2 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
28        Lotus Europa 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
29      Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
30        Ferrari Dino 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
31       Maserati Bora 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
32          Volvo 142E 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2

arrange() Order a data frame by its colums

# sort myCars data by cylinder and displacement
myCars1 = arrange(myCars, cyl, disp)
myCars1
vehicle  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1       Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
2          Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
3             Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
4            Fiat X1-9 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
5         Lotus Europa 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
6           Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
7        Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
8        Porsche 914-2 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
9           Volvo 142E 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
10            Merc 230 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
11           Merc 240D 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
12        Ferrari Dino 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
13           Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
14       Mazda RX4 Wag 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
15            Merc 280 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
16           Merc 280C 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
17             Valiant 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
18      Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
19          Merc 450SE 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
20          Merc 450SL 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
21         Merc 450SLC 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
22       Maserati Bora 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
23         AMC Javelin 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
24    Dodge Challenger 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
25          Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
26      Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
27   Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
28          Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
29    Pontiac Firebird 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
30   Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
31 Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
32  Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
# Sort with displacement in descending order
myCars2 = arrange(myCars, cyl, desc(disp))
myCars2
vehicle  mpg cyl  disp  hp drat    wt  qsec vs am gear carb
1            Merc 240D 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
2             Merc 230 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
3           Volvo 142E 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
4        Porsche 914-2 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
5        Toyota Corona 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
6           Datsun 710 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
7         Lotus Europa 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
8            Fiat X1-9 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
9             Fiat 128 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
10         Honda Civic 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
11      Toyota Corolla 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
12      Hornet 4 Drive 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
13             Valiant 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
14            Merc 280 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
15           Merc 280C 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
16           Mazda RX4 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
17       Mazda RX4 Wag 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
18        Ferrari Dino 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
19  Cadillac Fleetwood 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
20 Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
21   Chrysler Imperial 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
22    Pontiac Firebird 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
23   Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
24          Duster 360 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
25      Ford Pantera L 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
26          Camaro Z28 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
27    Dodge Challenger 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
28         AMC Javelin 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
29       Maserati Bora 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
30          Merc 450SE 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
31          Merc 450SL 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
32         Merc 450SLC 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3

colwise() Column-wise function

numcolwise(), catcolwise()

# Count number of missing values
nmissing <- function(x) sum(is.na(x))

# Apply to every column in a data frame
colwise(nmissing)(baseball)
id year stint team lg g ab r h X2b X3b hr rbi  sb   cs bb   so  ibb hbp
1  0    0     0    0  0 0  0 0 0   0   0  0  12 250 4525  0 1305 7528 377
sh   sf gidp
1 960 7390 5272

# To operate only on specified columns
baseball1 = ddply(baseball, .(year), colwise(nmissing, c("sb", "cs", "so")))
head(baseball1)
year sb cs so
1 1871  0  0  0
2 1872  0  0  0
3 1873  0  0  0
4 1874  0  0  0
5 1875  0  0  0
6 1876 15 15  0

# specify a boolean function that determines whether or not a column should
# be included
baseball2 = ddply(baseball, .(year), colwise(nmissing, is.character))
head(baseball2)
year id team lg
1 1871  0    0  0
2 1872  0    0  0
3 1873  0    0  0
4 1874  0    0  0
5 1875  0    0  0
6 1876  0    0  0

baseball3 = ddply(baseball, .(year), colwise(nmissing, is.numeric))
head(baseball3)
year stint g ab r h X2b X3b hr rbi sb cs bb so ibb hbp sh sf gidp
1 1871     0 0  0 0 0   0   0  0   0  0  0  0  0   7   7  7  7    7
2 1872     0 0  0 0 0   0   0  0   0  0  0  0  0  13  13 13 13   13
3 1873     0 0  0 0 0   0   0  0   0  0  0  0  0  13  13 13 13   13
4 1874     0 0  0 0 0   0   0  0   0  0  0  0  0  15  15 15 15   15
5 1875     0 0  0 0 0   0   0  0   0  0  0  0  0  17  17 17 17   17
6 1876     0 0  0 0 0   0   0  0   0 15 15  0  0  15  15 15 15   15
# or numcolwise()
baseball4 = ddply(baseball, .(year), numcolwise(nmissing))
head(baseball4)
year stint g ab r h X2b X3b hr rbi sb cs bb so ibb hbp sh sf gidp
1 1871     0 0  0 0 0   0   0  0   0  0  0  0  0   7   7  7  7    7
2 1872     0 0  0 0 0   0   0  0   0  0  0  0  0  13  13 13 13   13
3 1873     0 0  0 0 0   0   0  0   0  0  0  0  0  13  13 13 13   13
4 1874     0 0  0 0 0   0   0  0   0  0  0  0  0  15  15 15 15   15
5 1875     0 0  0 0 0   0   0  0   0  0  0  0  0  17  17 17 17   17
6 1876     0 0  0 0 0   0   0  0   0 15 15  0  0  15  15 15 15   15

baseball5 = ddply(baseball, .(year), colwise(nmissing, is.discrete))
head(baseball5)
year id team lg
1 1871  0    0  0
2 1872  0    0  0
3 1873  0    0  0
4 1874  0    0  0
5 1875  0    0  0
6 1876  0    0  0
# or catcolwise()
baseball6 = ddply(baseball, .(year), catcolwise(nmissing))
head(baseball6)
year id team lg
1 1871  0    0  0
2 1872  0    0  0
3 1873  0    0  0
4 1874  0    0  0
5 1875  0    0  0
6 1876  0    0  0

count() Count the number of occurences.

# Count of each value of 'id' in the first 100 cases
count(baseball[1:100, ], vars = "id")
id freq
1  ansonca01    8
2  bennech01    1
3  burdoja01    7
4  forceda01    9
5  galvipu01    1
6  gerhajo01    5
7  hinespa01    6
8  jonesch01    6
9  mathebo01    7
10 morrijo01    2
11 nelsoca01    5
12 orourji01    6
13 shaffor01    4
14 snydepo01    5
15 startjo01    7
16 suttoez01    7
17 whitede01    7
18  yorkto01    7
# Count of ids, weighted by their 'g' loading
count(baseball[1:100, ], vars = "id", wt_var = "g")
id freq
1  ansonca01  432
2  bennech01   49
3  burdoja01  414
4  forceda01  380
5  galvipu01   13
6  gerhajo01  209
7  hinespa01  302
8  jonesch01  134
9  mathebo01  327
10 morrijo01  127
11 nelsoca01  193
12 orourji01  356
13 shaffor01   90
14 snydepo01  250
15 startjo01  389
16 suttoez01  344
17 whitede01  386
18  yorkto01  396
# Count of times each player appeared in each of the years they played
count(baseball[1:100, ], c("id", "year"))[17:20, ]
id year freq
17 forceda01 1871    1
18 forceda01 1872    2
19 forceda01 1873    1
20 forceda01 1874    1

desc() Descending order

desc(1:5)
[1] -1 -2 -3 -4 -5
desc(letters[1:5])
[1] -1 -2 -3 -4 -5

Session Information

sessionInfo()
R version 3.0.2 (2013-09-25)
Platform: x86_64-w64-mingw32/x64 (64-bit)

locale:
[1] LC_COLLATE=Chinese (Simplified)_People's Republic of China.936
[2] LC_CTYPE=Chinese (Simplified)_People's Republic of China.936
[3] LC_MONETARY=Chinese (Simplified)_People's Republic of China.936
[4] LC_NUMERIC=C
[5] LC_TIME=Chinese (Simplified)_People's Republic of China.936

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base

other attached packages:
[1] plyr_1.8  knitr_1.5

loaded via a namespace (and not attached):
[1] evaluate_0.5.1 formatR_0.10   stringr_0.6.2  tools_3.0.2

Further reading


Previous     Next