Download some data (stock price)
library(FinCal)
ohlc = get.ohlcs.yahoo(symbol = c("AAPL", "GOOG", "MSFT"), start = "2013-11-01",
end = "2013-11-07")
mydata = data.frame(date = ohlc$AAPL$date, Apple = ohlc$AAPL$adjusted, Google = ohlc$GOOG$adjusted,
Microsoft = ohlc$MSFT$adjusted)
mydata
date Apple Google Microsoft
1 2013-11-01 517.0 1027 35.26
2 2013-11-04 523.7 1026 35.67
3 2013-11-05 522.4 1022 36.36
4 2013-11-06 520.9 1023 37.89
5 2013-11-07 512.5 1008 37.22
reshape a data frame from wide to long
longData <- melt(your original data frame, a vector of your category variables)
library(reshape2)
mydata = melt(mydata, id.vars = c("date"), measure.vars = c("Apple", "Google",
"Microsoft"), variable.name = "company", value.name = "price")
mydata
date company price
1 2013-11-01 Apple 517.01
2 2013-11-04 Apple 523.69
3 2013-11-05 Apple 522.40
4 2013-11-06 Apple 520.92
5 2013-11-07 Apple 512.49
6 2013-11-01 Google 1027.04
7 2013-11-04 Google 1026.11
8 2013-11-05 Google 1021.52
9 2013-11-06 Google 1022.75
10 2013-11-07 Google 1007.95
11 2013-11-01 Microsoft 35.26
12 2013-11-04 Microsoft 35.67
13 2013-11-05 Microsoft 36.36
14 2013-11-06 Microsoft 37.89
15 2013-11-07 Microsoft 37.22
or just
mydata = melt(mydata, c("date"))
mydata
ddply()
myresult <- ddply(mydata, .(column name of factor I'm splitting by, column name second factor I'm splitting by), summarize OR transform, newcolumn = myfunction(column name I want the function to act upon))
library(plyr)
# split by company
ddply(mydata, .(company), summarize, bestPrice = max(price))
company bestPrice
1 Apple 523.69
2 Google 1027.04
3 Microsoft 37.89
# highest price in the entire data set (all company)
ddply(mydata, NULL, summarize, bestPrice = max(price))
.id bestPrice
1 <NA> 1027
summarize doesn’t give any information from other columns in the original data frame. If you want all the other column data, too, change summarize to transform
ddply(mydata, .(company), transform, bestPrice = max(price))
date company price bestPrice
1 2013-11-01 Apple 517.01 523.69
2 2013-11-04 Apple 523.69 523.69
3 2013-11-05 Apple 522.40 523.69
4 2013-11-06 Apple 520.92 523.69
5 2013-11-07 Apple 512.49 523.69
6 2013-11-01 Google 1027.04 1027.04
7 2013-11-04 Google 1026.11 1027.04
8 2013-11-05 Google 1021.52 1027.04
9 2013-11-06 Google 1022.75 1027.04
10 2013-11-07 Google 1007.95 1027.04
11 2013-11-01 Microsoft 35.26 37.89
12 2013-11-04 Microsoft 35.67 37.89
13 2013-11-05 Microsoft 36.36 37.89
14 2013-11-06 Microsoft 37.89 37.89
15 2013-11-07 Microsoft 37.22 37.89
ddply() lets you apply more than one function at a time.
ddply(mydata, .(company), transform, bestPrice = max(price), worstPrice = min(price))
date company price bestPrice worstPrice
1 2013-11-01 Apple 517.01 523.69 512.49
2 2013-11-04 Apple 523.69 523.69 512.49
3 2013-11-05 Apple 522.40 523.69 512.49
4 2013-11-06 Apple 520.92 523.69 512.49
5 2013-11-07 Apple 512.49 523.69 512.49
6 2013-11-01 Google 1027.04 1027.04 1007.95
7 2013-11-04 Google 1026.11 1027.04 1007.95
8 2013-11-05 Google 1021.52 1027.04 1007.95
9 2013-11-06 Google 1022.75 1027.04 1007.95
10 2013-11-07 Google 1007.95 1027.04 1007.95
11 2013-11-01 Microsoft 35.26 37.89 35.26
12 2013-11-04 Microsoft 35.67 37.89 35.26
13 2013-11-05 Microsoft 36.36 37.89 35.26
14 2013-11-06 Microsoft 37.89 37.89 35.26
15 2013-11-07 Microsoft 37.22 37.89 35.26
ddply() lets you apply your own function. e.g. what you want is a new data frame with just the rows that have the highest price.
ddply(mydata, .(company), function(x) x[x$price == max(x$price), ])
date company price
1 2013-11-04 Apple 523.69
2 2013-11-01 Google 1027.04
3 2013-11-06 Microsoft 37.89
order data by one column (price)
ddply(mydata, .(price), transform, rank(price))
date company price
1 2013-11-01 Microsoft 35.26
2 2013-11-04 Microsoft 35.67
3 2013-11-05 Microsoft 36.36
4 2013-11-07 Microsoft 37.22
5 2013-11-06 Microsoft 37.89
6 2013-11-07 Apple 512.49
7 2013-11-01 Apple 517.01
8 2013-11-06 Apple 520.92
9 2013-11-05 Apple 522.40
10 2013-11-04 Apple 523.69
11 2013-11-07 Google 1007.95
12 2013-11-05 Google 1021.52
13 2013-11-06 Google 1022.75
14 2013-11-04 Google 1026.11
15 2013-11-01 Google 1027.04
or
ddply(mydata, .(price), transform, function(x) x[sort(x$price), ])
or
mydata[order(mydata$price), ]