data.table introduction

2014-01-02 | category RStudy | tag R data.table

library(data.table)

Fast and friendly file finagler: fread()

# Read a delimited text file into a data.table. Passing the path through
# `file =` states explicitly that it is a file name, rather than leaving
# fread() to guess whether the string is a path, a shell command or raw text.
fread(file = "file.csv")

Enhanced data.frame

DT[i, j, by]: Take DT, subset rows using `i`, then calculate `j`, grouped by `by`.

# Load the car package, which makes the Prestige data set available.
library(car)
head(Prestige)

# data.table() does not keep row names by default, so copy them into an
# ordinary column before converting.
Prestige[["jobs"]] <- row.names(Prestige)
DT <- data.table(Prestige)
head(DT)

# Highest income within each value of type.
DT[, max(income), by = "type"]

Update by reference using :=

# Add a column holding the maximum income of each row's type group.
DT[, max_income_by_type := max(income), by = "type"]
head(DT)

# Assigning NULL removes the column again.
DT[, max_income_by_type := NULL]
head(DT)

# Add a column with education rounded to a whole number.
DT[, edu := round(education)]
head(DT)

# Group by two columns, then by an expression computed from a column.
DT[, max(income), by = .(type, edu)]
DT[, max(income), by = edu %% 3]

How to sort a data.table by column(s) in R

# Ascending income.
head(DT[order(income), ], n = 6L)
# Descending edu, ties broken by ascending income.
head(DT[order(-edu, income), ], n = 6L)
# Ascending edu, ties broken by descending income.
head(DT[order(edu, -income), ], n = 6L)

:= and `:=`()

# Infix form: assign only on the rows selected by i (edu below 9); the new
# column is NA on every other row.
DT[edu < 9, edu_level := "elementary"]
tail(DT)

# Functional form: add several columns in one call, computed per type.
DT[, `:=`(
  mean_income = mean(income),
  sd_income = sd(income)
), by = "type"]
head(DT)

Previous Next