R Data type

2013-10-16 | category RStudy | tag R

Vectors and assignment

x <- c(10, 5, 3, 6, 21)

x

[1] 10  5  3  6 21


assign("y", c(10.4, 5.6, 3.1, 6.4, 21.7))

y

[1] 10.4  5.6  3.1  6.4 21.7


z <- c(4, 6, 1, 4, 7)

z

[1] 4 6 1 4 7


a <- c(x, y, z)

a

[1] 10.0  5.0  3.0  6.0 21.0 10.4  5.6  3.1  6.4 21.7  4.0  6.0  1.0  4.0
[15]  7.0


v <- 2 * x + y + 1

v

[1] 31.4 16.6 10.1 19.4 64.7


mean(v)

[1] 28.44


median(v)

[1] 19.4


var(v)

[1] 470.5


sd(v)

[1] 21.69


min(v)

[1] 10.1


max(v)

[1] 64.7


range(v)

[1] 10.1 64.7


cumsum(v)

[1]  31.4  48.0  58.1  77.5 142.2


cumprod(v)

[1]      31.4     521.2    5264.5  102131.8 6607925.2


cummax(v)

[1] 31.4 31.4 31.4 31.4 64.7


cummin(v)

[1] 31.4 16.6 10.1 10.1 10.1


sum(v)

[1] 142.2


summary(v)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
   10.1    16.6    19.4    28.4    31.4    64.7


length(v)

[1] 5


sort(v)

[1] 10.1 16.6 19.4 31.4 64.7


sqrt(v)

[1] 5.604 4.074 3.178 4.405 8.044


log(v)

[1] 3.447 2.809 2.313 2.965 4.170


s3 <- seq(-5, 5, by = 0.2)

s3

[1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4
[15] -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2  0.0  0.2  0.4
[29]  0.6  0.8  1.0  1.2  1.4  1.6  1.8  2.0  2.2  2.4  2.6  2.8  3.0  3.2
[43]  3.4  3.6  3.8  4.0  4.2  4.4  4.6  4.8  5.0


s4 <- seq(length = 51, from = -5, by = 0.2)

s4

[1] -5.0 -4.8 -4.6 -4.4 -4.2 -4.0 -3.8 -3.6 -3.4 -3.2 -3.0 -2.8 -2.6 -2.4
[15] -2.2 -2.0 -1.8 -1.6 -1.4 -1.2 -1.0 -0.8 -0.6 -0.4 -0.2  0.0  0.2  0.4
[29]  0.6  0.8  1.0  1.2  1.4  1.6  1.8  2.0  2.2  2.4  2.6  2.8  3.0  3.2
[43]  3.4  3.6  3.8  4.0  4.2  4.4  4.6  4.8  5.0


s5 <- rep(x, times = 5)

s5

[1] 10  5  3  6 21 10  5  3  6 21 10  5  3  6 21 10  5  3  6 21 10  5  3
[24]  6 21


s6 <- rep(x, each = 5)

s6

[1] 10 10 10 10 10  5  5  5  5  5  3  3  3  3  3  6  6  6  6  6 21 21 21
[24] 21 21

a

[1] 10.0  5.0  3.0  6.0 21.0 10.4  5.6  3.1  6.4 21.7  4.0  6.0  1.0  4.0
[15]  7.0


a[1:5]

[1] 10  5  3  6 21


a[-(1:5)]

[1] 10.4  5.6  3.1  6.4 21.7  4.0  6.0  1.0  4.0  7.0


a[c(1, 3, 5)]

[1] 10  3 21

Matrices

# generates 5 x 4 numeric matrix
x <- matrix(1:20, nrow = 5, ncol = 4)

# another example
cells <- c(1, 26, 24, 68)
rnames <- c("R1", "R2")
cnames <- c("C1", "C2")
mymatrix <- matrix(cells, nrow = 2, ncol = 2, byrow = TRUE, dimnames = list(rnames,
cnames))

# Combining Matrices
B = matrix(c(2, 4, 3, 1, 5, 7), nrow = 3, ncol = 2)
C = matrix(c(7, 4, 2), nrow = 3, ncol = 1)
cbind(B, C)

     [,1] [,2] [,3]
[1,]    2    1    7
[2,]    4    5    4
[3,]    3    7    2


D = matrix(c(6, 2), nrow = 1, ncol = 2)
rbind(B, D)

     [,1] [,2]
[1,]    2    1
[2,]    4    5
[3,]    3    7
[4,]    6    2


# Deconstruction
c(B)

[1] 2 4 3 1 5 7


# Identify rows, columns or elements using subscripts.
x[, 4]  # 4th column of matrix

[1] 16 17 18 19 20

x[3, ]  # 3rd row of matrix

[1]  3  8 13 18

x[2:4, 1:3]  # rows 2,3,4 of columns 1,2,3

     [,1] [,2] [,3]
[1,]    2    7   12
[2,]    3    8   13
[3,]    4    9   14


# Transpose
t(x)

     [,1] [,2] [,3] [,4] [,5]
[1,]    1    2    3    4    5
[2,]    6    7    8    9   10
[3,]   11   12   13   14   15
[4,]   16   17   18   19   20

Data Frame

d <- c(1, 2, 3, 4)
e <- c("red", "white", "red", NA)
f <- c(TRUE, TRUE, TRUE, FALSE)
mydata <- data.frame(d, e, f)
mydata

  d     e     f
1 1   red  TRUE
2 2 white  TRUE
3 3   red  TRUE
4 4  <NA> FALSE


# variable names
names(mydata)

[1] "d" "e" "f"

names(mydata) <- c("ID", "Color", "Passed")
names(mydata)

[1] "ID"     "Color"  "Passed"


# There are a variety of ways to identify the elements of a data frame.
mydata[, 1:2]  # columns 1,2 of data frame

  ID Color
1  1   red
2  2 white
3  3   red
4  4  <NA>

mydata[c("ID", "Color")]  # columns ID and Color from data frame

  ID Color
1  1   red
2  2 white
3  3   red
4  4  <NA>

mydata$Passed  # variable Passed in the data frame

[1]  TRUE  TRUE  TRUE FALSE

subset(mydata, Passed == "TRUE")

  ID Color Passed
1  1   red   TRUE
2  2 white   TRUE
3  3   red   TRUE

subset(mydata, ID > 3)

  ID Color Passed
4  4  <NA>  FALSE

subset(mydata, ID < 3, select = c(ID, Passed))

  ID Passed
1  1   TRUE
2  2   TRUE

subset(mydata, ID < 3, select = -c(Color, Passed))

  ID
1  1
2  2

subset(mydata, Color == "red" & Passed == "TRUE")

  ID Color Passed
1  1   red   TRUE
3  3   red   TRUE

mydata[mydata$ID %in% c(1, 3), ]

  ID Color Passed
1  1   red   TRUE
3  3   red   TRUE


# number of data rows and columns
nrow(mydata)

[1] 4

ncol(mydata)

[1] 3

List

# example of a list with 4 components: a string, a numeric vector, a matrix,
# and a scalar
a <- c(1, 2, 5.3, 6, -2, 4)  # numeric vector
y <- matrix(1:20, nrow = 5, ncol = 4)
w <- list(name = "Fred", mynumbers = a, mymatrix = y, age = 5.3)
w

$name
[1] "Fred"

$mynumbers
[1]  1.0  2.0  5.3  6.0 -2.0  4.0

$mymatrix
     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20

$age
[1] 5.3

# example of a list containing two lists
list1 = list(mynumbers = a)
list2 = list(mymatrix = y)
v <- c(list1, list2)
v

$mynumbers
[1]  1.0  2.0  5.3  6.0 -2.0  4.0

$mymatrix
     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20

# Identify elements of a list using the [[]] convention.
v[[2]]  # 2nd component of the list

     [,1] [,2] [,3] [,4]
[1,]    1    6   11   16
[2,]    2    7   12   17
[3,]    3    8   13   18
[4,]    4    9   14   19
[5,]    5   10   15   20

v[["mynumbers"]]  # component named mynumbers in list

[1]  1.0  2.0  5.3  6.0 -2.0  4.0

Factor

# variable gender with 20 'male' entries and 30 'female' entries
gender <- c(rep("male", 20), rep("female", 30))
gender <- factor(gender)
# stores gender as 20 2s and 30 1s and associates 1=female, 2=male
# internally (levels are assigned alphabetically).
# R now treats gender as a nominal variable
summary(gender)

female   male
    30     20


# variable rating coded as 'large', 'medium', 'small'
rating <- c(rep("large", 5), rep("small", 10), rep("medium", 5))
rating <- ordered(rating)
# recodes rating to 1,2,3 and associates 1=large, 2=medium, 3=small
# internally (levels are sorted alphabetically, so large < medium < small).
# R now treats rating as ordinal

References

Quick-R: Data type
R Tutorial: Data Frame

Previous Next