Basic plot types
- geom_area() draws an area plot, which is a line plot filled to the y-axis.
- geom_bar(stat = "identity") makes a bar chart.
- geom_line() makes a line plot.
- geom_path() is similar to a geom_line, but lines are connected in the order they appear in the data, not from left to right.
- geom_point() produces a scatterplot.
- geom_polygon() draws polygons, which are filled paths.
- geom_text() adds labels at the specified points.
- geom_tile() makes an image plot or level plot.
library(ggplot2)
library(effects)
library(plyr)
# Work on a copy of the diamonds data with incomplete rows removed.
diamonds <- na.omit(diamonds)

# Three labelled points used by the basic geom examples.
df <- data.frame(
  x = c(3, 1, 5),
  y = c(2, 4, 6),
  label = c("a", "b", "c")
)

# Base plot with both axis titles suppressed; each example adds one geom.
p <- ggplot(df, aes(x, y, label = label)) +
  xlab(NULL) +
  ylab(NULL)

# Scatterplot of the three points.
p + geom_point() + labs(title = "geom_point")
data:image/s3,"s3://crabby-images/7a2e3/7a2e3de6c3f86061f96c620354a7345607ffaeb4" alt="plot of chunk ggplot-part4-1"
# The previous plot is equivalent to p + geom_point() + ggtitle('geom_point').
# Here the title is additionally drawn in bold with reduced line spacing.
p +
  geom_point() +
  ggtitle("geom_point") +
  theme(
    plot.title = element_text(lineheight = 0.8, face = "bold")
  )
data:image/s3,"s3://crabby-images/ede0b/ede0bbaf7f90052df3d8526800e229ceae0c6db3" alt="plot of chunk ggplot-part4-1"
# Bar chart; stat = "identity" uses the y values as given.
p +
  geom_bar(stat = "identity") +
  labs(title = "geom_bar(stat=\"identity\")")
data:image/s3,"s3://crabby-images/6cc35/6cc3557d20254383a89559bcd6b2fdac81317f41" alt="plot of chunk ggplot-part4-1"
# Line plot.
p +
  geom_line() +
  labs(title = "geom_line")
data:image/s3,"s3://crabby-images/e8df5/e8df5d9d7c2a61056926af8df1c1477ec1aa2754" alt="plot of chunk ggplot-part4-1"
# Area plot: a filled line plot.
p +
  geom_area() +
  labs(title = "geom_area")
data:image/s3,"s3://crabby-images/a6330/a633004a324b2677cd2deb9af2b838669061c704" alt="plot of chunk ggplot-part4-1"
# Path: points are joined in the order they appear in the data.
p +
  geom_path() +
  labs(title = "geom_path")
data:image/s3,"s3://crabby-images/5eed1/5eed17a4dd04bee046ae6ba0839e3fba87ddeb08" alt="plot of chunk ggplot-part4-1"
# Text labels drawn at each point (uses the label aesthetic set on p).
p +
  geom_text() +
  labs(title = "geom_text")
data:image/s3,"s3://crabby-images/3623f/3623fbd5b50e773c7dda28c5d6251f000b704638" alt="plot of chunk ggplot-part4-1"
# Image / level plot.
p +
  geom_tile() +
  labs(title = "geom_tile")
data:image/s3,"s3://crabby-images/75422/7542225dd35573b85efa3fb63fa58c0eaece69d9" alt="plot of chunk ggplot-part4-1"
# Polygon: a filled path.
p +
  geom_polygon() +
  labs(title = "geom_polygon")
data:image/s3,"s3://crabby-images/65f05/65f05d5ea9132b984ad2e08c87b41f80326b2b9b" alt="plot of chunk ggplot-part4-1"
Displaying distributions
# Default parameters rarely give a revealing view of a distribution.
# Zooming in on the x axis and choosing a smaller bin width reveals far
# more detail: the distribution of depth is slightly skewed to the right.
# Remember to report important parameters (such as the bin width) in the
# caption.
qplot(x = depth, data = diamonds, geom = "histogram")
data:image/s3,"s3://crabby-images/fc397/fc397a5b32a54c65a35b39b486374d7130be91c2" alt="plot of chunk ggplot-part4-2"
# Same histogram, limited to depths 55-70 and using a bin width of 0.1.
qplot(
  x = depth, data = diamonds, geom = "histogram",
  xlim = c(55, 70), binwidth = 0.1
)
data:image/s3,"s3://crabby-images/54b4a/54b4ab9215510b1d7d823627cea8794e97a231c8" alt="plot of chunk ggplot-part4-2"
# Three views of the distribution of depth by cut: a faceted histogram, a
# conditional density plot, and frequency polygons. All show an interesting
# pattern: as quality increases, the distribution shifts to the left and
# becomes more symmetric.
depth_dist <- ggplot(data = diamonds, mapping = aes(x = depth)) +
  xlim(58, 68)

# View 1: density-scaled histogram, one facet row per cut.
depth_dist +
  geom_histogram(aes(y = ..density..), binwidth = 0.1) +
  facet_grid(cut ~ .)
data:image/s3,"s3://crabby-images/b3c5d/b3c5db7cddef16f1c9ad1b9e23beb9cd7dc18053" alt="plot of chunk ggplot-part4-2"
# Conditional density plot: bars filled by cut and stacked to full height.
depth_dist +
  geom_histogram(
    aes(fill = cut),
    binwidth = 0.1,
    position = "fill"
  )
data:image/s3,"s3://crabby-images/f1134/f113492dcbb8d4a75e7ee0c7f0630e97eb81ae5b" alt="plot of chunk ggplot-part4-2"
# Frequency polygons on the density scale, one coloured line per cut.
depth_dist +
  geom_freqpoly(
    aes(y = ..density.., colour = cut),
    binwidth = 0.1
  )
data:image/s3,"s3://crabby-images/2b5b3/2b5b3c07e9e14f0acf59a6a3bbdaa0bdcad06c4d" alt="plot of chunk ggplot-part4-2"
# The boxplot geom can be used to see the distribution of a continuous
# variable conditional on a discrete variable like cut, or on a continuous
# variable like carat. For continuous variables, the group aesthetic must
# be set to get multiple boxplots.
qplot(x = cut, y = depth, data = diamonds, geom = "boxplot")
data:image/s3,"s3://crabby-images/7aa39/7aa39f22b1c7378d1f597dd8b13e8c88f3b1e419" alt="plot of chunk ggplot-part4-2"
# Boxplots of depth grouped by carat rounded down to the nearest 0.1.
qplot(
  x = carat, y = depth, data = diamonds, geom = "boxplot",
  group = round_any(carat, 0.1, floor),
  xlim = c(0, 3)
)
data:image/s3,"s3://crabby-images/18ca5/18ca5b39e5d0b3cbae5a0f44c4e4bcda7710528e" alt="plot of chunk ggplot-part4-2"
# The jitter geom gives a crude visualisation of 2d distributions that
# have a discrete component. It generally works better for smaller
# datasets. Car class is plotted against the continuous variable city mpg
# here, and against the discrete variable drive train in the next plot.
qplot(x = class, y = cty, data = mpg, geom = "jitter")
data:image/s3,"s3://crabby-images/10e10/10e1012ff9acda7de40a40f05a05a8c10ea5146e" alt="plot of chunk ggplot-part4-2"
# Jittered plot of two discrete variables: car class and drive train.
qplot(x = class, y = drv, data = mpg, geom = "jitter")
data:image/s3,"s3://crabby-images/aa6b1/aa6b122c9e4c00a0f43aefc14e578d12b77c45a8" alt="plot of chunk ggplot-part4-2"
# The density plot is a smoothed version of the histogram. It has desirable
# theoretical properties, but is more difficult to relate back to the data.
# Density of depth over the range 54-70.
qplot(
  x = depth, data = diamonds, geom = "density",
  xlim = c(54, 70)
)
data:image/s3,"s3://crabby-images/c5a00/c5a00c513f2cb133fb79e43d63690a833743ad2f" alt="plot of chunk ggplot-part4-2"
# Density of depth filled by cut; I(0.2) sets a constant alpha.
qplot(
  x = depth, data = diamonds, geom = "density",
  xlim = c(54, 70),
  fill = cut,
  alpha = I(0.2)
)
data:image/s3,"s3://crabby-images/44d21/44d216a09e22e4f4a64e69b33150b6ab735e6e1b" alt="plot of chunk ggplot-part4-2"
Dealing with overplotting
# 2000 points with independently drawn normal x and y coordinates.
df <- data.frame(
  x = rnorm(2000),
  y = rnorm(2000)
)
norm <- ggplot(data = df, mapping = aes(x = x, y = y))

# Points drawn with the default shape.
norm + geom_point()
data:image/s3,"s3://crabby-images/5c3fd/5c3fd194f5c7718761a557573fb058af3b732af3" alt="plot of chunk ggplot-part4-3"
# Hollow points (shape 1).
norm +
  geom_point(shape = 1)
data:image/s3,"s3://crabby-images/d8467/d84677008a27c2b4ff3359d8069ff151d8d4db87" alt="plot of chunk ggplot-part4-3"
# Pixel-sized points (shape ".").
norm +
  geom_point(shape = ".")
data:image/s3,"s3://crabby-images/6a00c/6a00c2a523e333591390ecf6369655aaa0640cea" alt="plot of chunk ggplot-part4-3"
# Alpha blending alleviates overplotting in sample data from a bivariate
# normal. The three plots use alpha values of 1/3, 1/5 and 1/10 in turn.
norm +
  geom_point(colour = "black", alpha = 1/3)
data:image/s3,"s3://crabby-images/5ab94/5ab9499f6067be5c402c76ceb148bba0a866ba1e" alt="plot of chunk ggplot-part4-3"
# Black points with alpha 1/5.
norm +
  geom_point(colour = "black", alpha = 1/5)
data:image/s3,"s3://crabby-images/a9db4/a9db4650fac00ccf088391ef6712deaae6218bd4" alt="plot of chunk ggplot-part4-3"
# Black points with alpha 1/10.
norm +
  geom_point(colour = "black", alpha = 1/10)
data:image/s3,"s3://crabby-images/d0d9a/d0d9ad0bca2bb32973a5611aafe8a2dd68054b46" alt="plot of chunk ggplot-part4-3"
# Table vs. depth from the diamonds data, used to show how jitter and
# alpha blending alleviate overplotting in discrete data.
td <- ggplot(data = diamonds, mapping = aes(x = table, y = depth)) +
  xlim(50, 70) +
  ylim(50, 70)

# Plain points, no jitter.
td + geom_point()
data:image/s3,"s3://crabby-images/85b97/85b97493254aa032494237c4a16249aa6459373e" alt="plot of chunk ggplot-part4-3"
# Jittered points using the default amount of jitter.
td +
  geom_jitter()
data:image/s3,"s3://crabby-images/a8d70/a8d708c9cf86fc4929bbee902ae8190e902ebf2d" alt="plot of chunk ggplot-part4-3"
# Jittered points with a horizontal jitter of 0.5 (half the gap between
# bands).
jit <- position_jitter(width = 0.5)
td +
  geom_jitter(position = jit)
data:image/s3,"s3://crabby-images/ebd93/ebd93e977d02d89c45ad0c0bc7d333376be99496" alt="plot of chunk ggplot-part4-3"
# Horizontal jitter combined with alpha 1/10.
td +
  geom_jitter(position = jit, colour = "black", alpha = 1/10)
data:image/s3,"s3://crabby-images/45027/45027c532e91759f36f9a8d2c4fbe81e4d42e6a7" alt="plot of chunk ggplot-part4-3"
# Horizontal jitter combined with alpha 1/50.
td +
  geom_jitter(position = jit, colour = "black", alpha = 1/50)
data:image/s3,"s3://crabby-images/2ab36/2ab36705775962c87e8fe67b152ea145a2c57ec6" alt="plot of chunk ggplot-part4-3"
# Horizontal jitter combined with alpha 1/200.
td +
  geom_jitter(position = jit, colour = "black", alpha = 1/200)
data:image/s3,"s3://crabby-images/5ffa0/5ffa0b82c94fedca4051f472fd699204168b43b1" alt="plot of chunk ggplot-part4-3"
Drawing maps
# Example using the borders function.
library(maps)
data(us.cities)

# All cities with a population (as of January 2006) greater than half a
# million, drawn over the state borders.
big_cities <- subset(us.cities, pop > 5e+05)
qplot(x = long, y = lat, data = big_cities) +
  borders(database = "state", size = 0.5)
data:image/s3,"s3://crabby-images/e9e09/e9e09642d69295bd20d3a11c2a44e7d205f4fe06" alt="plot of chunk ggplot-part4-4"
# Cities in Texas, drawn over the Texas county borders.
tx_cities <- subset(us.cities, country.etc == "TX")
ggplot(data = tx_cities, mapping = aes(x = long, y = lat)) +
  borders(database = "county", regions = "texas", colour = "grey70") +
  geom_point(colour = "black", alpha = 0.5)
data:image/s3,"s3://crabby-images/f9c40/f9c408bbde909b1a22086f8f8c2bab7fc4f72ce2" alt="plot of chunk ggplot-part4-4"
Further reading