ggplot Resources

RStudio Cheatsheets - very useful!

YouTube videos

I highly recommend these two lectures:

https://www.youtube.com/watch?v=HeqHMM4ziXA (Part 1, 24 mins)

https://www.youtube.com/watch?v=n8kYa9vu1l8 (Part 2, 28 mins)

Things NOT to do with ggplot2 (apparently)

The Grammar Of Graphics

Building blocks of a graph include:

  • data (this must be in the R data.frame format)

  • aesthetic (this is where you define the variables)

  • geometric object (this is where you say what kind of marks to draw, e.g. points, lines or boxes)

  • faceting (for multiple plots)

  • statistical transformations

  • scales

  • coordinate system

  • position adjustments

Aesthetic Mappings

aes()

This is the function which tells ggplot which variables to use. Variables can be x and y but also size, colour, shape. E.g.:

# x1 is mapped to the x aesthetic and y1 to the y aesthetic
aes(x = x1, y = y1) 

To add another variable, z1, to a plot you can use, e.g.:

aes(x = x1, y = y1, colour = z1) # z1 is mapped to the colour aesthetic
aes(x = x1, y = y1, shape = z1) # z1 is mapped to the shape aesthetic
aes(x = x1, y = y1, size = z1) # z1 is mapped to the size aesthetic

If you don’t want colour, size etc to vary then they can be specified outside of aes(), e.g.

# colour is a single fixed value here, so it is passed outside aes()
geom_point(colour='red')

Geometric Objects

geom_*()

  • these are the actual marks on a graph

  • a plot must have at least one geom; there is no upper limit, e.g.

geom_point() # points, e.g. scatter plots, dot plots, etc
geom_line() # lines, e.g. time series, trend lines, etc
geom_boxplot() # box-and-whisker plots

Example

Plotting data must be in data.frame format. e.g.

library(ggplot2)
# Build a 26-row example data set: one row per letter of the alphabet
N <- 26
A <- seq_len(N)                          # 1, 2, ..., N
B <- A^2 + runif(N, min = 0, max = 100)  # A squared plus uniform noise on [0, 100]
C <- A + runif(N)                        # A plus uniform noise on [0, 1]
D <- runif(N)                            # uniform noise on [0, 1]
E <- LETTERS                             # the upper-case alphabet

df <- data.frame(A = A, B = B, C = C, D = D, E = E)

Aesthetics

Tell ggplot which data to use and add the aesthetic

# Start a plot from df with A on the x axis and B on the y axis (no layers yet)
d <- ggplot(df, mapping = aes(x = A, y = B))
print(d)

This will plot the axes but nothing else

[ggplot does not assume you want a scatter plot (unlike base R)]

Make a scatter plot using geom_point()

# d is the ggplot object defined previously; geom_point() adds the point layer
g <- d + geom_point()
print(g)

Add another variable using point size

# D varies from row to row, so it is mapped to size inside aes()
g <- d + geom_point(mapping = aes(size = D))
print(g)

Add in 2 variables using colour and point size

# Colour follows C and point size follows D
g <- d + geom_point(mapping = aes(colour = C, size = D))
print(g)

Control Transparency using alpha

# alpha is one fixed value for every point, so it is set outside aes()
g <- d + geom_point(mapping = aes(colour = C, size = D), alpha = 0.7)
print(g)

Change (continuously varying) colours

# Continuous colour scale: yellow at the low end, red at the high end
g <- g + scale_colour_gradient(low = "yellow", high = "red")
print(g)

Set size range for points: scale_size()

# Point sizes are scaled into the range 1 to 9
g <- g + scale_size(range = c(1, 9))
print(g)

Add text labels: geom_text()

# The label varies (column E) so it goes in aes(); the text size is fixed,
# so it is passed outside aes()
g <- g + geom_text(mapping = aes(label = E), size = 5)
print(g)

Changing axes labels: labs()

# colour and size are aesthetics, so labs() titles their legends in the
# same way as it titles the x and y axes
g <- g + labs(
  title = "Spotty plot!",
  x = "This is A",
  y = "This is B",
  color = "This is C",
  size = "This is D"
)
print(g)

Finesse your plot using theme()

g <- g + theme(
  text = element_text(size = 15),         # text size across the plot
  plot.title = element_text(hjust = 0.5)  # hjust = 0.5 centres the title
)
print(g)

All this can be done together

# The whole plot in one expression: mappings, points, scales, labels, theme
g <- ggplot(df, mapping = aes(x = A, y = B, size = D, colour = C)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1, 7)) +
  scale_colour_gradient(low = "yellow", high = "red") +
  # fixed size and colour here override the plot-level mappings for the text
  geom_text(mapping = aes(label = E), size = 3, colour = "black") +
  labs(
    title = "Spotty plot!",
    x = "This is A",
    y = "This is B",
    color = "This is C",
    size = "This is D"
  ) +
  theme(
    text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  )
print(g)

Adding a fitted line: geom_smooth()

methods: lm, glm, gam, loess, rlm

formula: e.g. y~x, y~poly(x), y~log(x)

# Scatter plot with a loess fit added as a second layer
f <- d +
  geom_point() +
  geom_smooth(method = "loess")
print(f)

Choose a different fit method

# Same scatter plot, this time with a linear-model fit
f <- d +
  geom_point() +
  geom_smooth(method = "lm")
print(f)

Add in the other variables

# The smoother is added first and the points second;
# the points take their colour from C and their size from D
f <- d +
  geom_smooth(method = "loess") +
  geom_point(mapping = aes(colour = C, size = D))
print(f)

Multiple plots: facet_wrap()

We create a new (fictitious) data set which shows the number of bananas at each of the BioSS offices (Edinburgh, Dundee, Aberdeen and Ayr)

# Fictitious banana counts: N days for each of the four offices
N <- 300
BioSS.Office <- rep(c("Edinburgh", "Aberdeen", "Dundee", "Ayr"), each = N)
Day <- rep(seq_len(N), times = 4)
# Half sine wave over the N days plus uniform noise on [1, 3]
Bananas <- 5 * (sin(pi * Day / N) + 1) + runif(4 * N, min = 1, max = 3)

df1 <- data.frame(Day = Day, BioSS.Office = BioSS.Office, Bananas = Bananas)

# Separate the offices: one more banana in Edinburgh, one fewer in Ayr
df1$Bananas[df1$BioSS.Office == "Edinburgh"] <-
  df1$Bananas[df1$BioSS.Office == "Edinburgh"] + 1
df1$Bananas[df1$BioSS.Office == "Ayr"] <-
  df1$Bananas[df1$BioSS.Office == "Ayr"] - 1

# Base plot for the banana data: Day on the x axis, Bananas on the y axis
d <- ggplot(df1, mapping = aes(x = Day, y = Bananas))

Plot all data together

# One line per office in a single panel, coloured by office
g <- d + geom_line(mapping = aes(colour = BioSS.Office))
print(g)

Split into multiple plots: facet_wrap()

# Same lines, with facet_wrap() giving each office its own panel
g <- d +
  geom_line(mapping = aes(colour = BioSS.Office)) +
  facet_wrap(~ BioSS.Office)
print(g)

Spatial plots

Example using a shape file of postcode districts from

http://www.opendoorlogistics.com/data/

Use readOGR() in the rgdal package to read the shapefile:

library(rgdal)
# NOTE(review): `filepath` is not defined in the visible code; presumably it
# is the folder holding the downloaded Districts shapefile - set it first.
Districts <- readOGR(layer = "Districts", dsn = filepath)

Then use fortify() in ggplot2 to turn it into a data frame

# Convert the Districts spatial object into a data frame that ggplot can draw
Districts.df <- fortify(Districts)

UK postcode regions

# Assign ONE random number to each district. Districts.df has one row per
# polygon vertex, so a value is drawn per district id (the `id` column added
# by fortify()) and then expanded to rows. Every vertex and every separate
# piece of a district therefore shares the same fill value.
district.ids <- unique(Districts.df$id)
district.fill <- runif(length(district.ids))
fill.vec <- district.fill[match(Districts.df$id, district.ids)]

# fill is mapped once at plot level; geom_polygon() only needs the grouping
# that tells it which vertices belong to the same polygon
map <- ggplot(Districts.df, aes(long, lat, fill = fill.vec)) +
  geom_polygon(aes(group = group), col = 'white', lwd = 0.1)
print(map)

Firth of Forth

Pick out a region to zoom in to and remove legend and axes labels

# `map` already holds the plot of the full data set; here the legend, axis
# decorations and grid are removed, the fill colours changed, and the view
# restricted to the chosen x and y limits
map.Ed <- map +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  scale_fill_gradient(low = "yellow", high = "red") +
  coord_cartesian(xlim = c(-3.75, -2.5), ylim = c(55.88, 56.26))
print(map.Ed)

Overlapping Histograms

#Make data set
# Make data set: N observations for each of four items. Each item is drawn
# from its own normal distribution; within every item the first half of the
# rows is labelled 'New' and the second half 'Old'.
N <- 100
df <- data.frame(
  Item = rep(c('Orange', 'Banana', 'Aubergine', 'Brocoli'), each = N),
  # N draws per item so Number is as long as Item. With only 0.5*N draws per
  # item, data.frame() recycled the shorter column and Aubergine/Brocoli
  # silently reused the Orange/Banana values.
  Number = c(rnorm(N, mean = 2, sd = 15), rnorm(N, mean = 10, sd = 5),
             rnorm(N, mean = 15, sd = 3), rnorm(N, mean = 20, sd = 10)),
  # Full-length Age column: N/2 'New' then N/2 'Old', repeated for each item
  Age = rep(rep(c('New', 'Old'), each = N / 2), times = 4)
)
# Histograms of Number, one per Age group; position = 'identity' draws them
# over each other and alpha makes the overlap visible
g <- ggplot(df, mapping = aes(x = Number, fill = Age)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30)
print(g)

Facet wrap by Item

# Same overlapping histograms, one panel per Item; scales = 'free' is passed
# so that the panels do not have to share axis ranges
g <- ggplot(df, mapping = aes(x = Number, fill = Age)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
  facet_wrap(~ Item, scales = "free")
print(g)

Move Legend inside plot and change colours

# Faceted overlapping histograms with the legend placed inside the plot,
# smaller legend keys/text, and manually chosen fill colours
g <- ggplot(df, mapping = aes(x = Number, fill = Age)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30) +
  facet_wrap(~ Item, scales = "free") +
  theme(
    # NOTE(review): a numeric legend.position is deprecated in recent ggplot2
    # releases in favour of legend.position.inside - confirm against the
    # installed version before changing
    legend.position = c(0.1, 0.85),
    legend.key.size = unit(0.5, "line"),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  ) +
  scale_fill_manual(values = c("blue", "red"))
print(g)