In RStudio -> Help -> Cheatsheets (this should give you all the cheatsheets available for the packages you have installed)
I highly recommend these two lectures:
https://www.youtube.com/watch?v=HeqHMM4ziXA (Part 1, 24 mins)
https://www.youtube.com/watch?v=n8kYa9vu1l8 (Part 2, 28 mins)
http://www.cookbook-r.com/Graphs/
http://r-statistics.co/Complete-Ggplot2-Tutorial-Part1-With-R-Code.html
3D graphics (see the rgl package)
Graph-theory type graphs (nodes/edges layout; see the igraph package)
Excellent tutorial on igraph: http://kateto.net/networks-r-igraph
My brief tutorial on igraph: http://www.bioss.ac.uk/people/helen/igraphIntro.html
An example of how I’ve used igraph: https://helenkett.shinyapps.io/MicrobialNetworks/
Interactive graphics (see the ggvis package)
data (this must be in the R data.frame format)
aesthetic (this is where you define the variables)
geometric object (this is where you say what kind of marks to draw, e.g. points, lines or boxplots)
faceting (for multiple plots)
statistical transformations
scales
coordinate system
position adjustments
This is the function which tells ggplot which variables to use. Variables can be x and y but also size, colour, shape. E.g.:
aes(x = x1, y = y1) # x1 is mapped to the x-axis and y1 to the y-axis
To add another variable, z1, to a plot you can use, e.g.:
aes(x = x1, y = y1, colour = z1) # z1 is mapped to the colour of each mark
aes(x = x1, y = y1, shape = z1) # z1 is mapped to the shape of each mark
aes(x = x1, y = y1, size = z1) # z1 is mapped to the size of each mark
If you don’t want colour, size etc to vary then they can be specified outside of aes(), e.g.
geom_point(colour='red') # a constant set outside aes(): every point is red
these are the actual marks on a graph
a plot must have at least one geom; there is no upper limit, e.g.
geom_point() # points, e.g. scatter plots, dot plots, etc.
geom_line() # lines, e.g. time series, trend lines, etc.
geom_boxplot() # box plots
Plotting data must be in data.frame format. e.g.
library(ggplot2)
# Build a 26-row example data frame, one row per letter of the alphabet.
# N must stay at 26 so that every column has the same length as LETTERS.
N <- 26
A <- seq_len(N)                          # 1, 2, ..., N
B <- A^2 + runif(N, min = 0, max = 100)  # quadratic trend plus uniform noise
C <- A + runif(N)                        # linear trend plus noise between 0 and 1
D <- runif(N)                            # noise between 0 and 1
E <- LETTERS                             # the alphabet
df <- data.frame(A = A, B = B, C = C, D = D, E = E)
Tell ggplot which data to use and add the aesthetic
# Base plot object: use df as the data and map column A to x and column B to y.
# No geom has been added yet, so printing it shows no data marks.
d <- ggplot(data = df, mapping = aes(x = A, y = B))
print(d)
This will plot the axes but nothing else
[ggplot does not assume you want a scatter plot (unlike base R)]
# d is the ggplot object defined previously; adding a point layer draws the data.
g <- d + geom_point()
print(g)

# D differs from point to point, so size is mapped inside aes().
g <- d + geom_point(aes(size = D))
print(g)

# Map colour to C as well as size to D.
g <- d + geom_point(aes(colour = C, size = D))
print(g)

# alpha is the same for every point, so it is set outside aes().
g <- d + geom_point(aes(colour = C, size = D), alpha = 0.7)
print(g)

# Use a yellow-to-red gradient for the colour scale.
g <- g + scale_colour_gradient(low = "yellow", high = "red")
print(g)

# Set the range of point sizes to 1-9.
g <- g + scale_size(range = c(1, 9))
print(g)

# Label each point with its letter. The text size is constant here, so it
# sits outside aes().
g <- g + geom_text(aes(label = E), size = 5)
print(g)

# colour and size are aesthetics, so they are labelled in the same way as
# x and y.
g <- g + labs(
  title = "Spotty plot!",
  x = "This is A",
  y = "This is B",
  colour = "This is C",
  size = "This is D"
)
print(g)

g <- g + theme(
  text = element_text(size = 15),         # change text size
  plot.title = element_text(hjust = 0.5)  # centre plot title
)
print(g)
# The same kind of plot built in a single statement. size and colour are
# mapped in the top-level aes(), so the layers below inherit them unless they
# set their own constants (as geom_text does for size and colour).
g <- ggplot(data = df, mapping = aes(x = A, y = B, size = D, colour = C)) +
  geom_point(alpha = 0.7) +
  scale_size(range = c(1, 7)) +
  scale_colour_gradient(low = "yellow", high = "red") +
  geom_text(aes(label = E), size = 3, colour = "black") +
  labs(
    title = "Spotty plot!",
    x = "This is A",
    y = "This is B",
    colour = "This is C",
    size = "This is D"
  ) +
  theme(
    text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5)
  )
print(g)
methods: lm, glm, gam, loess, rlm
formula: e.g. y~x, y~poly(x), y~log(x)
# Scatter plot with a loess smoother drawn over the points.
f <- d +
  geom_point() +
  geom_smooth(method = "loess")
print(f)

# Scatter plot with a straight-line (lm) fit.
f <- d +
  geom_point() +
  geom_smooth(method = "lm")
print(f)

# Layers are drawn in the order they are added: here the smoother goes first
# and the points, coloured by C and sized by D, are drawn on top of it.
f <- d +
  geom_smooth(method = "loess") +
  geom_point(aes(colour = C, size = D))
print(f)
We create a new (fictitious) data set which shows the number of bananas at each of the BioSS offices (Edinburgh, Dundee, Aberdeen and Ayr)
# Fictitious banana counts: N days for each of four offices, stacked one
# office after another (long format).
N <- 300
BioSS.Office <- rep(c("Edinburgh", "Aberdeen", "Dundee", "Ayr"), each = N)
Day <- rep(seq_len(N), times = 4)
# A half sine wave over the N days plus uniform noise between 1 and 3.
Bananas <- 5 * (sin(pi * Day / N) + 1) + runif(4 * N, min = 1, max = 3)
df1 <- data.frame(Day, BioSS.Office, Bananas)

# More bananas in Edinburgh (+1) and fewer in Ayr (-1).
df1$Bananas[df1$BioSS.Office == "Edinburgh"] <-
  df1$Bananas[df1$BioSS.Office == "Edinburgh"] + 1
df1$Bananas[df1$BioSS.Office == "Ayr"] <-
  df1$Bananas[df1$BioSS.Office == "Ayr"] - 1
# Base plot for the banana data: Day on the x-axis, Bananas on the y-axis.
d <- ggplot(data = df1, mapping = aes(x = Day, y = Bananas))

# One line per office, all in the same panel.
g <- d + geom_line(aes(colour = BioSS.Office))
print(g)

# The same lines, with one panel per office.
g <- d +
  geom_line(aes(colour = BioSS.Office)) +
  facet_wrap(~BioSS.Office)
print(g)
http://www.opendoorlogistics.com/data/
Use readOGR() in the rgdal package to read the shapefile:
library(rgdal)
# Read the "Districts" layer of the shapefile.
# filepath is not defined in the code shown here: set it to the folder that
# holds the shapefile before running this line.
# NOTE(review): rgdal has been retired from CRAN; confirm it is still
# installable in your R setup (sf::st_read() is the usual replacement).
Districts <- readOGR(dsn = filepath, layer = "Districts")
Then use fortify() from ggplot2 to turn it into a data frame
# Convert the spatial object into a plain data frame that ggplot can draw.
Districts.df <- fortify(Districts)

# Assign ONE random number to each district. Drawing runif() once per row
# would instead give a separate value to every row of Districts.df, so rows
# belonging to the same district would not share a fill value.
# Assumes fortify() returns an `id` column identifying the district of each
# row, as it does for sp polygon objects -- TODO confirm for your data.
district.ids <- unique(Districts.df$id)
district.fill <- runif(length(district.ids))
fill.vec <- district.fill[match(Districts.df$id, district.ids)]

# Store the fill values in the data frame as well, so that the plot does not
# depend on a separate global vector staying in step with the rows.
Districts.df$fill.vec <- fill.vec

# group = group keeps each polygon separate; the fill mapping is inherited
# from the top-level aes(). White outlines with a thin line width.
map <- ggplot(Districts.df, aes(long, lat, fill = fill.vec)) +
  geom_polygon(aes(group = group), colour = "white", lwd = 0.1)
print(map)
Pick out a region to zoom in to and remove legend and axes labels
# Start from map (already defined on the full data set), strip the legend,
# axis decorations and grid lines, recolour the fill scale, and zoom in on a
# rectangle given in longitude/latitude.
map.Ed <- map +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  scale_fill_gradient(low = "yellow", high = "red") +
  coord_cartesian(xlim = c(-3.75, -2.5), ylim = c(55.88, 56.26))
print(map.Ed)
# Make data set: N observations for each of four items, half of them labelled
# "New" and half "Old".
#
# Every column must have 4 * N values. If Number and Age only have 2 * N
# values, data.frame() silently recycles them to match Item, which makes the
# Aubergine rows exact copies of the Orange rows and the Brocoli rows exact
# copies of the Banana rows.
N <- 100
df <- data.frame(
  Item = rep(c("Orange", "Banana", "Aubergine", "Brocoli"), each = N),
  # One normal distribution per item, in the same order as Item.
  Number = c(
    rnorm(N, mean = 2, sd = 15),
    rnorm(N, mean = 10, sd = 5),
    rnorm(N, mean = 15, sd = 3),
    rnorm(N, mean = 20, sd = 10)
  ),
  # Within each item: the first N / 2 rows are "New", the rest are "Old".
  Age = rep(rep(c("New", "Old"), each = N / 2), times = 4)
)
# Overlaid histograms of Number, one fill colour per Age group.
# position = "identity" draws the groups on top of each other rather than
# stacking them; alpha makes the overlap visible.
g <- ggplot(df, aes(Number, fill = Age)) +
  geom_histogram(alpha = 0.6, position = "identity", bins = 30)
print(g)

# The same histograms split into one panel per Item, each with its own axes.
g <- g + facet_wrap(~Item, scales = "free")
print(g)

# Move the legend inside the plotting area, shrink it, and pick the fill
# colours by hand.
# NOTE(review): newer ggplot2 releases deprecate a numeric legend.position in
# favour of legend.position.inside -- confirm against the installed version.
g <- g +
  theme(
    legend.position = c(0.1, 0.85),
    legend.key.size = unit(0.5, "line"),
    legend.text = element_text(size = 8),
    legend.title = element_text(size = 8)
  ) +
  scale_fill_manual(values = c("blue", "red"))
print(g)