Exercise 1: Improve the following visualization
In a retrospective cohort where MS patients were scanned (brain +
spinal cord), 340 patients had radiological disease activity on their
MRI.
Message: A considerable proportion of the MS patients in this
study (12%) had disease activity detected solely on spinal cord MRI (no
symptoms, no new lesion on brain MRI).
# Import the data: 340 spinal cord MRIs, each flagged for concomitant brain
# activity and for whether the patient was symptomatic or asymptomatic at the
# time. Both columns are read as factors.
ex1.df <- read.csv(
  "data/ex1.csv",
  colClasses = c(
    concomitant_brain_activity = "factor",
    symptomatic = "factor"
  )
)
Always take a look at the imported data!
# Preview the first 10 rows of the imported data
head(ex1.df, n = 10)
## concomitant_brain_activity symptomatic
## 1 0 1
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 0 1
## 7 1 0
## 8 0 1
## 9 0 0
## 10 1 1
And use the describe function to gain insight into your
variables (type, number of observations, missing values, etc.).
# Per-variable summary: number of observations, missing values, distinct
# values and the frequency/proportion of each level.
# NOTE(review): presumably Hmisc::describe -- confirm which package is attached.
describe(ex1.df)
## ex1.df
##
## 2 Variables 340 Observations
## --------------------------------------------------------------------------------
## concomitant_brain_activity
## n missing distinct
## 340 0 2
##
## Value 0 1
## Frequency 125 215
## Proportion 0.368 0.632
## --------------------------------------------------------------------------------
## symptomatic
## n missing distinct
## 340 0 2
##
## Value 0 1
## Frequency 106 234
## Proportion 0.312 0.688
## --------------------------------------------------------------------------------
Let’s create a cross-tabulation table using table, and
then divide it by the total number of rows and multiply by 100 to see
the percentages of the categories we would like to show.
# Cross-tabulate symptom status (rows) against concomitant brain activity
# (columns), expressed as a percentage of all rows in the dataset
with(
  ex1.df,
  table(Symptomatic = symptomatic, `Brain activity` = concomitant_brain_activity)
) / nrow(ex1.df) * 100
## Brain activity
## Symptomatic 0 1
## 0 12.05882 19.11765
## 1 24.70588 44.11765
# Base plot object: the dataset plus the aesthetic mappings (aes), i.e. which
# variable goes to which channel (axis / fill / colour) of the figure
ex1.plot <- ggplot(
  ex1.df,
  aes(x = concomitant_brain_activity, fill = symptomatic)
)

# Adding a geom_*() layer chooses the plot type
# (see https://ggplot2.tidyverse.org/reference/index.html#geoms)
ex1.plot + geom_bar()

# Build on the base ggplot object, which already carries the variable mappings.
# The bar layer is added again, this time with explicit settings (see ?geom_bar
# for everything that can be set): position = "stack" gives a stacked bar plot
# and width sets the width of the bars.
ex1.plot_pretty <- ex1.plot + geom_bar(width = 0.3, position = "stack")
ex1.plot_pretty

ex1.plot_pretty <- ex1.plot_pretty +
  # Scales control the labels, colours etc. of a channel and are added as
  # scale_<channel to control>_<way of control>.
  # Fill channel (the symptomatic variable is mapped to it): scale_fill_manual
  # would let you pick the colours by hand; here an RColorBrewer palette is
  # applied automatically, and the 0/1 levels get readable legend labels.
  scale_fill_brewer(
    labels = c("0" = "Asymptomatic", "1" = "Symptomatic"),
    palette = "Set2"
  ) +
  # x-axis (a discrete variable): spell out what 0 and 1 mean
  scale_x_discrete(labels = c("0" = "Absent", "1" = "Present")) +
  # Plot title and axis labels; the title reports the number of rows in the data
  labs(
    title = paste0(
      "Spinal cord MRIs with new cord lesions (n = ", nrow(ex1.df), ")"
    ),
    x = "Concomitant activity on brain MRI",
    y = "Number of active spinal cord MRIs"
  )
ex1.plot_pretty

ex1.plot_pretty <- ex1.plot_pretty +
  # theme() controls the non-data appearance of the figure
  theme(
    # Base text size for the whole figure
    text = element_text(size = 10),

    # Title: horizontally centred, bold, with some space underneath
    plot.title = element_text(
      face = "bold",
      hjust = 0.5,
      margin = margin(b = 9)
    ),

    # 0.5 cm margin on each of the four sides of the plot
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm"),

    # Legend: placed on top, white background, no title, small text
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(size = 6),
    legend.background = element_rect(fill = "#ffffff", colour = "#ffffff"),
    # Negative bottom margin (presumably to pull the legend closer to the panel)
    legend.margin = margin(b = -9),

    # Axes: black axis lines, and some distance between axis titles and axes
    axis.line = element_line(colour = "black"),
    axis.title.x = element_text(margin = margin(t = 10)),
    axis.title.y = element_text(margin = margin(r = 10)),

    # Panel: no background, no border, no grid lines
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )
ex1.plot_pretty

# Vertical extent of the bracket that highlights asymptomatic SC-MRI lesions
# that occurred independently of brain MRI activity. Within the bar without
# concomitant brain activity, the bracket runs from the number of symptomatic
# cases (ymin) up to the total height of that bar (ymax); this assumes the
# symptomatic segment is stacked at the bottom of the bar.
# sum() over the logical condition counts matching rows without counting rows
# where the condition is NA.
ymin <- sum(
  ex1.df$concomitant_brain_activity == 0 & ex1.df$symptomatic == 1,
  na.rm = TRUE
)
ymax <- sum(ex1.df$concomitant_brain_activity == 0, na.rm = TRUE)

ex1.plot_pretty_annotated <- ex1.plot_pretty +
  # Counts + percentages inside the bar segments. stat = "count" computes the
  # count per segment; the percentage is taken relative to the sum of all
  # counts. position_stack(vjust = 0.5) centres each label in its segment.
  geom_text(
    stat = "count",
    aes(
      label = after_stat(
        paste0(count, "\n(", round(count / sum(count) * 100, 1), "%)")
      )
    ),
    position = position_stack(vjust = 0.5),
    size = 3,
    color = "#000000"
  ) +
  # Bracket emphasising the asymptomatic SC-MRI lesions that occurred
  # independently of brain MRI activity. annotate() draws every element exactly
  # once; a geom_*() layer fed with a data subset would instead redraw the same
  # label/segment once per row, which renders poorly.
  # The bar without brain activity is the first x position (x = 1), so
  # 1 * 0.8 = 0.8 places the bracket just left of it.
  # First, the text, to the left of the bracket and vertically centred on it.
  annotate(
    "text",
    x = 0.8 - 0.20,
    y = ymax - (ymax - ymin) / 2,
    label = "Isolated asymptomatic\nSC-MRI activity",
    size = 2.1,
    fontface = "plain"
  ) +
  # Now, the bracket: one vertical line plus a short tick at the top and at
  # the bottom, both pointing towards the bar.
  annotate(
    "segment",
    x = c(0.8, 0.8, 0.8),
    xend = c(0.8, 0.8 + 0.02, 0.8 + 0.02),
    y = c(ymax, ymax, ymin),
    yend = c(ymin, ymax, ymin),
    linewidth = 0.6
  )
ex1.plot_pretty_annotated

# Write the annotated figure to disk as a 15 x 11 cm PNG at 900 dpi
ggsave(
  filename = "ex1_plot_pretty.png",
  plot = ex1.plot_pretty_annotated,
  width = 15,
  height = 11,
  units = "cm",
  dpi = 900
)