Load R packages
#install.packages(c("readxl","tidyverse","ggplot2"))
library(readxl) # read data from Excel files
library(tidyverse) # clean & format data
library(ggplot2) # visualize data
Read the data
# Point the session at the folder that holds the sample data files.
# NOTE(review): setwd() with an absolute, machine-specific path makes this
# script non-portable; it is kept because later steps may rely on the working
# directory. Consider building paths with file.path() instead.
setwd("/Users/zhouchuhan/introR/data")

# Load the same sample data set from its Excel and CSV versions.
data_xlsx <- read_xlsx(path = "sample_nrs.xlsx")
data_csv <- read.csv(file = "sample_nrs.csv")

# Quick structural check of the Excel import: observations and variables.
str(object = data_xlsx)
## tibble [1,081 × 5] (S3: tbl_df/tbl/data.frame)
## $ subjid : num [1:1081] 101002 102002 102003 200002 200003 ...
## $ aval : num [1:1081] 4 5 5 4 4 4 3 4 4 4 ...
## $ avisitn: chr [1:1081] "100" "100" "100" "100" ...
## $ vsn : chr [1:1081] "Baseline" "Baseline" "Baseline" "Baseline" ...
## $ siteid : chr [1:1081] "1" "1" "1" "2" ...
Data Visualization
1. Start simple - Line Plot
1.1 A plot with standard lines and points
# One line per subject: group = subjid tells ggplot which points to connect.
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line() +
  geom_point()

1.2 Add a grouping variable
# Vary line type and point shape by the grouping variable trtp, while still
# drawing one line per subject (group = subjid).
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line(mapping = aes(linetype = trtp)) + # line type depends on trtp
  geom_point(mapping = aes(shape = trtp))     # point shape depends on trtp

1.3 Customize point shape and line type
# Pick explicit line types and point shapes for the two trtp groups and give
# both legends the same title so they are shown under one heading.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines;
# `size` is kept here so the plot behaves the same on older installations.
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line(
    mapping = aes(linetype = trtp), # line type depends on trtp
    size = 1.2                      # thicker lines
  ) +
  geom_point(
    mapping = aes(shape = trtp),    # point shape depends on trtp
    size = 3.5                      # larger points
  ) +
  scale_shape_manual(
    name = "Grouping Variable",
    values = c(15, 17)              # shape codes 15 and 17
  ) +
  scale_linetype_manual(
    name = "Grouping Variable",
    values = c("dashed", "solid")
  )

1.4 Organize axes and legend
# Final line plot: custom shapes/line types, labelled axes, legend on top.
#
# The x-axis tick labels are supplied as a NAMED vector (names = avisitn,
# values = vsn). ggplot2 matches named labels to the axis values, so each
# visit code gets its own visit name even when the row order of data_plot
# differs from the (sorted) order of the discrete axis. Passing
# unique(data_plot$vsn) positionally could silently mislabel ticks.
# NOTE(review): assumes each avisitn corresponds to exactly one vsn in
# data_plot; the first vsn seen for each avisitn is used -- confirm.
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line(
    mapping = aes(linetype = trtp), # line type depends on trtp
    size = 1.2                      # thicker lines
  ) +
  geom_point(
    mapping = aes(shape = trtp),    # point shape depends on trtp
    size = 3.5                      # larger points
  ) +
  scale_shape_manual(
    name = "Grouping Variable",
    values = c(15, 17)
  ) +
  scale_linetype_manual(
    name = "Grouping Variable",
    values = c("dashed", "solid")
  ) +
  scale_x_discrete(
    name = "Visit (d)",
    labels = with(
      data_plot[!duplicated(data_plot$avisitn), ],
      setNames(as.character(vsn), as.character(avisitn))
    )
  ) +
  scale_y_continuous(name = "NRS pain [0-10]", limits = c(0, 6)) +
  theme_classic() +
  theme(legend.position = "top")

2. Next step - Bar Plot
2.1 A plot with standard bars
# Grouped bar chart: one bar per (xval, var) pair.
# fill = var colours each bar by var; group = var is what gets dodged.
ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() draws bars whose heights are the yval values themselves
  # (equivalent to geom_bar(stat = "identity")).
  geom_col(
    mapping = aes(fill = var),
    position = position_dodge(0.8) # place the bars of each xval side by side
  )

2.2 Customize bar width and color
# x = as.factor(xval) gives one axis position per xval; group = var separates
# the bars within each position. Bar width and fill colours are set manually.
ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() uses yval directly as bar height
  # (equivalent to geom_bar(stat = "identity")).
  geom_col(
    mapping = aes(fill = var),
    position = position_dodge(0.8), # place the bars of each xval side by side
    width = 0.7                     # bars narrower than the dodge width
  ) +
  scale_fill_manual(values = c("steelblue", "seashell3")) # bar colours

2.3 Add some styling
# Styled grouped bar chart: value labels above the bars, custom colours,
# renamed axes and legend, classic theme.
ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() uses yval directly as bar height
  # (equivalent to geom_bar(stat = "identity")).
  geom_col(
    mapping = aes(fill = var),
    position = position_dodge(0.8), # place the bars of each xval side by side
    width = 0.7                     # bars narrower than the dodge width
  ) +
  geom_text(
    mapping = aes(label = yval),    # print yval as the text label
    position = position_dodge(0.8), # same dodge as the bars so labels line up
    vjust = -0.5                    # lift the label above the top of the bar
  ) +
  scale_fill_manual(
    name = "Grouping Variable",
    values = c("steelblue", "seashell3")
  ) +
  scale_y_continuous(limits = c(0, 4)) +
  labs(x = "Visit (week)", y = "LSM") + # axis titles
  theme_classic()                       # plain background
