Load R packages

#install.packages(c("readxl","tidyverse","ggplot2"))
library(readxl)     # read data from Excel files
library(tidyverse)   # clean & format data
library(ggplot2)     # visualize data

Read the data

# Folder that holds the sample data files; edit this path for your machine.
# Full paths are built with file.path() instead of calling setwd(), so the
# working directory of the R session is left unchanged.
data_dir <- "/Users/zhouchuhan/introR/data"
data_xlsx <- read_xlsx(file.path(data_dir, "sample_nrs.xlsx"))  # read an Excel file
data_csv <- read.csv(file.path(data_dir, "sample_nrs.csv"))     # read a csv file
str(data_xlsx)  # quick check of the loaded dataset - observations and variables
## tibble [1,081 × 5] (S3: tbl_df/tbl/data.frame)
##  $ subjid : num [1:1081] 101002 102002 102003 200002 200003 ...
##  $ aval   : num [1:1081] 4 5 5 4 4 4 3 4 4 4 ...
##  $ avisitn: chr [1:1081] "100" "100" "100" "100" ...
##  $ vsn    : chr [1:1081] "Baseline" "Baseline" "Baseline" "Baseline" ...
##  $ siteid : chr [1:1081] "1" "1" "1" "2" ...

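Data Visualization

Note: the line plots in section 1 use a data frame named `data_plot` with the columns `subjid`, `avisitn`, `aval`, `vsn` and `trtp`. Make sure it has been created before running them.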

1. Start simple - Line Plot

1.1 A plot with standard lines and points

# The group aesthetic decides which points are joined by a line:
# here all observations that share a subjid form one line.
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line() +
  geom_point()

1.2 Add a grouping variable

# Let the line type and the point shape vary with the variable trtp
ggplot(
  data = data_plot,
  mapping = aes(x = avisitn, y = aval, group = subjid)
) +
  geom_line(mapping = aes(linetype = trtp)) +  # line type follows trtp
  geom_point(mapping = aes(shape = trtp))      # point shape follows trtp

1.3 Customize point shape and line type

# Pick specific line types and point shapes for the levels of trtp
ggplot(data_plot, aes(x = avisitn, y = aval, group = subjid)) +
  # Line type follows trtp. Line thickness is set with `linewidth`:
  # using `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(aes(linetype = trtp), linewidth = 1.2) +
  # Point shape follows trtp; `size` still controls how large points are.
  geom_point(aes(shape = trtp), size = 3.5) +
  # Shapes 15 and 17 are the filled square and the filled triangle.
  scale_shape_manual(name = "Grouping Variable", values = c(15, 17)) +
  # Giving both scales the same name lets ggplot2 merge them into one legend.
  scale_linetype_manual(
    name = "Grouping Variable",
    values = c("dashed", "solid")
  )

1.4 Organize axes and legend

# Lookup from visit code (avisitn) to visit name (vsn), keeping the first name
# seen for each code. Passing a *named* vector as axis labels makes ggplot2
# match labels to axis values by name, so every tick gets the name of its own
# visit no matter how the rows of data_plot are ordered.
visit_labels <- setNames(as.character(data_plot$vsn), data_plot$avisitn)
visit_labels <- visit_labels[!duplicated(names(visit_labels))]

ggplot(data_plot, aes(x = avisitn, y = aval, group = subjid)) +
  # Line type follows trtp. Line thickness is set with `linewidth`:
  # using `size` for lines is deprecated since ggplot2 3.4.0.
  geom_line(aes(linetype = trtp), linewidth = 1.2) +
  # Point shape follows trtp; `size` still controls how large points are.
  geom_point(aes(shape = trtp), size = 3.5) +
  scale_shape_manual(name = "Grouping Variable", values = c(15, 17)) +
  scale_linetype_manual(
    name = "Grouping Variable",
    values = c("dashed", "solid")
  ) +
  # Axis titles; x tick labels show visit names instead of visit codes
  scale_x_discrete(name = "Visit (d)", labels = visit_labels) +
  # Only the 0-6 part of the y axis is displayed
  scale_y_continuous(name = "NRS pain [0-10]", limits = c(0, 6)) +
  theme_classic() +                # plain background with axis lines
  theme(legend.position = "top")   # legend above the plot

2. Next step - Bar Plot

2.0 Build a summary dataset (e.g. group mean/median)

# Sample summary data, typed in directly as a small text table
## var  - grouping variable
## xval - visit
## yval - group mean

# header = TRUE: the first non-blank line of the text holds the column names.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
df <- read.table(header = TRUE, text = "
  var xval yval
  A   1    2.2
  A   2    2.7
  B   1    3.0
  B   2    3.2
")

2.1 A plot with standard bars

# Mapping fill to var gives the bars of each group their own color
ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() draws bars whose heights are the yval values themselves
  geom_col(
    mapping = aes(fill = var),       # bar color follows var
    position = position_dodge(0.8)   # bars of one xval stand side by side
  )

2.2 Customize bar width and color

# x = as.factor(xval) gives one cluster of bars per visit;
# group = var separates the bars inside each cluster
ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() draws bars whose heights are the yval values themselves
  geom_col(
    mapping = aes(fill = var),        # bar color follows var
    position = position_dodge(0.8),   # bars of one xval stand side by side
    width = 0.7                       # narrower bars
  ) +
  scale_fill_manual(values = c("steelblue", "seashell3"))  # custom bar colors

2.3 Add some styling

ggplot(
  data = df,
  mapping = aes(x = as.factor(xval), y = yval, group = var)
) +
  # geom_col() draws bars whose heights are the yval values themselves
  geom_col(
    mapping = aes(fill = var),        # bar color follows var
    position = position_dodge(0.8),   # bars of one xval stand side by side
    width = 0.7                       # narrower bars
  ) +
  geom_text(
    mapping = aes(label = yval),      # print each yval as a text label
    position = position_dodge(0.8),   # same dodge as the bars, so labels line up
    vjust = -0.5                      # lift the label just above the bar top
  ) +
  scale_fill_manual(
    name = "Grouping Variable",       # legend title
    values = c("steelblue", "seashell3")  # custom bar colors
  ) +
  scale_y_continuous(limits = c(0, 4)) +  # y axis runs from 0 to 4
  labs(x = "Visit (week)", y = "LSM") +   # axis titles
  theme_classic()                         # plain background with axis lines

Homework - Can you make this figure?

Hint: check facet_wrap() in ggplot2