I. Load packages

library(readxl)     # read data from Excel files
library(tidyverse)   # clean & format data
library(ggplot2)     # visualize data
library(rstatix)     # basic statistical analysis
library(DT)

II. Import dataset

setwd("/Users/zhouchuhan/introR/data")  # specify a folder path to locate data files
nrs_sample=read_xlsx("sample_nrs.xlsx") %>%
  mutate(subjid=as.character(subjid))

str(nrs_sample)
## tibble [1,081 × 5] (S3: tbl_df/tbl/data.frame)
##  $ subjid : chr [1:1081] "101002" "102002" "102003" "200002" ...
##  $ aval   : num [1:1081] 4 5 5 4 4 4 3 4 4 4 ...
##  $ avisitn: chr [1:1081] "100" "100" "100" "100" ...
##  $ vsn    : chr [1:1081] "Baseline" "Baseline" "Baseline" "Baseline" ...
##  $ siteid : chr [1:1081] "1" "1" "1" "2" ...

III. Data preprocessing

# pull out unique subjid
subj=nrs_sample %>%  
  distinct(subjid)

# sample treatment group and assign to each subject
trtp=sample(c(0,1),nrow(subj),replace =TRUE)
subj$trtp = as.factor(trtp)

# match treatment to efficay data  
nrs_plot=nrs_sample %>%
  left_join(.,subj) %>%
  mutate(subjid = as.character(subjid),
         aval=round(aval,0))
## Joining, by = "subjid"

IV. Heatmap

Heatmap is helpful to realize subject level overview:
- each subject takes repeated measures of the same outcome variable on a series of fixed time points
- each subject takes repeated measures of the different scales of the outcome variable at one time

Default setting

  • default color scale is not ideal to differentiate NRS pain score
#blank heatmap
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile() +
  facet_wrap(~trtp,scales = "free_y") +
  theme_bw()

#use "fill" to specify colorbar variable
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile(aes(fill=aval)) + #produce heatmap
  facet_wrap(~trtp,scales = "free_y") +
  theme_bw()

Change color scale

  • light color for lower pain, dark color for higher pain
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile(aes(fill=aval)) +
  facet_wrap(~trtp,scales = "free_y") +
  
  scale_fill_gradient("NRS Pain Score",
                      expand=c(0,0),
                      low = "white", # nrs = 0
                      high = "red",  # nrs = 10
                      na.value = "grey50",
                      guide = "colourbar",
                      aesthetics = "fill") +
  
  theme_bw() +
  theme(legend.position = "top")

Customize breaks of colorbar

  • remove non-integer pain scores and only show integer pain scores
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile(aes(fill=aval)) +
  facet_wrap(~trtp,scales = "free_y") +
  scale_fill_gradient("NRS Pain Score",
                      breaks=c(0,2,4,6,8,10), # to change the breaks in colorbar
                      expand=c(0,0),
                      low = "white", # nrs = 0
                      high = "red",  # nrs = 10
                      na.value = "grey50",
                      guide = "colourbar",
                      aesthetics = "fill")  +
  theme_bw() +
  theme(legend.position = "top")

Reorder subjects by ascending/descending overall pain scores

  • subjects with an overall higher pain through the visits will show in the bottom
nrs_plot$subjid=fct_reorder(nrs_plot$subjid,nrs_plot$aval,.fun=mean,na.rm = TRUE,.desc = TRUE)

ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile(aes(fill=aval)) +
  facet_wrap(~trtp,scales = "free_y") +
  scale_fill_gradient("NRS Pain Score",
                      breaks=c(0,2,4,6,8,10),
                      expand=c(0,0),
                      low = "white", # nrs = 0
                      high = "red",  # nrs = 10
                      na.value = "grey50",
                      guide = "colourbar",
                      aesthetics = "fill") +
  theme_bw() +
  theme(legend.position = "top")

Modify labeling and other aesthetic improvement

  • Add pain score value to the heatmap if it is greater than (or equal to) 8
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
  geom_tile(aes(fill=aval)) +
  geom_text(data=subset(nrs_plot,aval>=8),aes(label=aval),size=3)+ #add labels
  facet_wrap(~trtp,scales = "free_y") +
  scale_fill_gradient("NRS Pain Score",
                      breaks=c(0,2,4,6,8,10),
                      expand=c(0,0),
                      low = "white",
                      high = "red",
                      na.value = "grey50",
                      guide = "colourbar",
                      aesthetics = "fill") +
  scale_x_discrete("Visit (d)",labels=unique(nrs_plot$vsn)) +
  labs(y="") +
  theme_bw() +
  theme(legend.position = "top")

V. Time to event analysis

Time to (first) effective pain reduction

  • 13 subjects have baseline pain scores greater than (or equal to) 8
nrs_baseline = nrs_sample %>%
  filter(avisitn == 100,
         aval >= 8) %>%
  select(subjid,bsl=aval)

nrs_baseline
## # A tibble: 13 x 2
##    subjid   bsl
##    <chr>  <dbl>
##  1 201001    10
##  2 201003     8
##  3 203005     8
##  4 203006     8
##  5 301003     8
##  6 303001     9
##  7 402011     9
##  8 406002     8
##  9 413002     8
## 10 413004     8
## 11 416003     9
## 12 416009     9
## 13 416020     9
  • Define an effecive pain reduction from baseline to be greater than 4 by NRS [0-10]
nrs_effective = nrs_sample %>%
  mutate(vsn=as.numeric(vsn),
         aval= round(aval,0)) %>%
  filter(avisitn != 100) %>%
  right_join(nrs_baseline) %>%
  mutate(ref=bsl-4,
         effective_reduction=ifelse(aval<=ref,"Y","N")) %>%
  select(subjid,vsn,aval,bsl,effective_reduction) 

nrs_effective
## # A tibble: 155 x 5
##    subjid   vsn  aval   bsl effective_reduction
##    <chr>  <dbl> <dbl> <dbl> <chr>              
##  1 201001     1     2    10 Y                  
##  2 201003     1     3     8 Y                  
##  3 203005     1     7     8 N                  
##  4 203006     1     3     8 Y                  
##  5 301003     1     2     8 Y                  
##  6 303001     1     1     9 Y                  
##  7 402011     1     2     9 Y                  
##  8 406002     1     2     8 Y                  
##  9 413002     1     3     8 Y                  
## 10 413004     1     5     8 N                  
## # … with 145 more rows
  • Find the respective visits at which each patient firstly achieved effective pain reduction
nrs_event = nrs_effective %>%
  arrange(subjid,effective_reduction,vsn) %>%
  filter(effective_reduction == "Y") %>%
  group_by(subjid) %>%
  mutate(seq=row_number()) %>% # number each effective reduction
  filter(seq == 1) %>% #filter out first effective reduction
  ungroup() %>%
  select(subjid,effective_visit=vsn,bsl,aval)

nrs_event
## # A tibble: 13 x 4
##    subjid effective_visit   bsl  aval
##    <chr>            <dbl> <dbl> <dbl>
##  1 201001               1    10     2
##  2 201003               1     8     3
##  3 203005               3     8     4
##  4 203006               1     8     3
##  5 301003               1     8     2
##  6 303001               1     9     1
##  7 402011               1     9     2
##  8 406002               1     8     2
##  9 413002               1     8     3
## 10 413004               2     8     1
## 11 416003               1     9     4
## 12 416009               1     9     5
## 13 416020               2     9     4
  • Calculate the actual pain scores reduction from baseline at effective visits
nrs_event_plot = nrs_event %>%
  mutate(chg_bsl=round(aval-bsl,1),
         effective_visit=as.numeric(effective_visit),
         subjid = fct_reorder(subjid,effective_visit,.desc = FALSE))

datatable(nrs_event_plot)

Horizontal bar plot

  • Time to effective pain reduction and actual pain score reduction at the effective visits
  • 10 out of 13 patients with baseline pain >= 8 were able to achieve effective pain reduction at first visit after XX treatment
ggplot(data=nrs_event_plot,aes(x=subjid,y=effective_visit))+
  geom_bar(stat = "identity",width=0.1) +
  geom_text(aes(x=subjid,y=effective_visit,label=chg_bsl),color="gold4",vjust=-0.5) +
  scale_y_continuous("Visit (d)",labels = c("Baseline","1","2","3"),breaks=c(0:3),limits=c(0,3.5),expand = c(0,0)) +
  geom_hline(yintercept = c(1,2,3),linetype=3,color="grey")+
  coord_flip() +
  theme_classic()

Crosscheck heatmap (optional)

VI. Case Study

Data Preprocessing

hm_sample=read_xlsx("/Users/zhouchuhan/introR/data/heatmap.xlsx") [1:20]

hm_plot=hm_sample %>%
  janitor::clean_names() %>%
  #change to longitudinal data structure
  gather(key="gene",value="status",gene1:gene9) %>% 
  #new status variable (status_label) and composite grouping variable (full_label)
  mutate(status_label=ifelse(status =="Mutated","Positive", 
                             ifelse(status %in% c("No","Wild Type"), "Negative","Unknown")),
         full_label=paste0(responder,"-",status_label)) %>%
  select(subjid,id,gene,status, responder,gene,status_label,full_label) %>%
  arrange(id,gene)

datatable(hm_plot)

Visualization

ggplot(hm_plot,aes(x=id, y=gene)) +
  geom_tile(aes(fill=full_label,alpha=full_label), color="white") +
  scale_alpha_manual("Mutation \n Status",
                     breaks=c("Y-Positive","Y-Unknown","Y-Negative","N-Positive","N-Unknown","N-Negative"),
                     labels=c("Positive","Unknown","Negative","Positive","Unknown","Negative"),
                     values = rep(c(1,0.6,0.2),2)) +
 
   scale_fill_manual("Mutation \n Status",
                     breaks=c("Y-Positive","Y-Unknown","Y-Negative","N-Positive","N-Unknown","N-Negative"),
                     labels=c("Positive","Unknown","Negative","Positive","Unknown","Negative"),
                     values=c(rep("#F8766D",3),rep("#00BFC4",3))) +
  #relabel x-axis
  scale_x_continuous("",expand = c(0,0),breaks=c(29,83),labels=c("responder","non-responder"))+
  scale_y_discrete("Gene",expand = c(0,0))+
  guides(fill = guide_legend(nrow = 1, byrow = TRUE),alpha= guide_legend(nrow = 1, byrow = TRUE)) +
  labs(title = "Heatmap of Gene", subtitle = "XXX Population") +
  theme_bw() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        plot.title = element_text(size = rel(1.3),face = "bold", vjust = 2,hjust=0.5, family = "serif"),
        plot.subtitle = element_text(size = rel(1.1),vjust=3,hjust=0.5,family = "serif"),
        strip.text = element_text(face = "italic"), #facet title
        axis.title = element_text(face = "bold"),
        legend.position = "bottom",
        legend.title = element_text(size = rel(1.0),hjust=0,vjust=0.75),
        legend.text  = element_text(size = rel(0.9),hjust=0),
        axis.text.x  = element_text(size = rel(1.3),face="bold",color="black"),
        axis.text.y  = element_text(face="italic"),
        axis.title.y = element_text(size = rel(1.1),vjust= 1,angle = 0),
        axis.title.x = element_text(size = rel(1.1),vjust= -1))