library(readxl) # read data from Excel files
library(tidyverse) # clean & format data
library(ggplot2) # visualize data
library(rstatix) # basic statistical analysis
library(DT)
setwd("/Users/zhouchuhan/introR/data") # specify a folder path to locate data files
nrs_sample=read_xlsx("sample_nrs.xlsx") %>%
mutate(subjid=as.character(subjid))
str(nrs_sample)
## tibble [1,081 × 5] (S3: tbl_df/tbl/data.frame)
## $ subjid : chr [1:1081] "101002" "102002" "102003" "200002" ...
## $ aval : num [1:1081] 4 5 5 4 4 4 3 4 4 4 ...
## $ avisitn: chr [1:1081] "100" "100" "100" "100" ...
## $ vsn : chr [1:1081] "Baseline" "Baseline" "Baseline" "Baseline" ...
## $ siteid : chr [1:1081] "1" "1" "1" "2" ...
# pull out unique subjid
subj=nrs_sample %>%
distinct(subjid)
# sample treatment group and assign to each subject
trtp=sample(c(0,1),nrow(subj),replace =TRUE)
subj$trtp = as.factor(trtp)
# match treatment to efficay data
nrs_plot=nrs_sample %>%
left_join(.,subj) %>%
mutate(subjid = as.character(subjid),
aval=round(aval,0))
## Joining, by = "subjid"
Heatmap is helpful to realize subject level overview:
- each subject takes repeated measures of the same outcome variable on a series of fixed time points
- each subject takes repeated measures of the different scales of the outcome variable at one time
#blank heatmap
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile() +
facet_wrap(~trtp,scales = "free_y") +
theme_bw()
#use "fill" to specify colorbar variable
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile(aes(fill=aval)) + #produce heatmap
facet_wrap(~trtp,scales = "free_y") +
theme_bw()
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile(aes(fill=aval)) +
facet_wrap(~trtp,scales = "free_y") +
scale_fill_gradient("NRS Pain Score",
expand=c(0,0),
low = "white", # nrs = 0
high = "red", # nrs = 10
na.value = "grey50",
guide = "colourbar",
aesthetics = "fill") +
theme_bw() +
theme(legend.position = "top")
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile(aes(fill=aval)) +
facet_wrap(~trtp,scales = "free_y") +
scale_fill_gradient("NRS Pain Score",
breaks=c(0,2,4,6,8,10), # to change the breaks in colorbar
expand=c(0,0),
low = "white", # nrs = 0
high = "red", # nrs = 10
na.value = "grey50",
guide = "colourbar",
aesthetics = "fill") +
theme_bw() +
theme(legend.position = "top")
nrs_plot$subjid=fct_reorder(nrs_plot$subjid,nrs_plot$aval,.fun=mean,na.rm = TRUE,.desc = TRUE)
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile(aes(fill=aval)) +
facet_wrap(~trtp,scales = "free_y") +
scale_fill_gradient("NRS Pain Score",
breaks=c(0,2,4,6,8,10),
expand=c(0,0),
low = "white", # nrs = 0
high = "red", # nrs = 10
na.value = "grey50",
guide = "colourbar",
aesthetics = "fill") +
theme_bw() +
theme(legend.position = "top")
ggplot(data=nrs_plot,aes(x=avisitn,y=subjid))+
geom_tile(aes(fill=aval)) +
geom_text(data=subset(nrs_plot,aval>=8),aes(label=aval),size=3)+ #add labels
facet_wrap(~trtp,scales = "free_y") +
scale_fill_gradient("NRS Pain Score",
breaks=c(0,2,4,6,8,10),
expand=c(0,0),
low = "white",
high = "red",
na.value = "grey50",
guide = "colourbar",
aesthetics = "fill") +
scale_x_discrete("Visit (d)",labels=unique(nrs_plot$vsn)) +
labs(y="") +
theme_bw() +
theme(legend.position = "top")
nrs_baseline = nrs_sample %>%
filter(avisitn == 100,
aval >= 8) %>%
select(subjid,bsl=aval)
nrs_baseline
## # A tibble: 13 x 2
## subjid bsl
## <chr> <dbl>
## 1 201001 10
## 2 201003 8
## 3 203005 8
## 4 203006 8
## 5 301003 8
## 6 303001 9
## 7 402011 9
## 8 406002 8
## 9 413002 8
## 10 413004 8
## 11 416003 9
## 12 416009 9
## 13 416020 9
nrs_effective = nrs_sample %>%
mutate(vsn=as.numeric(vsn),
aval= round(aval,0)) %>%
filter(avisitn != 100) %>%
right_join(nrs_baseline) %>%
mutate(ref=bsl-4,
effective_reduction=ifelse(aval<=ref,"Y","N")) %>%
select(subjid,vsn,aval,bsl,effective_reduction)
nrs_effective
## # A tibble: 155 x 5
## subjid vsn aval bsl effective_reduction
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 201001 1 2 10 Y
## 2 201003 1 3 8 Y
## 3 203005 1 7 8 N
## 4 203006 1 3 8 Y
## 5 301003 1 2 8 Y
## 6 303001 1 1 9 Y
## 7 402011 1 2 9 Y
## 8 406002 1 2 8 Y
## 9 413002 1 3 8 Y
## 10 413004 1 5 8 N
## # … with 145 more rows
nrs_event = nrs_effective %>%
arrange(subjid,effective_reduction,vsn) %>%
filter(effective_reduction == "Y") %>%
group_by(subjid) %>%
mutate(seq=row_number()) %>% # number each effective reduction
filter(seq == 1) %>% #filter out first effective reduction
ungroup() %>%
select(subjid,effective_visit=vsn,bsl,aval)
nrs_event
## # A tibble: 13 x 4
## subjid effective_visit bsl aval
## <chr> <dbl> <dbl> <dbl>
## 1 201001 1 10 2
## 2 201003 1 8 3
## 3 203005 3 8 4
## 4 203006 1 8 3
## 5 301003 1 8 2
## 6 303001 1 9 1
## 7 402011 1 9 2
## 8 406002 1 8 2
## 9 413002 1 8 3
## 10 413004 2 8 1
## 11 416003 1 9 4
## 12 416009 1 9 5
## 13 416020 2 9 4
nrs_event_plot = nrs_event %>%
mutate(chg_bsl=round(aval-bsl,1),
effective_visit=as.numeric(effective_visit),
subjid = fct_reorder(subjid,effective_visit,.desc = FALSE))
datatable(nrs_event_plot)
ggplot(data=nrs_event_plot,aes(x=subjid,y=effective_visit))+
geom_bar(stat = "identity",width=0.1) +
geom_text(aes(x=subjid,y=effective_visit,label=chg_bsl),color="gold4",vjust=-0.5) +
scale_y_continuous("Visit (d)",labels = c("Baseline","1","2","3"),breaks=c(0:3),limits=c(0,3.5),expand = c(0,0)) +
geom_hline(yintercept = c(1,2,3),linetype=3,color="grey")+
coord_flip() +
theme_classic()
hm_sample=read_xlsx("/Users/zhouchuhan/introR/data/heatmap.xlsx") [1:20]
hm_plot=hm_sample %>%
janitor::clean_names() %>%
#change to longitudinal data structure
gather(key="gene",value="status",gene1:gene9) %>%
#new status variable (status_label) and composite grouping variable (full_label)
mutate(status_label=ifelse(status =="Mutated","Positive",
ifelse(status %in% c("No","Wild Type"), "Negative","Unknown")),
full_label=paste0(responder,"-",status_label)) %>%
select(subjid,id,gene,status, responder,gene,status_label,full_label) %>%
arrange(id,gene)
datatable(hm_plot)
ggplot(hm_plot,aes(x=id, y=gene)) +
geom_tile(aes(fill=full_label,alpha=full_label), color="white") +
scale_alpha_manual("Mutation \n Status",
breaks=c("Y-Positive","Y-Unknown","Y-Negative","N-Positive","N-Unknown","N-Negative"),
labels=c("Positive","Unknown","Negative","Positive","Unknown","Negative"),
values = rep(c(1,0.6,0.2),2)) +
scale_fill_manual("Mutation \n Status",
breaks=c("Y-Positive","Y-Unknown","Y-Negative","N-Positive","N-Unknown","N-Negative"),
labels=c("Positive","Unknown","Negative","Positive","Unknown","Negative"),
values=c(rep("#F8766D",3),rep("#00BFC4",3))) +
#relabel x-axis
scale_x_continuous("",expand = c(0,0),breaks=c(29,83),labels=c("responder","non-responder"))+
scale_y_discrete("Gene",expand = c(0,0))+
guides(fill = guide_legend(nrow = 1, byrow = TRUE),alpha= guide_legend(nrow = 1, byrow = TRUE)) +
labs(title = "Heatmap of Gene", subtitle = "XXX Population") +
theme_bw() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
plot.title = element_text(size = rel(1.3),face = "bold", vjust = 2,hjust=0.5, family = "serif"),
plot.subtitle = element_text(size = rel(1.1),vjust=3,hjust=0.5,family = "serif"),
strip.text = element_text(face = "italic"), #facet title
axis.title = element_text(face = "bold"),
legend.position = "bottom",
legend.title = element_text(size = rel(1.0),hjust=0,vjust=0.75),
legend.text = element_text(size = rel(0.9),hjust=0),
axis.text.x = element_text(size = rel(1.3),face="bold",color="black"),
axis.text.y = element_text(face="italic"),
axis.title.y = element_text(size = rel(1.1),vjust= 1,angle = 0),
axis.title.x = element_text(size = rel(1.1),vjust= -1))