library(readxl) # read data from Excel files
library(tidyverse) # clean & format data
library(ggplot2) # visualize data
library(rstatix) # basic statistical analysis
setwd("/Users/zhouchuhan/introR/data") #specify a folder path to locate data files
sample=read_xlsx("sample_nrs.xlsx")
str(sample)
## tibble [1,081 × 5] (S3: tbl_df/tbl/data.frame)
## $ subjid : num [1:1081] 101002 102002 102003 200002 200003 ...
## $ aval : num [1:1081] 4 5 5 4 4 4 3 4 4 4 ...
## $ avisitn: chr [1:1081] "100" "100" "100" "100" ...
## $ vsn : chr [1:1081] "Baseline" "Baseline" "Baseline" "Baseline" ...
## $ siteid : chr [1:1081] "1" "1" "1" "2" ...
sample$subjid=as.character(sample$subjid)
sample$avisitn=as.numeric(sample$avisitn)
subj=unique(sample$subjid)
length(subj)
## [1] 86
set.seed(1)
trt=sample(c("A","B"),size=length(subj),replace = TRUE)
trt
## [1] "A" "B" "A" "A" "B" "A" "A" "A" "B" "B" "A" "A" "A" "A" "A" "B" "B" "B" "B"
## [20] "A" "A" "A" "A" "A" "A" "A" "B" "A" "A" "B" "B" "B" "A" "B" "A" "A" "B" "A"
## [39] "B" "B" "B" "B" "A" "B" "B" "B" "B" "B" "A" "A" "B" "A" "B" "B" "A" "A" "B"
## [58] "B" "B" "A" "A" "B" "B" "B" "B" "B" "B" "A" "B" "B" "B" "B" "A" "A" "A" "B"
## [77] "B" "A" "A" "B" "B" "B" "A" "A" "A" "B"
effl=sample(c("Y","N"),size=length(subj),replace = TRUE)
effl
## [1] "Y" "N" "Y" "N" "Y" "Y" "N" "N" "Y" "Y" "Y" "N" "N" "Y" "N" "N" "N" "Y" "Y"
## [20] "Y" "Y" "N" "Y" "Y" "Y" "Y" "Y" "N" "N" "N" "N" "Y" "N" "N" "N" "N" "Y" "Y"
## [39] "N" "Y" "Y" "N" "N" "Y" "Y" "Y" "N" "Y" "N" "Y" "N" "N" "N" "Y" "N" "Y" "Y"
## [58] "N" "Y" "N" "N" "N" "Y" "Y" "N" "Y" "Y" "Y" "Y" "N" "N" "N" "N" "N" "Y" "Y"
## [77] "Y" "N" "Y" "Y" "N" "Y" "Y" "N" "Y" "Y"
subj_trt=as_tibble(cbind(subj,trt))
head(subj_trt,10)
## # A tibble: 10 x 2
## subj trt
## <chr> <chr>
## 1 101002 A
## 2 102002 B
## 3 102003 A
## 4 200002 A
## 5 200003 B
## 6 200004 A
## 7 200005 A
## 8 200009 A
## 9 200010 B
## 10 200011 B
subj_trt$trt=factor(subj_trt$trt,levels=c("A","B"))
subj_trt$effl=effl
subj_trt$effl=factor(subj_trt$effl,levels=c("Y","N"))
str(subj_trt)
## tibble [86 × 3] (S3: tbl_df/tbl/data.frame)
## $ subj: chr [1:86] "101002" "102002" "102003" "200002" ...
## $ trt : Factor w/ 2 levels "A","B": 1 2 1 1 2 1 1 1 2 2 ...
## $ effl: Factor w/ 2 levels "Y","N": 1 2 1 2 1 1 2 2 1 1 ...
sample_cm=left_join(sample, subj_trt, by=c("subjid"="subj")) %>%
arrange(subjid,avisitn)
head(sample_cm,20)
## # A tibble: 20 x 7
## subjid aval avisitn vsn siteid trt effl
## <chr> <dbl> <dbl> <chr> <chr> <fct> <fct>
## 1 101002 4 100 Baseline 1 A Y
## 2 101002 3.14 101 1 1 A Y
## 3 101002 1.86 102 2 1 A Y
## 4 101002 1.29 103 3 1 A Y
## 5 101002 2 104 4 1 A Y
## 6 101002 2.14 105 5 1 A Y
## 7 101002 1.57 106 6 1 A Y
## 8 101002 1.14 107 7 1 A Y
## 9 101002 1 108 8 1 A Y
## 10 101002 0.714 109 9 1 A Y
## 11 101002 1 110 10 1 A Y
## 12 101002 1.33 111 11 1 A Y
## 13 101002 0.667 112 12 1 A Y
## 14 102002 5 100 Baseline 1 B N
## 15 102002 2 101 1 1 B N
## 16 102002 1.43 102 2 1 B N
## 17 102002 0.571 103 3 1 B N
## 18 102002 0.571 104 4 1 B N
## 19 102002 0.857 105 5 1 B N
## 20 102002 0.571 106 6 1 B N
summary = sample_cm %>%
group_by(trt,avisitn) %>%
get_summary_stats(aval, type = "mean_sd") %>%
#select(trt,avisitn,n,mean,sd)
select(trt,avisitn,everything(),-variable)
summary
## # A tibble: 26 x 5
## trt avisitn n mean sd
## <fct> <dbl> <dbl> <dbl> <dbl>
## 1 A 100 40 4.45 2.75
## 2 A 101 39 2.01 1.50
## 3 A 102 38 1.22 1.36
## 4 A 103 38 1.09 1.28
## 5 A 104 38 1.00 1.02
## 6 A 105 38 0.949 1.32
## 7 A 106 39 0.777 1.17
## 8 A 107 38 0.631 0.875
## 9 A 108 37 0.718 1.09
## 10 A 109 37 0.825 1.34
## # … with 16 more rows
library(table1)
baseline=filter(sample_cm,vsn == "Baseline")
table1( ~ aval | trt, data=baseline)
A (N=40) |
B (N=44) |
Overall (N=84) |
|
---|---|---|---|
aval | |||
Mean (SD) | 4.45 (2.75) | 4.50 (2.30) | 4.48 (2.51) |
Median [Min, Max] | 4.00 [0, 10.0] | 4.00 [0, 9.00] | 4.00 [0, 10.0] |
table1( ~ aval | trt*effl, data=baseline)
A |
B |
Overall |
||||
---|---|---|---|---|---|---|
Y (N=18) |
N (N=22) |
Y (N=26) |
N (N=18) |
Y (N=44) |
N (N=40) |
|
aval | ||||||
Mean (SD) | 4.39 (2.50) | 4.50 (3.00) | 4.23 (2.21) | 4.89 (2.42) | 4.30 (2.31) | 4.68 (2.73) |
Median [Min, Max] | 4.00 [0, 10.0] | 4.00 [0, 9.00] | 4.00 [0, 9.00] | 4.50 [0, 9.00] | 4.00 [0, 10.0] | 4.00 [0, 9.00] |
table1( ~ aval | effl*trt, data=baseline)
Y |
N |
Overall |
||||
---|---|---|---|---|---|---|
A (N=18) |
B (N=26) |
A (N=22) |
B (N=18) |
A (N=40) |
B (N=44) |
|
aval | ||||||
Mean (SD) | 4.39 (2.50) | 4.23 (2.21) | 4.50 (3.00) | 4.89 (2.42) | 4.45 (2.75) | 4.50 (2.30) |
Median [Min, Max] | 4.00 [0, 10.0] | 4.00 [0, 9.00] | 4.00 [0, 9.00] | 4.50 [0, 9.00] | 4.00 [0, 10.0] | 4.00 [0, 9.00] |
sample_cm$vsn=factor(sample_cm$vsn,levels=c("Baseline",1:12))
ggplot(data=sample_cm,aes(x=vsn,y=aval)) +
geom_boxplot(width=0.5) +
scale_x_discrete("Visit (d)",labels=unique(sample_cm$vsn)) +
scale_y_continuous("NRS pain [0-10]",limits = c(0,10),breaks = c(0,2,4,6,8,10)) +
theme_classic()
ggplot(data=sample_cm,aes(x=vsn,y=aval)) +
#geom_boxplot(aes(group=trt),width=0.5) +
geom_boxplot(aes(fill=trt),width=0.5) +
scale_x_discrete("Visit (d)",labels=unique(sample_cm$vsn)) +
scale_y_continuous("NRS pain [0-10]",limits = c(0,10),breaks = c(0,2,4,6,8,10)) +
theme_classic()
ggplot(data=sample_cm,aes(x=vsn,y=aval)) +
stat_boxplot(aes(fill=trt),geom ='errorbar',width=0.15,position = position_dodge(0.5)) +
geom_boxplot(aes(fill=trt),width=0.5,position = position_dodge(0.5)) +
scale_x_discrete("Visit (d)",labels=unique(sample_cm$vsn)) +
scale_y_continuous("NRS pain [0-10]",limits = c(0,10),breaks = c(0,2,4,6,8,10)) +
theme_classic() +
theme(legend.position = "top")
setwd("/Users/zhouchuhan/introR/data")
KM3 = read_csv("f_14_2_4_1_crp_ri.csv") %>%
janitor::clean_names() %>%
mutate(avisitn=factor(avisitn,levels=c("2","4","5","6","7","8","10")),
aval_ad=ifelse(aval<=10,aval,11+(aval-10)/5)) # use ifelse() to create the adjusted aval
Note: Boxplots are squeezed when a singe scale being used
Note: Adjusted y-axis values are shown in the parenthesis
ggplot(data=KM3,aes(x=avisitn,y=aval_ad)) +
stat_boxplot(geom = 'errorbar',width=0.1,color="blue") +
geom_boxplot(width=0.15,color="blue",outlier.size=2,outlier.shape=1) +
scale_x_discrete(labels=c("Baseline","D4","1","2","4","6","12")) +
scale_y_continuous("CRP(mg/dL)", limits=c(0,15),breaks = c(0:10,12:15),labels = c(0:10,15,20,25,30)) +
labs(subtitle="Figure XXX. Boxplot Longitudinal \n Safety Analysis Set",
x="Weeks") +
theme_classic() +
theme( plot.margin = margin(0.5,0.2,0.1,0.85, "cm"),
panel.grid.major.x = element_blank(),
panel.grid.minor = element_blank(),
panel.grid.major.y = element_blank(),
plot.title = element_text(size = rel(0.6),face = "bold", vjust = 5),
plot.subtitle = element_text(size = rel(1),face = "bold",family="serif", hjust=0.5,vjust = 1.5),
axis.title.y = element_text(#face = "bold",
size = rel(1.2),family="serif",vjust= 5),
axis.title.x = element_text(#face = "bold",
size = rel(1.2),family="serif",vjust= -0.5),
axis.text.x = element_text(color="black",family="serif",size=rel(1.2)),
axis.text.y = element_text(color="black",family="serif",size=rel(1.2))
)
table=KM3 %>%
dplyr::group_by(avisitn) %>%
dplyr::summarise(n=n()) %>%
spread(avisitn,n) %>%
mutate(c1=c("Number of \n Subjects:")) %>%
select(c1,everything())
library(ggpubr)
ggtexttable(table, rows = NULL, cols = NULL, theme = ttheme("blank"))