Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Welcome To Ask or Share your Answers For Others

Categories

0 votes
632 views
in Technique[技术] by (71.8m points)

testing - R ggplot2 - perform pairwise tests per pair in a facet and show the p-values with ggsignif

Following up on this question I posted some days ago, I want to perform something similar.

Given the following MWE:

##############################
##MWE
library(ggplot2)
library(ggsignif)

set.seed(1)
alpha.subA <- data.frame(Sample.ID=paste(sample(LETTERS, 163, replace=TRUE), sample(1:1000, 163, replace=FALSE), sep=''),
                   Group=c(rep('C',10),rep('FH',10),rep('I',19),rep('IF',42),rep('NA',14),rep('NF',42),rep('NI',15),rep('NS',10),rep('PGMC4',1)),
                   Value=rnorm(n=163))
alpha.subA$DB <- "DATABASE1"
set.seed(2)
alpha.subB <- data.frame(Sample.ID=paste(sample(LETTERS, 163, replace=TRUE), sample(1:1000, 163, replace=FALSE), sep=''),
                   Group=c(rep('C',10),rep('FH',10),rep('I',19),rep('IF',42),rep('NA',14),rep('NF',42),rep('NI',15),rep('NS',10),rep('PGMC4',1)),
                   Value=rnorm(n=163))
alpha.subB$DB <- "DATABASE2"
alpha.sub <- rbind(alpha.subA, alpha.subB)

alpha.sub$DB <- as.factor(alpha.sub$DB)
alpha.sub$both <- factor(paste(alpha.sub$Group, alpha.sub$DB), levels=paste(rep(levels(alpha.sub$Group), each=length(levels(alpha.sub$DB))), rep(levels(alpha.sub$DB), length(levels(alpha.sub$Group)))))

png(filename="test.png", height=1000, width=2000)
print(#or ggsave()
  ggplot(alpha.sub, aes(x=both, y=Value, fill=Group)) + geom_boxplot() +
    facet_grid(~Group, scales="free", space="free_x") +
    stat_summary(fun.y=mean, geom="point", shape=5, size=4)

    # + geom_signif() ##HOW TO TEST EACH PAIR IN EACH FACET (DATABASE1 vs DATABASE2 PER GROUP)?
)
dev.off()
##############################

which produces:

test

I would like to make use of ggsignif's geom_signif() to compare DATABASE1 to DATABASE2 per Group (each pair in a facet) and show the significance level (* for p-value<0.05, ** for p-value<0.01) for each pairwise comparison.

Any help would be greatly appreciated. Thanks!!

EDIT!

I have managed to place the significance levels properly, but the moment I add facets, everything crumbles...

See my new MWE

##MWE
library(ggplot2)
library(ggsignif)
library(tidyverse)
library(broom)

set.seed(1)
alpha.subA <- data.frame(Sample.ID=paste(sample(LETTERS, 163, replace=TRUE), sample(1:1000, 163, replace=FALSE), sep=''),
                   Group=c(rep('C',10),rep('FH',10),rep('I',19),rep('IF',42),rep('NA',14),rep('NF',42),rep('NI',15),rep('NS',10),rep('PGMC4',1)),
                   Value=rnorm(n=163, mean=2.3, sd=0.45))
alpha.subA$DB <- "DATABASE1"
alpha.subB <- data.frame(Sample.ID=paste(sample(LETTERS, 163, replace=TRUE), sample(1:1000, 163, replace=FALSE), sep=''),
                   Group=c(rep('C',10),rep('FH',10),rep('I',19),rep('IF',42),rep('NA',14),rep('NF',42),rep('NI',15),rep('NS',10),rep('PGMC4',1)),
                   Value=rnorm(n=163, mean=2, sd=0.5))
alpha.subB$DB <- "DATABASE2"
alpha.sub <- rbind(alpha.subA, alpha.subB)

alpha.sub$DB <- as.factor(alpha.sub$DB)
alpha.sub$both <- factor(paste(alpha.sub$Group, alpha.sub$DB), levels=paste(rep(levels(alpha.sub$Group), each=length(levels(alpha.sub$DB))), rep(levels(alpha.sub$DB), length(levels(alpha.sub$Group)))))

v1 <- grep("DATABASE1", levels(alpha.sub$both), val=TRUE)[-9]
v2 <- grep("DATABASE2", levels(alpha.sub$both), val=TRUE)[-9]
CNb <- mapply(c, v1, v2, SIMPLIFY=FALSE)
CNb <- unname(CNb)

pv <- tidy(with(alpha.sub[ alpha.sub$Group != "PGMC4", ], pairwise.wilcox.test(Value, both, p.adjust.method = "BH")))#ADJUSTED PVALUES
#  data preparation 
CNb2 <- do.call(rbind.data.frame, CNb)
colnames(CNb2) <- colnames(pv)[-3]
# subset the pvalues, by merging the CN list
pv.final <- merge(CNb2, pv, by.x = c("group2", "group1"), by.y = c("group1", "group2"))
# fix ordering
pv.final <- pv.final[order(pv.final$group1), ] 
# set signif level
pv.final$map.signif <- ifelse(pv.final$p.value > 0.05, "", ifelse(pv.final$p.value > 0.01,"*", "**"))
# subset 
gr <- pv.final$p.value <= 0.05
CNb[gr]
# the plot
png(filename="test.png", height=1000, width=2000)
print(#or ggsave()
  ggplot(alpha.sub, aes(x=both, y=Value, fill=Group)) + geom_boxplot() +
    #facet_grid(~Group, scales="free", space="free_x") +
    stat_summary(fun.y=mean, geom="point", shape=5, size=4) +
  geom_signif(comparisons=CNb[gr],
              vjust=0.7,
              annotation=pv.final$map.signif[gr],
              textsize=10,
              size=1)
)
dev.off()

which produces:

test2

How can I reproduce this plot above, but with the facets in the first plot? Thanks!

See Question&Answers more detail:os

与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
Welcome To Ask or Share your Answers For Others

1 Reply

0 votes
by (71.8m points)

I used the idea from here. In brief, plot the plot, get the underlying data, update the annotations and plot the final picture with facets.

# first save the plot in variable
myplot <- ggplot(alpha.sub, aes(x=DB, y=Value)) + 
  geom_boxplot(aes(fill=Group)) +
  facet_grid(~Group) +
  geom_signif(test="wilcox.test", comparisons = list(c("DATABASE1", "DATABASE2")), map_signif_level = F)
# a plot with all significance layer per facet group. 
myplot

enter image description here

# build the plot, e.g. get a list of data frames
myplot2 <- ggplot_build(myplot)
# in list 2 you have access to each annotation 
head(myplot2$data[[2]])
  x xend        y     yend annotation                 group PANEL shape colour textsize angle hjust vjust alpha family
1 1    1 3.968526 4.063608       0.11 DATABASE1-DATABASE2-1     1    19  black     3.88     0   0.5     0    NA       
2 1    2 4.063608 4.063608       0.11 DATABASE1-DATABASE2-1     1    19  black     3.88     0   0.5     0    NA       
3 2    2 4.063608 3.968526       0.11 DATABASE1-DATABASE2-1     1    19  black     3.88     0   0.5     0    NA       
4 1    1 3.968526 4.063608      0.035 DATABASE1-DATABASE2-1     2    19  black     3.88     0   0.5     0    NA       
5 1    2 4.063608 4.063608      0.035 DATABASE1-DATABASE2-1     2    19  black     3.88     0   0.5     0    NA       
6 2    2 4.063608 3.968526      0.035 DATABASE1-DATABASE2-1     2    19  black     3.88     0   0.5     0    NA       
  fontface lineheight linetype size
1        1        1.2        1  0.5
2        1        1.2        1  0.5
3        1        1.2        1  0.5
4        1        1.2        1  0.5
5        1        1.2        1  0.5
6        1        1.2        1  0.5

# now you can remove or update the annotation
# get all ADJUSTED PVALUES
pv <- tidy(with(alpha.sub, pairwise.wilcox.test(Value, interaction(Group,DB), p.adjust.method = "BH")))
# create final dataset using dplyr
pv_final <- pv %>% 
  separate(group1, c("g1_1", "g1_2")) %>% 
  separate(group2, c("g2_1", "g2_2")) %>% 
  filter(g1_1 == g2_1) %>% 
  mutate(p=ifelse(p.value > 0.05, "", ifelse(p.value > 0.01,"*", "**")))

# each pvalue is repeated three times in this dataset.  
myplot2$data[[2]]$annotation <- rep(pv_final$p, each=3)
# remove non significants
myplot2$data[[2]] <- myplot2$data[[2]][myplot2$data[[2]]$annotation != "",]
# and the final plot
plot(ggplot_gtable(myplot2))

enter image description here


与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…
OGeek|极客中国-欢迎来到极客的世界,一个免费开放的程序员编程交流平台!开放,进步,分享!让技术改变生活,让极客改变未来! Welcome to OGeek Q&A Community for programmer and developer-Open, Learning and Share
Click Here to Ask a Question

...