# Packages attached for the whole script. ggrepel, ggallin, ggeasy and ggpp
# are only used through `pkg::fun()` calls and are therefore not attached.
library(ggplot2)
library(ggsignif)
library(patchwork)
# Load variant tables ----
# Both CSVs use their first column as row names; "." and "NA" are read as
# missing values.
ALL <- read.csv("figs/ALL.csv", row.names = 1, na.strings = c(".", "NA"))

# Drop the records whose source is 'glazer'. `%in%` never returns NA, so rows
# with a missing `data_source` are kept as real rows; with `!=` they would be
# turned into all-NA phantom rows by the logical subscript.
ALL <- ALL[!(ALL$data_source %in% "glazer"), ]

benign <- read.csv("figs/benign.csv", row.names = 1, na.strings = c(".", "NA"))

# Keep benign variants only for proteins that also occur in `ALL`.
benign <- benign[benign$uniprotID %in% ALL$uniprotID, ]
# Fig. 2c: per-gene GoF vs. LoF variant counts ----
# `score` is compared against 1 (counted as GoF) and -1 (counted as LoF).
# The counts are computed in a single pass with tapply(); a missing
# `uniprotID` in `ALL` only affects its own (NA) entry instead of turning
# every gene's count into NA, which is what an `==` comparison per gene does.
gene.ids <- unique(ALL$uniprotID)
gof.by.gene <- tapply(ALL$score == 1, ALL$uniprotID, sum)
lof.by.gene <- tapply(ALL$score == -1, ALL$uniprotID, sum)

gene.df <- data.frame(
  uniprotID = gene.ids,
  GoF = as.vector(gof.by.gene[match(gene.ids, names(gof.by.gene))]),
  LoF = as.vector(lof.by.gene[match(gene.ids, names(lof.by.gene))])
)

# UniProt accession -> gene symbol for the genes that get a text label.
genes.dic <- c(
  "Q09428" = "ABCC8", "P15056" = "BRAF", "O00555" = "CACNA1A",
  "P21802" = "FGFR2", "Q14654" = "KCNJ11", "P07949" = "RET",
  "Q99250" = "SCN2A", "Q14524" = "SCN5A", "P04637" = "TP53"
)

# match() on the character form keeps the lookup correct even if `uniprotID`
# was read as a factor; genes not in the dictionary get NA (no label).
gene.df$label <- unname(
  genes.dic[match(as.character(gene.df$uniprotID), names(genes.dic))]
)
gene.df$transfer.learning <- ifelse(is.na(gene.df$label), NA, "Selected")

fig.2c <- ggplot(
  gene.df,
  aes(x = GoF, y = LoF, col = transfer.learning, label = label)
) +
  geom_point() +
  ggrepel::geom_text_repel() +
  theme_bw() +
  scale_x_continuous(
    trans = ggallin::pseudolog10_trans,
    breaks = c(5, 10, 20, 30, 40, 50, 75, 100)
  ) +
  scale_y_continuous(
    trans = ggallin::pseudolog10_trans,
    breaks = c(5, 10, 20, 40, 60, 80, 100, 200, 400)
  )
fig.2c # still displayed when the script is run at top level
# Pass the plot explicitly rather than relying on ggplot2's "last plot".
ggsave("figs/fig.2c.pdf", plot = fig.2c, height = 3.5, width = 5)
# Fig. 2b: GOF/LOF distribution over secondary-structure classes ----

# One-letter secondary-structure code -> display name.
# NOTE(review): the codes look like DSSP classes, with " " meaning
# "no assignment" - confirm against whatever produced `secondary_struc`.
code.dict <- c(
  "H" = "Alpha helix (4-12)", "B" = "Isolated beta-bridge residue",
  "E" = "Beta Sheet", "G" = "3-10 helix", "I" = "Pi helix", "T" = "Turn",
  "S" = "Bend", " " = "none"
)

# Build one bar-chart panel for the variants in `dat`.
#
# For every secondary-structure class, the GOF count is tested against the
# panel-wide GOF fraction with an exact binomial test, p-values are
# FDR-adjusted, and classes with q <= 0.05 get a "*" bracket above the taller
# of their two bars. Bars show the fraction of all GOF (resp. LOF) variants
# that fall in each class.
#
# dat   : data frame with columns `secondary_struc` and `LABEL`
#         (`LABEL` must contain the values "GOF" and "LOF").
# title : centred panel title.
# Returns a ggplot object.
sse_enrichment_plot <- function(dat, title) {
  counts <- as.data.frame.matrix(table(dat$secondary_struc, dat$LABEL))
  stopifnot(all(c("GOF", "LOF") %in% names(counts)))

  n.class <- counts$GOF + counts$LOF
  gof.rate <- sum(counts$GOF) / sum(n.class)
  counts$p.value <- vapply(
    seq_len(nrow(counts)),
    function(i) {
      # binom.test() rejects n = 0; such a class carries no evidence.
      if (n.class[i] == 0) {
        return(NA_real_)
      }
      binom.test(counts$GOF[i], n.class[i], p = gof.rate)$p.value
    },
    numeric(1)
  )
  counts$q.value <- p.adjust(counts$p.value, method = "fdr")
  counts$sec_struc <- unname(code.dict[rownames(counts)])

  # Long format: first all GOF rows, then all LOF rows.
  to.plot <- rbind(counts, counts)
  to.plot$n_mutation <- c(counts$GOF, counts$LOF)
  to.plot$frac_mutation <- c(
    counts$GOF / sum(counts$GOF),
    counts$LOF / sum(counts$LOF)
  )
  to.plot$label <- rep(c("GOF", "LOF"), each = nrow(counts))
  # Break multi-word class names over several lines on the x axis.
  to.plot$sec_struc <- gsub(" ", "\n", to.plot$sec_struc)

  # Significant classes only; which() also drops rows whose q-value is NA.
  anno <- to.plot[
    which(to.plot$q.value <= 0.05 & !is.na(to.plot$sec_struc)),
  ]
  # Position of each class on the discrete x axis (sorted class names).
  x.pos <- match(anno$sec_struc, levels(factor(to.plot$sec_struc)))
  anno$x <- x.pos - 0.2
  anno$xend <- x.pos + 0.2
  anno$y <- anno$frac_mutation + 0.025
  # Per class keep the row with the higher bar (GOF wins ties) so the bracket
  # sits above both bars. The full rows are kept because the layer inherits
  # the plot-level `fill = label` mapping.
  anno <- anno[order(anno$x, -anno$y), ]
  anno <- anno[!duplicated(anno$sec_struc), ]
  # Every annotation string is made distinct by padding the star with a
  # different number of spaces (k - 1 on each side for the k-th bracket).
  pad <- strrep(" ", seq_len(nrow(anno)) - 1)
  anno$annotation <- paste0(pad, "*", pad)

  fig <- ggplot(to.plot, aes(x = sec_struc, y = frac_mutation, fill = label)) +
    geom_bar(stat = "identity", position = position_dodge())
  # With no significant class there is nothing to annotate; adding the layer
  # with zero rows (or filling `annotation` via 1:0) would fail.
  if (nrow(anno) > 0) {
    fig <- fig +
      geom_signif(
        stat = "identity",
        data = anno,
        aes(x = x, xend = xend, y = y, yend = y, annotation = annotation)
      )
  }
  fig +
    ylim(0, 0.8) +
    xlab("secondary structures") +
    theme_bw() +
    ggtitle(title) +
    ggeasy::easy_center_title()
}

# Genes with at least one record whose data source mentions "Heyne" form the
# channel-gene group; every other gene is in the "other" group.
ion.genes <- unique(ALL$uniprotID[grepl("Heyne", ALL$data_source)])
is.ion <- ALL$uniprotID %in% ion.genes

# Panels in the original order: other genes, channel genes, all genes.
p <- list(
  sse_enrichment_plot(ALL[!is.ion, ], "Other Genes"),
  sse_enrichment_plot(ALL[is.ion, ], "Na+/Ca2+ Channel Genes"),
  sse_enrichment_plot(ALL, "All Genes")
)

# Fig. 2b stacks the channel-gene panel above the other-gene panel.
p1 <- p[[2]] + p[[1]] + plot_layout(ncol = 1)
# Fig. 2a: per-variant feature densities, GOF vs. LOF vs. benign ----

# Mann-Whitney (Wilcoxon rank-sum) test, with wilcox.test() defaults, of one
# column of `ALL` between the rows labelled "GOF" and those labelled "LOF".
#
# feature : name of a column of `ALL`.
# Returns the `htest` object produced by wilcox.test().
gof_lof_wilcox <- function(feature) {
  wilcox.test(
    ALL[[feature]][ALL$LABEL == "GOF"],
    ALL[[feature]][ALL$LABEL == "LOF"]
  )
}

# Density of one feature per LABEL, pooled over `ALL` and `benign`, with the
# test's p-value (2 significant digits) written at the top of the panel.
#
# feature : name of a column present in both `ALL` and `benign`.
# test    : object with a `p.value` element, e.g. from gof_lof_wilcox().
# npc_x   : horizontal position of the p-value text ("middle", "right", ...).
# Returns a ggplot object.
label_density_plot <- function(feature, test, npc_x = "middle") {
  pooled <- rbind(
    ALL[, c(feature, "LABEL")],
    benign[, c(feature, "LABEL")]
  )
  # Fixed column name for the aesthetic; the axis keeps the feature's name.
  names(pooled)[1] <- "value"
  note <- data.frame(
    x = npc_x, y = "top",
    label = paste0(
      "Mann-Whitney test G/LoF p=",
      signif(test$p.value, digits = 2)
    )
  )
  ggplot(pooled, aes(x = value, col = LABEL)) +
    geom_density() +
    theme_bw() +
    ggpp::geom_text_npc(
      data = note,
      aes(npcx = x, npcy = y, label = label),
      col = "black"
    ) +
    xlab(feature)
}

wil.stat <- gof_lof_wilcox("rsa")
p2 <- label_density_plot("rsa", wil.stat)

wil.stat <- gof_lof_wilcox("pLDDT")
p3 <- label_density_plot("pLDDT", wil.stat)

# FoldXddG is drawn on a pseudo-log10 x axis, with the p-value text moved to
# the right-hand side of the panel.
wil.stat <- gof_lof_wilcox("FoldXddG")
p4 <- label_density_plot("FoldXddG", wil.stat, npc_x = "right") +
  scale_x_continuous(trans = ggallin::pseudolog10_trans)

wil.stat <- gof_lof_wilcox("conservation.entropy")
p5 <- label_density_plot("conservation.entropy", wil.stat)
# Assemble and write figures 2a and 2b ----
# Fig. 2a is a 2 x 2 grid of the density panels:
# pLDDT and FoldXddG on top, rsa and conservation.entropy below.
top.row <- p3 + p4
bottom.row <- p2 + p5
p <- top.row / bottom.row

ggsave(filename = "figs/fig.2a.pdf", plot = p, width = 12, height = 5)
# Fig. 2b is the stacked secondary-structure figure held in `p1`.
ggsave(filename = "figs/fig.2b.pdf", plot = p1, width = 6, height = 5)