# Yuanclaw / modules /gsea_analysis.R
# huashu's picture
# Export YuanSeq to Hugging Face without binary assets
# 7e6a9d1
# =====================================================
# GSEA分析模块
# =====================================================
gsea_analysis_server <- function(input, output, session, deg_results) {
# =====================================================
# GSEA 模块
# =====================================================
# 🔧 辅助函数:转换core_enrichment为SYMBOL格式
# Convert the "/"-separated gene IDs in a GSEA result's `core_enrichment`
# column to gene SYMBOLs.
#
# Args:
#   df:          GSEA result table. Returned unchanged when it has no
#                `core_enrichment` column.
#   deg_results: Zero-argument function (the module's reactive) returning a
#                list whose `deg_df` element has SYMBOL and ENTREZID columns.
#
# Returns:
#   `df` with `core_enrichment` rewritten as a character column. Entries made
#   up only of digit strings are treated as ENTREZIDs and mapped to SYMBOLs
#   (IDs without a mapping are dropped); NA/empty entries become ""; anything
#   else is assumed to already hold SYMBOLs and is kept verbatim.
convert_core_enrichment_to_symbol <- function(df, deg_results) {
  if (!"core_enrichment" %in% colnames(df)) {
    return(df)
  }

  # Build the ENTREZID -> SYMBOL lookup from the differential-expression table.
  res <- deg_results()$deg_df
  has_both_ids <- !is.na(res$SYMBOL) & !is.na(res$ENTREZID)
  res_clean <- res[has_both_ids, , drop = FALSE]
  # Keep the first row per SYMBOL, then sort by SYMBOL so that an ENTREZID
  # shared by several symbols resolves deterministically (first symbol in
  # C-locale order), mirroring the row order of a grouped slice.
  res_clean <- res_clean[!duplicated(res_clean$SYMBOL), , drop = FALSE]
  res_clean <- res_clean[order(res_clean$SYMBOL, method = "radix"), , drop = FALSE]
  entrez_to_symbol <- setNames(res_clean$SYMBOL, res_clean$ENTREZID)

  to_symbols <- function(x) {
    if (is.na(x) || !nzchar(x)) {
      return("")
    }
    genes <- unlist(strsplit(x, "/", fixed = TRUE))
    # Anything that is not purely numeric is taken to be SYMBOLs already.
    if (!all(grepl("^[0-9]+$", genes))) {
      return(x)
    }
    symbols <- entrez_to_symbol[genes]
    paste(symbols[!is.na(symbols)], collapse = "/")
  }

  # vapply() guarantees a character column even for a zero-row table, where
  # sapply() would hand back an empty list.
  df$core_enrichment <- vapply(
    as.character(df$core_enrichment),
    to_symbols,
    character(1),
    USE.NAMES = FALSE
  )
  df
}
# Run GSEA when the "run" button (input$run_gsea) fires.
#
# Reads deg_results()$deg_df (needs log2FoldChange plus the SYMBOL/ENTREZID
# column chosen in input$gsea_id_type), the uploaded GMT file
# (input$gmt_file) and the p-value cutoff (input$gsea_pvalue).
#
# Returns the object produced by clusterProfiler::GSEA(), or NULL when the GMT
# cannot be converted, GSEA fails, or no pathway passes the cutoff. Progress
# and failures are reported through showNotification() and console output.
gsea_results <- eventReactive(input$run_gsea, {
  req(deg_results(), input$gmt_file)
  showNotification("正在运行 GSEA...", type = "message")

  # Build the ranked gene list: one log2FC per gene ID, largest |log2FC| wins.
  deg_data <- deg_results()
  res <- deg_data$deg_df
  id_col <- if (input$gsea_id_type == "SYMBOL") "SYMBOL" else "ENTREZID"
  res_clean <- res[!is.na(res[[id_col]]) & !is.na(res$log2FoldChange), ]
  res_clean <- res_clean %>%
    group_by(!!sym(id_col)) %>%
    filter(abs(log2FoldChange) == max(abs(log2FoldChange))) %>%
    ungroup()

  gene_list <- res_clean$log2FoldChange
  names(gene_list) <- res_clean[[id_col]]
  gene_list <- sort(gene_list, decreasing = TRUE)
  # The filter above keeps every row tied for the maximum |log2FC|; drop the
  # repeats so the ranked list carries each gene ID exactly once.
  gene_list <- gene_list[!duplicated(names(gene_list))]

  # Read the gene sets.
  gmt <- clusterProfiler::read.gmt(input$gmt_file$datapath)

  # When running on SYMBOLs but the GMT holds numeric (Entrez) IDs, translate
  # the GMT to SYMBOLs using the mapping available in the DEG table.
  if (input$gsea_id_type == "SYMBOL") {
    sample_genes <- head(gmt$gene, 100)
    if (all(grepl("^[0-9]+$", sample_genes))) {
      cat("🔄 检测到GMT使用ENTREZID,正在转换为SYMBOL...\n")
      cat(sprintf("📊 GMT文件: %d 行\n", nrow(gmt)))

      # TRUE when `gmt` was replaced by a usable SYMBOL version. The flag is
      # needed because a return() inside the error handler only leaves the
      # handler, not this reactive.
      gmt_converted <- tryCatch({
        entrez_to_symbol <- setNames(res_clean$SYMBOL, res_clean$ENTREZID)
        cat(sprintf("📊 映射关系: %d 个ENTREZID -> %d 个SYMBOL\n",
                    length(entrez_to_symbol), sum(!is.na(entrez_to_symbol))))

        gmt$gene_symbol <- entrez_to_symbol[as.character(gmt$gene)]

        n_total <- nrow(gmt)
        n_mapped <- sum(!is.na(gmt$gene_symbol))
        n_unmapped <- sum(is.na(gmt$gene_symbol))
        cat(sprintf("📊 转换结果: %d/%d 成功映射 (%.1f%%), %d 无法映射\n",
                    n_mapped, n_total, n_mapped/n_total*100, n_unmapped))

        if (n_mapped < n_total * 0.5) {
          # Fewer than half mapped: the ID type is probably mis-selected.
          # Warn, but carry on with the partial mapping.
          msg <- sprintf(
            "⚠️ GMT文件中超过50%%的基因无法映射!\n\n您的GMT文件使用ENTREZID格式,但您选择了SYMBOL。\n\n建议:\n1. 在'GMT中的ID类型'中选择'Entrez ID'\n2. 或者使用SYMBOL格式的GMT文件\n\n当前映射:%.1f%% (%d/%d)",
            n_mapped/n_total*100, n_mapped, n_total
          )
          showNotification(msg, type = "warning", duration = 10)
          cat("⚠️ 映射率过低,建议用户调整ID类型选择\n")
        }

        # Keep only genes that mapped, in the term/gene layout GSEA expects.
        gmt_filtered <- gmt[!is.na(gmt$gene_symbol), ]
        gmt_filtered <- gmt_filtered[, c("term", "gene_symbol")]
        colnames(gmt_filtered) <- c("term", "gene")

        if (nrow(gmt_filtered) > 0) {
          gmt <- gmt_filtered
          cat(sprintf("✅ GMT转换完成: %d 个基因集, %d 个基因\n",
                      length(unique(gmt$term)), nrow(gmt)))
          TRUE
        } else {
          showNotification("❌ GMT转换失败:无法将ENTREZID映射到SYMBOL\n\n请选择正确的ID类型", type = "error")
          FALSE
        }
      }, error = function(e) {
        error_msg <- conditionMessage(e)
        cat(sprintf("❌ GMT转换错误: %s\n", error_msg))
        msg <- sprintf(
          "GMT ID类型不匹配!\n\n错误:%s\n\n您的GMT文件使用ENTREZID格式,但您选择了SYMBOL。\n\n请选择'Entrez ID'作为ID类型。",
          substr(error_msg, 1, 100)
        )
        showNotification(msg, type = "error", duration = 15)
        FALSE
      })

      # Stop here instead of running GSEA against a GMT whose IDs cannot
      # match the ranked list.
      if (!isTRUE(gmt_converted)) {
        return(NULL)
      }
    }
  }

  tryCatch({
    gsea_res <- clusterProfiler::GSEA(gene_list,
                                      TERM2GENE = gmt,
                                      pvalueCutoff = input$gsea_pvalue,
                                      minGSSize = 10,
                                      maxGSSize = 500,
                                      verbose = FALSE)
    if (nrow(gsea_res@result) == 0) {
      showNotification("GSEA 未发现显著富集通路", type = "warning")
      return(NULL)
    }
    gsea_res
  }, error = function(e) {
    showNotification(paste("GSEA 运行失败:", conditionMessage(e)), type = "error")
    NULL
  })
})
# === 新增:GSEA结果数据处理函数 ===
# Reactive slice of the GSEA result table matching the export scope chosen in
# input$gsea_export_type:
#   "full"        -> every row
#   "significant" -> rows with pvalue < 0.05
#   "top50"       -> the input$gsea_export_topn rows with the smallest pvalue
# Any other value yields NULL.
gsea_processed_data <- reactive({
  req(gsea_results())
  result_tbl <- gsea_results()@result

  if (input$gsea_export_type == "full") {
    result_tbl
  } else if (input$gsea_export_type == "significant") {
    filter(result_tbl, pvalue < 0.05)
  } else if (input$gsea_export_type == "top50") {
    n_rows <- input$gsea_export_topn
    head(arrange(result_tbl, pvalue), n_rows)
  }
})
# === 新增:GSEA结果下载处理器 ===
# 下载完整结果
# Download handler: complete GSEA result table as CSV, with core_enrichment
# expressed as SYMBOLs. The file name carries today's date.
output$download_gsea_full <- downloadHandler(
  filename = function() {
    stamp <- Sys.Date()
    paste0("GSEA_Full_Results_", stamp, ".csv")
  },
  content = function(file) {
    req(gsea_results())
    full_tbl <- convert_core_enrichment_to_symbol(gsea_results()@result, deg_results)
    write.csv(full_tbl, file, row.names = FALSE)
  }
)
# 下载显著结果
# Download handler: GSEA rows with pvalue < 0.05 as CSV, with core_enrichment
# expressed as SYMBOLs. The file name carries today's date.
output$download_gsea_sig <- downloadHandler(
  filename = function() {
    stamp <- Sys.Date()
    paste0("GSEA_Significant_Results_", stamp, ".csv")
  },
  content = function(file) {
    req(gsea_results())
    significant_tbl <- filter(gsea_results()@result, pvalue < 0.05)
    significant_tbl <- convert_core_enrichment_to_symbol(significant_tbl, deg_results)
    write.csv(significant_tbl, file, row.names = FALSE)
  }
)
# 下载Top N结果
# Download handler: the input$gsea_export_topn rows with the smallest pvalue
# as CSV, with core_enrichment expressed as SYMBOLs. The file name carries the
# requested N and today's date.
output$download_gsea_top <- downloadHandler(
  filename = function() {
    paste0("GSEA_Top", input$gsea_export_topn, "_Results_", Sys.Date(), ".csv")
  },
  content = function(file) {
    req(gsea_results())
    ranked_tbl <- arrange(gsea_results()@result, pvalue)
    best_tbl <- head(ranked_tbl, input$gsea_export_topn)
    best_tbl <- convert_core_enrichment_to_symbol(best_tbl, deg_results)
    write.csv(best_tbl, file, row.names = FALSE)
  }
)
# 下载GSEA图为SVG
# Download handler: enrichment plot of the selected pathway (first pathway
# when no table row is selected) as a 10 x 6 inch SVG.
#
# The plot is rebuilt here in a simplified form (no gene labels). GseaVis is
# used when its namespace is loaded, otherwise enrichplot, otherwise a
# placeholder plot is written.
output$download_gsea_plot_svg <- downloadHandler(
  filename = function() {
    req(gsea_results())
    selected <- input$gsea_table_rows_selected
    result_ids <- gsea_results()@result$ID
    pathway_id <- if (length(selected)) result_ids[selected] else result_ids[1]
    paste0("GSEA_Plot_", pathway_id, "_", Sys.Date(), ".svg")
  },
  content = function(file) {
    req(gsea_results())
    gsea_obj <- gsea_results()
    selected <- input$gsea_table_rows_selected
    if (length(selected)) {
      pathway_id <- gsea_obj@result$ID[selected]
    } else {
      pathway_id <- gsea_obj@result$ID[1]
    }

    # Text colour follows the dark/light theme toggle.
    txt_col <- if (input$theme_toggle) "white" else "black"
    label_theme <- theme(
      plot.title = element_text(color = txt_col, face = "bold", hjust = 0.5),
      axis.title = element_text(color = txt_col, face = "bold"),
      axis.text = element_text(color = txt_col)
    )

    if ("GseaVis" %in% loadedNamespaces()) {
      p <- GseaVis::gseaNb(
        object = gsea_obj,
        geneSetID = pathway_id,
        subPlot = 2,
        termWidth = 35,
        addPval = TRUE
      ) + label_theme
    } else if ("enrichplot" %in% loadedNamespaces()) {
      p <- enrichplot::gseaplot2(gsea_obj, geneSetID = pathway_id) + label_theme
    } else {
      p <- ggplot() + labs(title = "No plotting package available")
    }

    svg(file, width = 10, height = 6)
    # Close the device even if drawing fails, so a failed download does not
    # leave an SVG device open in the session.
    on.exit(dev.off(), add = TRUE)
    print(p)
  },
  contentType = "image/svg+xml"
)
# 下载山脊图为SVG
# Download handler: ridge plot of the top pathways as a 12 x 8 inch SVG.
#
# The number of pathways comes from input$gsea_ridge_pathways (default 10 when
# missing or < 1) and is clamped to the number of available pathways. A
# placeholder plot is written when enrichplot is not loaded.
output$download_gsea_ridge_svg <- downloadHandler(
  filename = function() {
    paste0("GSEA_Ridge_Plot_", Sys.Date(), ".svg")
  },
  content = function(file) {
    req(gsea_results())
    gsea_obj <- gsea_results()
    # Text colour follows the dark/light theme toggle.
    txt_col <- if (input$theme_toggle) "white" else "black"

    top_n <- suppressWarnings(as.integer(input$gsea_ridge_pathways))
    if (is.na(top_n) || top_n < 1) {
      top_n <- 10L
    }
    total_pathways <- nrow(gsea_obj@result)
    top_n <- max(1, min(top_n, total_pathways))

    if ("enrichplot" %in% loadedNamespaces()) {
      p <- enrichplot::ridgeplot(gsea_obj, showCategory = top_n) +
        labs(title = sprintf("Top %d GSEA Pathways", top_n)) +
        theme(
          plot.title = element_text(color = txt_col, face = "bold", hjust = 0.5),
          axis.title = element_text(color = txt_col, face = "bold"),
          axis.text = element_text(color = txt_col)
        )
    } else {
      p <- ggplot() + labs(title = "enrichplot package required")
    }

    svg(file, width = 12, height = 8)
    # Close the device even if drawing fails, so a failed download does not
    # leave an SVG device open in the session.
    on.exit(dev.off(), add = TRUE)
    print(p)
  },
  contentType = "image/svg+xml"
)
# Results table: shows the GSEA result with core_enrichment rendered as
# SYMBOLs, single-row selection, and the four score columns rounded to 4
# decimals. Diagnostic lines are written to the console along the way.
output$gsea_table <- DT::renderDataTable({
  req(gsea_results())
  result_tbl <- gsea_results()@result

  # Console diagnostics about the raw result.
  cat(sprintf("📊 GSEA结果: %d 行, %d 列\n", nrow(result_tbl), ncol(result_tbl)))
  cat(sprintf("📊 列名: %s\n", paste(head(colnames(result_tbl), 10), collapse=", ")))
  core_present <- "core_enrichment" %in% colnames(result_tbl)
  cat(sprintf("📊 有core_enrichment列: %s\n", core_present))

  # Work on a display copy so the stored result keeps its original IDs.
  display_tbl <- result_tbl
  if (core_present) {
    cat("✅ 找到core_enrichment列,正在转换为SYMBOL...\n")

    # ENTREZID -> SYMBOL lookup from rows of the DEG table that have both IDs.
    deg_tbl <- deg_results()$deg_df
    mappable <- deg_tbl[!is.na(deg_tbl$SYMBOL) & !is.na(deg_tbl$ENTREZID), ]
    id_map <- setNames(mappable$SYMBOL, mappable$ENTREZID)

    symbolize <- function(core_str) {
      if (is.na(core_str) || core_str == "") {
        return("")
      }
      gene_ids <- unlist(strsplit(core_str, "/"))
      # Non-numeric IDs are treated as SYMBOLs and passed through untouched.
      if (!all(grepl("^[0-9]+$", gene_ids))) {
        return(core_str)
      }
      hits <- id_map[gene_ids]
      paste(hits[!is.na(hits)], collapse = "/")
    }
    converted <- sapply(display_tbl$core_enrichment, symbolize, USE.NAMES = FALSE)

    cat("✅ core_enrichment转换完成\n")
    cat(sprintf("📊 示例: %s\n", converted[1]))

    # Overwrite the original column in place so the column layout is unchanged.
    display_tbl$core_enrichment <- converted
  }

  if (nrow(display_tbl) == 0) {
    cat("❌ 错误:df_show为空!\n")
    return(DT::datatable(data.frame(Error="No data")))
  }
  cat(sprintf("📊 准备显示: %d 行, %d 列\n", nrow(display_tbl), ncol(display_tbl)))

  table_options <- list(pageLength = 10, scrollX = TRUE)
  rounded_cols <- c("enrichmentScore", "NES", "pvalue", "p.adjust")
  DT::datatable(
    display_tbl,
    selection = 'single',
    options = table_options,
    rownames = FALSE
  ) %>%
    DT::formatRound(rounded_cols, 4)
})
# Render the enrichment plot for the pathway selected in the results table
# (the first pathway when nothing is selected).
#
# With GseaVis loaded, the plot is drawn by GseaVis::gseaNb() and the top
# genes from extract_leading_edge_genes() are overlaid as SYMBOL labels placed
# on an approximate running score (see the note at the computation). With only
# enrichplot loaded, enrichplot::gseaplot2() is used without labels. Otherwise
# a placeholder plot is shown.
#
# Inputs read: gsea_table_rows_selected, theme_toggle, gsea_top_genes,
# custom_gene_list, gsea_gene_order, gmt_file, gsea_id_type, gsea_stats_x,
# gsea_stats_y.
output$gsea_plot <- renderPlot({
  req(gsea_results())
  gsea_obj <- gsea_results()
  selected <- input$gsea_table_rows_selected
  txt_col <- if (input$theme_toggle) "white" else "black"

  # Row of the result table being plotted. Falling back to row 1 here (rather
  # than indexing with an empty selection later) keeps the gene labels tied to
  # the pathway that is actually drawn.
  row_idx <- if (length(selected)) selected else 1L
  pathway_id <- gsea_obj@result$ID[row_idx]
  title_text <- if (length(selected)) {
    pathway_id
  } else {
    paste(pathway_id, "(Default: Top 1)")
  }

  # Small helpers for the console previews below.
  preview <- function(x) paste(head(x, 10), collapse=", ")
  ellipsis <- function(x) if (length(x) > 10) "..." else ""

  # NOTE(review): `leading_genes` (including the custom-gene override below)
  # is computed and logged but never read by the plotting code further down,
  # which labels genes from extract_leading_edge_genes() only. It is kept so
  # console output and reactive dependencies stay as they were; confirm
  # whether the custom gene list was meant to drive the labels.
  leading_genes <- NULL
  tryCatch({
    leading_genes_data <- extract_leading_edge_genes()
    if (!is.null(leading_genes_data) && nrow(leading_genes_data) > 0) {
      leading_genes <- leading_genes_data$gene
      top_n <- input$gsea_top_genes
      if (length(leading_genes) > top_n) {
        leading_genes <- leading_genes[seq_len(top_n)]
      }
      cat(sprintf("在GSEA图上标记 %d 个Leading Edge基因\n", length(leading_genes)))
      cat("基因列表:", preview(leading_genes), ellipsis(leading_genes), "\n")
    }
  }, error = function(e) {
    cat("提取Leading Edge基因失败:", e$message, "\n")
  })

  # Custom gene list typed by the user (comma / semicolon / whitespace
  # separated); see the NOTE(review) above.
  custom_gene_list <- input$custom_gene_list
  if (!is.null(custom_gene_list) && nzchar(trimws(custom_gene_list))) {
    custom_genes <- unlist(strsplit(custom_gene_list, "[,;\\s]+"))
    custom_genes <- trimws(custom_genes)
    custom_genes <- custom_genes[nzchar(custom_genes)]
    if (length(custom_genes) > 0) {
      # Ranked list keyed by SYMBOL, largest |log2FC| per symbol.
      res <- deg_results()$deg_df
      res_clean <- res[!is.na(res$SYMBOL) & !is.na(res$log2FoldChange), ]
      res_clean <- res_clean %>%
        group_by(SYMBOL) %>%
        filter(abs(log2FoldChange) == max(abs(log2FoldChange))) %>%
        ungroup()
      gene_list <- res_clean$log2FoldChange
      names(gene_list) <- res_clean$SYMBOL
      gene_list <- sort(gene_list, decreasing = TRUE)

      # Restrict the custom genes to members of the plotted pathway.
      gmt <- clusterProfiler::read.gmt(input$gmt_file$datapath)
      pathway_genes_in_gmt <- gmt$gene[gmt$term == pathway_id]
      custom_genes_in_pathway <- custom_genes[custom_genes %in% pathway_genes_in_gmt]

      if (length(custom_genes_in_pathway) == 0) {
        cat("警告:自定义基因都不在通路中\n")
        leading_genes <- NULL
      } else {
        # unname(): a lookup miss yields an NA name, and data.frame() would
        # otherwise try to use those names as row names and fail with
        # "row names contain missing values".
        custom_genes_data <- data.frame(
          gene = custom_genes_in_pathway,
          log2FoldChange = unname(gene_list[custom_genes_in_pathway]),
          stringsAsFactors = FALSE
        )
        custom_genes_data <- custom_genes_data[!is.na(custom_genes_data$log2FoldChange), ]
        if (nrow(custom_genes_data) == 0) {
          cat("警告:自定义基因都没有log2FoldChange值\n")
          leading_genes <- NULL
        } else {
          if (input$gsea_gene_order == "abs_logFC") {
            custom_genes_data <- custom_genes_data[order(abs(custom_genes_data$log2FoldChange), decreasing = TRUE), ]
          } else if (input$gsea_gene_order == "logFC") {
            custom_genes_data <- custom_genes_data[order(custom_genes_data$log2FoldChange, decreasing = TRUE), ]
          } else if (input$gsea_gene_order == "rank") {
            # Order by position in the ranked list.
            custom_genes_data$rank <- match(custom_genes_data$gene, names(gene_list))
            custom_genes_data <- custom_genes_data[order(custom_genes_data$rank), ]
          }
          leading_genes <- custom_genes_data$gene
          cat(sprintf("使用自定义基因列表(按%s排序): %d 个基因\n",
                      input$gsea_gene_order, length(leading_genes)))
          cat("排序后的基因:", preview(leading_genes), ellipsis(leading_genes), "\n")
        }
      }
    }
  }

  # Theme shared by both plotting back-ends: themed text on a transparent
  # background.
  plot_theme <- theme(
    plot.title = element_text(color = txt_col, face = "bold", hjust = 0.5),
    axis.title = element_text(color = txt_col, face = "bold"),
    axis.text = element_text(color = txt_col),
    legend.text = element_text(color = txt_col),
    legend.title = element_text(color = txt_col),
    panel.background = element_rect(fill = "transparent", colour = NA),
    plot.background = element_rect(fill = "transparent", colour = NA)
  )

  if ("GseaVis" %in% loadedNamespaces()) {
    top_genes_data <- tryCatch({
      extract_leading_edge_genes()
    }, error = function(e) {
      cat("❌ 调用extract_leading_edge_genes失败:", e$message, "\n")
      NULL
    })

    # `genes_to_add` holds IDs in the type the GSEA run used (to locate genes
    # in the ranked list); `label_symbols` holds the SYMBOL shown for each of
    # them. The two vectors are kept the same length and order.
    genes_to_add <- NULL
    label_symbols <- NULL
    if (!is.null(top_genes_data) && is.data.frame(top_genes_data) && nrow(top_genes_data) > 0) {
      if (input$gsea_id_type == "ENTREZID") {
        cat("📝 GSEA使用ENTREZID运行,需要提供ENTREZID格式的基因\n")
        res <- deg_results()$deg_df
        res_clean <- res[!is.na(res$SYMBOL) & !is.na(res$ENTREZID), ]
        symbol_to_entrez <- setNames(res_clean$ENTREZID, res_clean$SYMBOL)
        genes_entrez <- symbol_to_entrez[top_genes_data$gene]
        # Drop unmapped genes from BOTH vectors so each label stays attached
        # to its own position.
        mapped <- !is.na(genes_entrez)
        if (any(mapped)) {
          genes_to_add <- as.character(genes_entrez[mapped])
          label_symbols <- top_genes_data$gene[mapped]
          cat(sprintf("📝 转换为ENTREZID格式: %d 个基因\n", length(genes_to_add)))
          cat("基因列表(ENTREZID):", preview(genes_to_add), ellipsis(genes_to_add), "\n")
        }
      } else {
        cat("📝 GSEA使用SYMBOL运行,直接使用SYMBOL格式\n")
        genes_to_add <- top_genes_data$gene
        label_symbols <- top_genes_data$gene
        cat(sprintf("📝 准备标注 %d 个基因(SYMBOL格式)\n", length(genes_to_add)))
        cat("基因列表(SYMBOL):", preview(genes_to_add), ellipsis(genes_to_add), "\n")
      }
    }

    # Base plot. gseaNb()'s addGene argument is deliberately not used because
    # it would print the raw IDs; SYMBOL labels are layered on below instead.
    plot_args <- list(
      object = gsea_obj,
      geneSetID = pathway_id,
      subPlot = 2,
      termWidth = 35,
      addPval = TRUE,
      pvalX = input$gsea_stats_x,
      pvalY = input$gsea_stats_y
    )
    p <- do.call(GseaVis::gseaNb, plot_args)

    if (!is.null(genes_to_add) && length(genes_to_add) > 0) {
      cat("📝 添加SYMBOL标签(沿着enrichment score曲线)\n")
      annotation_color <- if (input$theme_toggle) "#00FF00" else "#CC0000"
      # Defined up front so the fallback handler can always see it.
      gene_positions <- rep(NA_integer_, length(genes_to_add))

      # The labelled plot is RETURNED from tryCatch(): assigning to `p` inside
      # the error handler would only change the handler's local copy.
      p <- tryCatch({
        gene_list <- gsea_obj@geneList
        gene_names <- names(gene_list)
        # x coordinate = position of each gene in the ranked list.
        gene_positions <- match(genes_to_add, gene_names)

        # Genes driving the plotted pathway: core_enrichment when present,
        # otherwise geneID, for the plotted row.
        pathway_genes <- if ("core_enrichment" %in% colnames(gsea_obj@result)) {
          as.character(gsea_obj@result$core_enrichment[row_idx])
        } else {
          as.character(gsea_obj@result$geneID[row_idx])
        }
        pathway_genes_list <- if (length(pathway_genes) == 0L || is.na(pathway_genes[1L]) || !nzchar(pathway_genes[1L])) {
          character(0L)
        } else {
          unlist(strsplit(pathway_genes[1L], "/"))
        }

        # Simplified running score: cumulative sum of the ranking statistic
        # over pathway members, scaled to [-1, 1]. This approximates, and is
        # not identical to, the enrichment score GSEA computes.
        gene_in_pathway <- gene_names %in% pathway_genes_list
        running_score <- cumsum(unname(gene_list) * gene_in_pathway)
        rmax <- max(abs(running_score), na.rm = TRUE)
        if (is.finite(rmax) && rmax > 0) running_score <- running_score / rmax

        label_data <- data.frame(
          x = gene_positions,
          label = label_symbols,
          stringsAsFactors = FALSE
        )
        # Genes absent from the ranked list cannot be placed.
        label_data <- label_data[!is.na(label_data$x), ]

        annotated <- p
        if (nrow(label_data) > 0) {
          label_data$y <- running_score[label_data$x]
          cat(sprintf("📝 提取了 %d 个基因的enrichment score\n", nrow(label_data)))
          cat("📝 y值范围:", min(label_data$y), "至", max(label_data$y), "\n")

          annotated <- annotated +
            geom_point(
              data = label_data,
              aes(x = x, y = y),
              inherit.aes = FALSE,
              size = 2.5,
              color = annotation_color,
              alpha = 0.9
            ) +
            geom_text(
              data = label_data,
              # Nudge labels away from the point: up for positive scores,
              # down for negative ones.
              aes(x = x, y = y + ifelse(y >= 0, 0.05, -0.05), label = label),
              inherit.aes = FALSE,
              size = 3.5,
              color = annotation_color,
              fontface = "bold",
              vjust = ifelse(label_data$y >= 0, 0, 1),
              angle = 45,
              hjust = 0,
              check_overlap = TRUE
            )
          cat("✅ SYMBOL标签已添加到enrichment score曲线上\n")
        }
        annotated
      }, error = function(e) {
        cat("❌ 提取enrichment score失败:", e$message, "\n")
        cat("⚠️ 使用备用方法:固定y坐标\n")
        # Fallback: place every label at a fixed height.
        label_data <- data.frame(
          x = gene_positions,
          y = 0.5,
          label = label_symbols,
          stringsAsFactors = FALSE
        )
        label_data <- label_data[!is.na(label_data$x), ]
        fallback <- p
        if (nrow(label_data) > 0) {
          fallback <- fallback + geom_text(
            data = label_data,
            aes(x = x, y = y, label = label),
            inherit.aes = FALSE,
            size = 3.5,
            color = annotation_color,
            fontface = "bold",
            vjust = 1.5,
            angle = 45,
            hjust = 0
          )
        }
        fallback
      })
    }

    p <- p + plot_theme
  } else {
    if ("enrichplot" %in% loadedNamespaces()) {
      p <- enrichplot::gseaplot2(gsea_obj, geneSetID = pathway_id, title = title_text) +
        plot_theme
    } else {
      p <- ggplot() + labs(title = "缺少 GseaVis 或 enrichplot 包,无法绘图")
    }
  }
  print(p)
})
# === 新增:Leading Edge 基因提取函数 ===
# Top genes of the selected pathway (first pathway when no table row is
# selected), always reported as SYMBOLs.
#
# Behaviour depends on input$gsea_gene_order:
#   "leading_edge" -> genes from the result's core_enrichment field, ordered
#                     by position in the ranked list; if that yields nothing,
#                     falls through to the GMT-based path below (GMT order)
#   "abs_logFC"    -> pathway genes from the GMT, by decreasing |log2FC|
#   "logFC"        -> pathway genes from the GMT, by decreasing log2FC
#   "rank"         -> pathway genes from the GMT, by ranked-list position
#
# Returns a data frame with columns gene (SYMBOL), log2FoldChange, rank (only
# in modes that compute it) and rank_label ("#1", "#2", ...), limited to
# input$gsea_top_genes rows. Returns NULL when the pathway has no usable genes
# or the requested top-N is not at least 1.
extract_leading_edge_genes <- reactive({
  req(gsea_results())
  gsea_obj <- gsea_results()
  selected <- input$gsea_table_rows_selected
  if (!length(selected)) {
    selected <- 1
  }
  pathway_id <- gsea_obj@result$ID[selected]

  res <- deg_results()$deg_df
  # ID type the user declared for the GMT file.
  id_col_in_gmt <- if (input$gsea_id_type == "SYMBOL") "SYMBOL" else "ENTREZID"

  # Ranked list keyed by SYMBOL, largest |log2FC| per symbol.
  res_clean <- res[!is.na(res$SYMBOL) & !is.na(res$log2FoldChange), ]
  res_clean <- res_clean %>%
    group_by(SYMBOL) %>%
    filter(abs(log2FoldChange) == max(abs(log2FoldChange))) %>%
    ungroup()
  gene_list <- res_clean$log2FoldChange
  names(gene_list) <- res_clean$SYMBOL
  gene_list <- sort(gene_list, decreasing = TRUE)

  # TRUE when every ID is a digit string, i.e. looks like an Entrez ID.
  looks_like_entrez <- function(ids) all(grepl("^[0-9]+$", ids))

  # Map Entrez IDs to SYMBOLs, dropping IDs without a mapping.
  entrez_ids_to_symbols <- function(ids) {
    entrez_to_symbol <- setNames(res_clean$SYMBOL, res_clean$ENTREZID)
    symbols <- entrez_to_symbol[as.character(ids)]
    symbols[!is.na(symbols)]
  }

  # Pair SYMBOLs with their log2FC and drop genes without one. Names are
  # stripped first: a lookup miss yields an NA name, which data.frame() would
  # otherwise try to use as a row name and fail with "row names contain
  # missing values".
  score_genes <- function(symbols) {
    symbols <- unname(as.character(symbols))
    scored <- data.frame(
      gene = symbols,
      log2FoldChange = unname(gene_list[symbols]),
      stringsAsFactors = FALSE
    )
    scored[!is.na(scored$log2FoldChange), , drop = FALSE]
  }

  # Keep the first N rows and add the "#k" labels; NULL when N is not >= 1.
  take_top <- function(pathway_data) {
    top_n <- as.integer(min(input$gsea_top_genes, nrow(pathway_data)))
    if (length(top_n) != 1L || is.na(top_n) || top_n < 1L) {
      return(NULL)
    }
    pathway_data_top <- pathway_data[seq_len(top_n), , drop = FALSE]
    pathway_data_top$rank_label <- paste0("#", seq_len(top_n))
    pathway_data_top
  }

  # Members of the selected pathway according to the uploaded GMT.
  gmt <- clusterProfiler::read.gmt(input$gmt_file$datapath)
  pathway_genes <- gmt$gene[gmt$term == pathway_id]
  if (length(pathway_genes) == 0) {
    return(NULL)
  }
  # Translate Entrez IDs to SYMBOLs. Besides the declared ID type, an
  # all-numeric gene set is also translated, matching the auto-detection the
  # GSEA run applies to Entrez GMT files in SYMBOL mode.
  if (id_col_in_gmt == "ENTREZID" || looks_like_entrez(pathway_genes)) {
    pathway_genes <- entrez_ids_to_symbols(pathway_genes)
  }

  # Leading-edge mode: use the genes listed in core_enrichment.
  if (input$gsea_gene_order == "leading_edge") {
    le_top <- tryCatch({
      core_enrichment_str <- gsea_obj@result$core_enrichment[selected]
      cat(sprintf("🔍 提取Leading Edge基因,selected=%d, pathway_id=%s\n", selected, pathway_id))
      cat(sprintf("🔍 core_enrichment内容: %s\n", substring(core_enrichment_str, 1, 200)))

      le_result <- NULL
      if (!is.na(core_enrichment_str) && nzchar(core_enrichment_str)) {
        # core_enrichment is a "/"-separated list in the ID type of the run.
        le_genes_raw <- unlist(strsplit(core_enrichment_str, "/"))
        cat(sprintf("🔍 原始Leading Edge基因数量: %d (ID类型: %s)\n", length(le_genes_raw), input$gsea_id_type))

        if (looks_like_entrez(le_genes_raw)) {
          cat("🔄 检测到ENTREZID格式,正在转换为SYMBOL...\n")
          le_genes_symbol <- entrez_ids_to_symbols(le_genes_raw)
          cat(sprintf("✅ 转换后SYMBOL基因数量: %d\n", length(le_genes_symbol)))
        } else {
          le_genes_symbol <- le_genes_raw
          cat("✅ 已经是SYMBOL格式\n")
        }

        if (length(le_genes_symbol) == 0) {
          cat("⚠️ Leading Edge基因ID转换失败,尝试使用常规方式\n")
        } else {
          pathway_data <- score_genes(le_genes_symbol)
          if (nrow(pathway_data) > 0) {
            # Order by position in the ranked list.
            pathway_data$rank <- match(pathway_data$gene, names(gene_list))
            pathway_data <- pathway_data[order(pathway_data$rank), ]
            le_result <- take_top(pathway_data)
            if (!is.null(le_result)) {
              cat(sprintf("✅ 提取了 %d 个真正的Leading Edge基因 (ID类型: SYMBOL)\n",
                          nrow(le_result)))
              cat("✅ Leading Edge基因示例:", paste(head(le_result$gene, 5), collapse=", "), "\n")
            }
          }
        }
      }
      le_result
    }, error = function(e) {
      cat("⚠️ 提取Leading Edge基因失败,使用常规方式:", e$message, "\n")
      NULL
    })
    if (!is.null(le_top)) {
      return(le_top)
    }
  }

  # Regular path (also the fallback for leading-edge mode): all pathway genes
  # that have a log2FC.
  pathway_data <- score_genes(pathway_genes)
  if (nrow(pathway_data) == 0) {
    return(NULL)
  }

  if (input$gsea_gene_order == "abs_logFC") {
    pathway_data <- pathway_data[order(abs(pathway_data$log2FoldChange), decreasing = TRUE), ]
  } else if (input$gsea_gene_order == "logFC") {
    pathway_data <- pathway_data[order(pathway_data$log2FoldChange, decreasing = TRUE), ]
  } else if (input$gsea_gene_order == "rank") {
    # Order by position in the ranked list.
    pathway_data$rank <- match(pathway_data$gene, names(gene_list))
    pathway_data <- pathway_data[order(pathway_data$rank), ]
  }

  take_top(pathway_data)
})
# === 新增:GSEA 山脊图可视化 ===
# Ridge plot of the top GSEA pathways, shown only while input$show_gsea_ridge
# is truthy. The pathway count comes from input$gsea_ridge_pathways (10 when
# missing or < 1) and is clamped to the number of pathways in the result.
# Failures inside ridgeplot() are reported via notification and replaced by a
# message plot; without enrichplot a placeholder title is drawn.
output$gsea_ridge_plot <- renderPlot({
  req(gsea_results())
  req(input$show_gsea_ridge)
  gsea_obj <- gsea_results()
  txt_col <- if (input$theme_toggle) "white" else "black"

  # Requested number of pathways, with a safe default.
  n_show <- suppressWarnings(as.integer(input$gsea_ridge_pathways))
  if (is.na(n_show) || n_show < 1) {
    n_show <- 10L
  }
  cat(sprintf("🎨 用户请求显示 %d 个通路的山脊图\n", n_show))

  if ("enrichplot" %in% loadedNamespaces()) {
    tryCatch({
      # Never ask for more pathways than the result contains.
      n_available <- nrow(gsea_obj@result)
      n_show <- max(1, min(n_show, n_available))
      cat(sprintf("📊 总共有 %d 个通路,将显示前 %d 个\n", n_available, n_show))

      ridge_title <- sprintf("Top %d GSEA Pathways (Total: %d)", n_show, n_available)
      clear_bg <- element_rect(fill = "transparent", colour = NA)
      ridge <- enrichplot::ridgeplot(gsea_obj, showCategory = n_show) +
        labs(title = ridge_title) +
        theme(
          plot.title = element_text(color = txt_col, face = "bold", hjust = 0.5, size = 14),
          axis.title = element_text(color = txt_col, face = "bold"),
          axis.text = element_text(color = txt_col),
          legend.text = element_text(color = txt_col),
          legend.title = element_text(color = txt_col),
          panel.background = clear_bg,
          plot.background = clear_bg
        )
      print(ridge)
      cat("✅ 山脊图生成成功\n")
      return(NULL)
    }, error = function(e) {
      cat("❌ ridgeplot错误:", e$message, "\n")
      cat("错误详情:", conditionMessage(e), "\n")
      showNotification(paste("ridgeplot绘图失败:", e$message), type = "warning")

      # Draw a plain message plot in place of the ridge plot.
      clear_bg <- element_rect(fill = "transparent", colour = NA)
      failure_plot <- ggplot() +
        labs(title = "山脊图生成失败") +
        geom_text(aes(x = 0.5, y = 0.5, label = "请检查GSEA结果\n或减少显示的通路数"), size = 5) +
        theme_void() +
        theme(
          plot.title = element_text(color = txt_col, hjust = 0.5, face = "bold"),
          panel.background = clear_bg,
          plot.background = clear_bg
        )
      print(failure_plot)
    })
  } else {
    # enrichplot is not loaded: show a hint instead of a plot.
    clear_bg <- element_rect(fill = "transparent", colour = NA)
    placeholder <- ggplot() +
      labs(title = "无法生成山脊图 - 需要enrichplot包") +
      theme(
        plot.title = element_text(color = txt_col, hjust = 0.5, face = "bold"),
        panel.background = clear_bg,
        plot.background = clear_bg
      )
    print(placeholder)
  }
})
}