| |
| |
| |
|
|
| differential_analysis_server <- function(input, output, session) { |
|
|
| |
| data_input <- data_input_server(input, output, session) |
|
|
| |
# Info panel describing the current sample selection.
#
# Shows how many samples are in each group, the smallest group size, which
# method that size selects (limma-voom when the smallest group has >= 3
# samples, edgeR otherwise) and the direction of the contrast.
output$sample_count_display <- renderUI({
  req(data_input$raw_data(), input$control_group, input$treat_group)

  # Named counts so each group can be looked up by label when rendering.
  group_counts <- c(
    Control = length(input$control_group),
    Treatment = length(input$treat_group)
  )
  min_replicates <- min(group_counts)

  # Pick the label and the Bootstrap text colour for the selected method.
  enough_replicates <- min_replicates >= 3
  method_text <- if (enough_replicates) {
    "limma-voom (样本充足)"
  } else {
    "edgeR (样本较少)"
  }
  method_color <- if (enough_replicates) "success" else "warning"

  control_line <- tags$p(
    tags$strong("对照组 (Control): "), group_counts["Control"], " 个样本"
  )
  treatment_line <- tags$p(
    tags$strong("处理组 (Treatment): "), group_counts["Treatment"], " 个样本"
  )
  replicate_line <- tags$p(tags$strong("最小重复数: "), min_replicates)

  method_line <- tags$p(
    tags$strong("分析方法: "),
    tags$span(class = paste0("text-", method_color), method_text)
  )
  rule_line <- tags$p(
    class = "text-muted small",
    "规则: 每组样本数≥3时使用limma-voom,<3时使用edgeR"
  )

  # Explain how the sign of log2FC should be read.
  direction_line <- tags$p(
    tags$strong("🔄 对比方向: "),
    tags$span(class = "text-primary", "Treatment vs Control"),
    tags$br(),
    tags$small(
      class = "text-muted",
      "log2FC > 0: 基因在处理组中上调表达",
      tags$br(),
      "log2FC < 0: 基因在处理组中下调表达"
    )
  )

  tagList(
    tags$div(
      class = "alert alert-info",
      tags$h5("📊 样本统计与对比设置"),
      control_line,
      treatment_line,
      replicate_line,
      tags$hr(),
      method_line,
      rule_line,
      tags$hr(style = "margin-top: 10px; margin-bottom: 10px;"),
      direction_line
    )
  )
})
|
|
| |
# Run a two-group differential expression analysis (Treatment vs Control).
#
# @param df_use Count table with genes in rows and the selected samples in
#   columns; passed straight to edgeR::DGEList().
# @param group Factor with levels "Control" and "Treatment", one entry per
#   column of `df_use`.
# @param min_replicates Size of the smallest group. With >= 3 the limma-voom
#   pipeline is used; with 2 the edgeR exact test with estimated dispersion;
#   with 1 the edgeR exact test with a fixed dispersion.
# @return Data frame with one row per gene that passed filterByExpr(), in the
#   order returned by topTable()/topTags(). It has the standardised columns
#   `GeneID`, `log2FoldChange`, `pvalue`, `padj` (plus `t_stat` for
#   limma-voom) and the per-gene summaries `baseMean` and `logCPM`.
# @details Stops with an error if the backend result lacks a standardised
#   column.
perform_differential_analysis <- function(df_use, group, min_replicates) {
  # Shared sanity check for both backends: the rest of the app relies on these
  # column names being present after renaming.
  check_result_columns <- function(res, method_label) {
    required_cols <- c("log2FoldChange", "pvalue", "padj")
    missing_cols <- setdiff(required_cols, colnames(res))
    if (length(missing_cols) > 0) {
      stop(sprintf("%s结果缺少必要列: %s。现有列: %s",
                   method_label,
                   paste(missing_cols, collapse = ", "),
                   paste(colnames(res), collapse = ", ")))
    }
    invisible(res)
  }

  dge <- DGEList(counts = df_use, group = group)
  dge <- calcNormFactors(dge)

  # Drop genes with too few reads to be testable.
  keep <- filterByExpr(dge)
  dge <- dge[keep, , keep.lib.sizes = FALSE]

  if (min_replicates >= 3) {
    # limma-voom: one coefficient per group, then an explicit contrast so the
    # sign of logFC is Treatment minus Control.
    design <- model.matrix(~0 + group)
    colnames(design) <- levels(group)

    v <- voom(dge, design, plot = FALSE)
    fit <- lmFit(v, design)

    cm <- makeContrasts(
      Treatment_vs_Control = Treatment - Control,
      levels = design
    )

    fit2 <- contrasts.fit(fit, cm)
    fit2 <- eBayes(fit2)

    res <- topTable(fit2, coef = "Treatment_vs_Control", number = Inf)
    res$GeneID <- rownames(res)
    res <- res %>%
      dplyr::rename(
        log2FoldChange = logFC,
        pvalue = P.Value,
        padj = adj.P.Val,
        t_stat = t
      )

    check_result_columns(res, "limma-voom")
  } else {
    if (min_replicates > 1) {
      # At least two replicates per group: dispersion can be estimated.
      dge <- estimateDisp(dge)
      et <- exactTest(dge, pair = c("Control", "Treatment"))
    } else {
      # A group with a single sample: fall back to a fixed dispersion
      # (square of 0.1).
      user_disp_sqrt <- 0.1
      et <- exactTest(dge, pair = c("Control", "Treatment"),
                      dispersion = user_disp_sqrt^2)
    }

    res <- topTags(et, n = Inf)$table
    res$GeneID <- rownames(res)
    res <- res %>%
      dplyr::rename(
        log2FoldChange = logFC,
        pvalue = PValue,
        padj = FDR
      )

    check_result_columns(res, "edgeR")
  }

  # topTable()/topTags() return genes sorted by significance, while cpm(dge)
  # is in count-matrix order. Match by gene ID so every summary lands on its
  # own gene instead of being assigned positionally.
  gene_order <- match(res$GeneID, rownames(dge))

  mean_cpm <- rowMeans(edgeR::cpm(dge, log = FALSE, prior.count = 1))
  res$baseMean <- unname(mean_cpm[gene_order])

  mean_log_cpm <- rowMeans(edgeR::cpm(dge, log = TRUE, prior.count = 1))
  res$logCPM <- unname(mean_log_cpm[gene_order])

  return(res)
}
|
|
| |
# Differential expression results computed from the uploaded count matrix.
#
# Triggered by the "analyze" button. Validates the sample selection, runs
# perform_differential_analysis(), logs a summary to the console, labels each
# gene Up / Down / Not DE with the current padj and log2FC cut-offs, annotates
# gene IDs (ENSEMBL, then SYMBOL, then ENTREZID matching), filters pseudo
# genes and keeps one row per SYMBOL.
#
# Returns the annotated result data frame, or NULL (with an error
# notification) when validation or the analysis fails.
deg_results <- eventReactive(input$analyze, {
  req(data_input$raw_data(), input$control_group, input$treat_group)

  df <- data_input$raw_data()
  ctrl <- input$control_group
  trt <- input$treat_group

  if (length(ctrl) == 0 || length(trt) == 0) {
    showNotification("请至少选择一个对照组和处理组样本", type = "error")
    return(NULL)
  }

  if (length(intersect(ctrl, trt)) > 0) {
    showNotification("对照组和处理组不能有重叠样本", type = "error")
    return(NULL)
  }

  # Control columns first, then treatment, so `group` lines up with columns.
  df_use <- df[, c(ctrl, trt)]
  group <- factor(
    c(rep("Control", length(ctrl)), rep("Treatment", length(trt))),
    levels = c("Control", "Treatment")
  )
  min_replicates <- min(length(ctrl), length(trt))

  tryCatch({
    res <- perform_differential_analysis(df_use, group, min_replicates)

    pval_cutoff <- input$pval_cutoff
    log2fc_cutoff <- input$log2fc_cutoff

    # Overall direction counts, regardless of significance.
    n_up <- sum(res$log2FoldChange > 0, na.rm = TRUE)
    n_down <- sum(res$log2FoldChange < 0, na.rm = TRUE)
    n_total <- n_up + n_down

    # Counts at the current thresholds.
    is_sig_p <- res$padj < pval_cutoff
    n_significant <- sum(is_sig_p & abs(res$log2FoldChange) > log2fc_cutoff,
                         na.rm = TRUE)
    n_up_sig <- sum(is_sig_p & res$log2FoldChange > log2fc_cutoff,
                    na.rm = TRUE)
    n_down_sig <- sum(is_sig_p & res$log2FoldChange < -log2fc_cutoff,
                      na.rm = TRUE)

    pval_stats <- summary(res$pvalue)
    padj_stats <- summary(res$padj)

    # Console summary for debugging / audit.
    cat("\n========== 差异分析结果摘要 ==========\n")
    cat("分析方法:", if(min_replicates >= 3) "limma-voom" else "edgeR", "\n")
    cat("对比组别: Treatment vs Control\n")
    cat("含义: 相对于Control组,Treatment组的基因表达变化\n")
    cat("----------------------------------------\n")
    cat("P值校正方法: Benjamini-Hochberg (BH FDR)\n")
    cat("筛选阈值: padj <", pval_cutoff, "且 |log2FC| >", log2fc_cutoff, "\n")
    cat("----------------------------------------\n")
    cat("总体分布:\n")
    cat(sprintf(" 上调基因 (log2FC > 0): %d (%.1f%%)\n", n_up, 100*n_up/n_total))
    cat(sprintf(" 下调基因 (log2FC < 0): %d (%.1f%%)\n", n_down, 100*n_down/n_total))
    cat("----------------------------------------\n")
    cat("显著差异基因:\n")
    cat(sprintf(" 总计: %d\n", n_significant))
    cat(sprintf(" 上调: %d\n", n_up_sig))
    cat(sprintf(" 下调: %d\n", n_down_sig))
    cat("----------------------------------------\n")
    cat("P值分布:\n")
    cat(sprintf(" 最小值: %.2e\n", pval_stats["Min."]))
    cat(sprintf(" 中位数: %.2e\n", pval_stats["Median"]))
    cat(sprintf(" 最大值: %.2e\n", pval_stats["Max."]))
    cat("校正P值 (FDR) 分布:\n")
    cat(sprintf(" 最小值: %.2e\n", padj_stats["Min."]))
    cat(sprintf(" 中位数: %.2e\n", padj_stats["Median"]))
    cat(sprintf(" 最大值: %.2e\n", padj_stats["Max."]))
    cat("========================================\n\n")

    res$Status <- ifelse(
      is_sig_p & abs(res$log2FoldChange) > log2fc_cutoff,
      ifelse(res$log2FoldChange > 0, "Up", "Down"),
      "Not DE"
    )

    if (!"t_stat" %in% colnames(res)) {
      # edgeR gives no t statistic: derive a signed z-score from the two-sided
      # p-value. The upper tail is used directly because 1 - p/2 rounds to 1
      # for very small p, which would turn the strongest genes into Inf.
      res$t_stat <- qnorm(res$pvalue / 2, lower.tail = FALSE) *
        sign(res$log2FoldChange)
    }

    # Defaults used when annotation is unavailable or a gene is not matched.
    # Original_GeneID is always recorded so both outcomes have the same columns.
    if (!"SYMBOL" %in% colnames(res)) res$SYMBOL <- res$GeneID
    if (!"ENTREZID" %in% colnames(res)) res$ENTREZID <- NA
    res$Original_GeneID <- res$GeneID

    anno <- data_input$annotate_genes(res$GeneID, input$species_select)

    if (!is.null(anno)) {
      # Normalise the user's IDs: strip whitespace everywhere and, for
      # anything that is not an Ensembl ID, all non-alphanumeric characters.
      clean_geneid <- res$GeneID
      clean_geneid <- trimws(clean_geneid)
      clean_geneid <- gsub("[\t\n\r]", "", clean_geneid)
      non_ensembl <- !grepl("^ENS", clean_geneid, ignore.case = TRUE)
      clean_geneid[non_ensembl] <- gsub("[^[:alnum:]]", "", clean_geneid[non_ensembl])

      # Normalised symbol column: Title-case for mouse ("Mm"), upper-case
      # otherwise.
      anno_clean <- anno
      if ("SYMBOL" %in% colnames(anno_clean)) {
        symbol_clean <- gsub("[^[:alnum:]]", "", anno_clean$SYMBOL)
        if (input$species_select == "Mm") {
          starts_alpha <- grepl("^[A-Za-z]", symbol_clean)
          to_fix <- symbol_clean[starts_alpha]
          symbol_clean[starts_alpha] <- paste0(
            toupper(substr(to_fix, 1, 1)),
            tolower(substr(to_fix, 2, nchar(to_fix)))
          )
        } else {
          symbol_clean <- toupper(symbol_clean)
        }
        anno_clean$SYMBOL_CLEAN <- symbol_clean
      }

      cat("开始SYMBOL匹配流程...\n")
      cat("注释数据列名:", paste(colnames(anno_clean), collapse=", "), "\n")

      # Pass 1: Ensembl IDs against the ENSEMBL column.
      if ("ENSEMBL" %in% colnames(anno_clean)) {
        is_ensembl_id <- grepl("^ENS", clean_geneid, ignore.case = TRUE)
        n_ensembl <- sum(is_ensembl_id)
        cat("发现", n_ensembl, "个Ensembl ID格式的基因\n")

        if (any(is_ensembl_id)) {
          ensembl_ids <- clean_geneid[is_ensembl_id]
          cat("前5个Ensembl ID:", paste(head(ensembl_ids, 5), collapse=", "), "\n")
          cat("注释数据库中ENSEMBL列数量:", length(anno_clean$ENSEMBL), "\n")

          ensembl_match_idx <- match(ensembl_ids, anno_clean$ENSEMBL)
          matched_ensembl <- !is.na(ensembl_match_idx)
          n_matched <- sum(matched_ensembl)

          cat("通过ENSEMBL列匹配成功:", n_matched, "/", n_ensembl, "个基因\n")

          if (any(matched_ensembl)) {
            indices_to_update <- which(is_ensembl_id)[matched_ensembl]
            anno_rows <- ensembl_match_idx[matched_ensembl]
            res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
            res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
          }

          if (n_matched < n_ensembl) {
            unmatched_ids <- ensembl_ids[!matched_ensembl]
            cat("警告:", sum(!matched_ensembl), "个Ensembl ID未能匹配\n")
            cat("未匹配示例(前5个):", paste(head(unmatched_ids, 5), collapse=", "), "\n")
          }
        }
      } else {
        cat("错误:注释数据中没有ENSEMBL列!\n")
      }

      if ("SYMBOL" %in% colnames(anno_clean)) {
        # Pass 2: genes still lacking an ENTREZID, matched on the normalised
        # symbol.
        unmatched <- is.na(res$ENTREZID) | res$ENTREZID == ""

        if (any(unmatched)) {
          match_idx <- match(clean_geneid[unmatched], anno_clean$SYMBOL_CLEAN)
          matched_genes <- !is.na(match_idx)

          if (any(matched_genes)) {
            indices_to_update <- which(unmatched)[matched_genes]
            anno_rows <- match_idx[matched_genes]
            res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
            res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
            cat("通过SYMBOL列匹配成功:", sum(matched_genes), "个基因\n")
          }
        }

        # Pass 3: the raw ID may itself be an ENTREZID.
        still_unmatched <- is.na(res$ENTREZID) | res$ENTREZID == ""
        if (any(still_unmatched)) {
          entrez_match_idx <- match(res$GeneID[still_unmatched], anno_clean$ENTREZID)
          matched_entrez <- !is.na(entrez_match_idx)

          if (any(matched_entrez)) {
            indices_to_update <- which(still_unmatched)[matched_entrez]
            anno_rows <- entrez_match_idx[matched_entrez]
            res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
            res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
            cat("通过ENTREZID反向匹配成功:", sum(matched_entrez), "个基因\n")
          }
        }
      }
    }

    res <- data_input$filter_pseudo_genes(res)

    # Keep one row per SYMBOL. The count must be taken BEFORE de-duplicating;
    # afterwards it is always zero and the log line would never be printed.
    # NOTE(review): ties on padj keep the row with the SMALLEST |log2FC|
    # (ascending sort) -- confirm that is intended.
    n_duplicates <- sum(duplicated(res$SYMBOL))
    if (n_duplicates > 0) {
      res <- res %>%
        dplyr::arrange(SYMBOL, padj, abs(log2FoldChange)) %>%
        dplyr::distinct(SYMBOL, .keep_all = TRUE)
      cat(sprintf("移除了 %d 个重复的基因记录\n", n_duplicates))
    }

    return(res)
  }, error = function(e) {
    showNotification(paste("差异分析失败:", e$message), type = "error")
    return(NULL)
  })
})
|
|
| |
# Standardise the column names and types of an uploaded DEG table.
#
# For each standard column (`log2FoldChange`, `pvalue`, `padj`, `GeneID`) the
# first alias found in `df`, in the order listed below, is renamed to the
# standard name. `log2FoldChange`, `pvalue` and `padj` are then coerced to
# numeric if they are not already.
#
# @param df Data frame as read from the uploaded file.
# @return `df` with renamed / coerced columns. Missing standard columns are
#   only reported on the console; the caller decides whether that is fatal.
# @details Emits a warning when coercion turns previously non-missing values
#   into NA.
enhanced_column_mapping <- function(df) {
  cat("检查上传的差异基因文件列结构...\n")
  cat("原始列名:", paste(colnames(df), collapse = ", "), "\n")

  # Accepted aliases per standard column; earlier entries win.
  column_mappings <- list(
    log2FoldChange = c("log2FoldChange", "log2FC", "avg_log2FC", "logFC", "log2_fold_change", "log2fc", "log2fc_adj"),
    pvalue = c("pvalue", "p_val", "p.value", "P.Value", "pvalue_adj"),
    padj = c("padj", "p_val_adj", "p_adj", "adj.P.Val", "pvalue_adj", "FDR"),
    GeneID = c("GeneID", "gene", "Gene", "SYMBOL", "symbol", "gene_symbol", "ensembl", "ENSEMBL")
  )

  for (target_col in names(column_mappings)) {
    aliases <- column_mappings[[target_col]]
    hit <- aliases[aliases %in% colnames(df)]

    if (length(hit) == 0) {
      cat(" ⚠️ 缺失列:", target_col, "\n")
      next
    }

    col_name <- hit[1]
    if (col_name != target_col) {
      cat(" 重命名列:", col_name, "->", target_col, "\n")
      colnames(df)[colnames(df) == col_name] <- target_col
    } else {
      cat(" 找到列:", target_col, "\n")
    }
  }

  # Coerce the numeric columns. Only NAs created by the coercion itself are
  # reported, so values that were already missing in the file do not inflate
  # the count in the warning.
  for (col in c("log2FoldChange", "pvalue", "padj")) {
    if (!col %in% colnames(df) || is.numeric(df[[col]])) {
      next
    }

    if (col == "log2FoldChange") {
      cat(" 转换log2FoldChange为数值类型\n")
    } else {
      cat(" 转换", col, "为数值类型\n")
    }

    original <- df[[col]]
    original_type <- class(original)[1]
    converted <- as.numeric(as.character(original))
    n_na <- sum(is.na(converted) & !is.na(original))
    df[[col]] <- converted

    if (n_na > 0) {
      warning(sprintf("%s列从%s转换为数值时产生了%d个NA值", col, original_type, n_na))
    }
  }

  return(df)
}
|
|
| |
# Differential expression results loaded from a user-supplied DEG table.
#
# Triggered by the "load_deg" button. Standardises column names via
# enhanced_column_mapping(), requires `pvalue` and `log2FoldChange`, falls
# back to raw p-values when no adjusted column exists, labels each gene
# Up / Down / Not DE with the chosen p-value column and cut-offs, derives a
# ranking score `t_stat` as -log10(p) * log2FC, annotates gene IDs, filters
# pseudo genes and keeps one row per SYMBOL.
#
# Returns the processed data frame, or NULL when required columns are missing.
deg_results_from_file <- eventReactive(input$load_deg, {
  req(data_input$deg_file_data())

  showNotification("正在加载差异基因结果...", type = "message")

  df <- data_input$deg_file_data()
  cat("上传的文件列名:", paste(colnames(df), collapse = ", "), "\n")

  df <- enhanced_column_mapping(df)

  required_cols <- c("pvalue", "log2FoldChange")
  missing_cols <- setdiff(required_cols, colnames(df))

  if (length(missing_cols) > 0) {
    showNotification(paste("缺少必要的列:", paste(missing_cols, collapse = ", ")), type = "error")
    showNotification("请确保上传的文件包含pvalue和log2FoldChange列,或使用以下列名之一:", type = "warning")
    showNotification("log2FoldChange: log2FC, avg_log2FC, logFC, log2_fold_change, log2fc, log2fc_adj", type = "message")
    showNotification("pvalue: p_val, p.value, P.Value, pvalue_adj", type = "message")
    return(NULL)
  }

  # Without an adjusted p-value column, fall back to the raw p-value and flag
  # the table so downstream consumers can tell.
  has_padj <- "padj" %in% colnames(df)
  if (!has_padj) {
    df$padj <- df$pvalue
    showNotification("⚠️ 警告:未找到校正p值(padj/FDR)列,将使用原始p值代替。", type = "warning")
    showNotification("建议:差异分析结果应包含多重假设检验校正后的p值。", type = "message")
  }
  df$using_unadjusted_pval <- !has_padj

  res <- df

  if (!"GeneID" %in% colnames(res)) {
    if ("SYMBOL" %in% colnames(res)) {
      res$GeneID <- res$SYMBOL
    } else {
      res$GeneID <- rownames(res)
    }
  }

  # Placeholder values for columns an uploaded table may not carry.
  if (!"baseMean" %in% colnames(res)) res$baseMean <- 1
  if (!"logCPM" %in% colnames(res)) res$logCPM <- 0

  if (!"SYMBOL" %in% colnames(res)) {
    res$SYMBOL <- res$GeneID
  }

  pval_col <- if(input$deg_pval_type == "p_val_adj") "padj" else "pvalue"

  res$Status <- ifelse(res[[pval_col]] < input$deg_pval_cutoff & abs(res$log2FoldChange) > input$deg_log2fc_cutoff,
                       ifelse(res$log2FoldChange > 0, "Up", "Down"), "Not DE")

  # Ranking score: significance times effect size. p-values are floored at
  # 1e-300 so -log10() stays finite; anything still non-finite becomes NA.
  res <- res %>%
    dplyr::mutate(
      pvalue_safe = pmax(pvalue, 1e-300),
      t_stat = -log10(pvalue_safe) * log2FoldChange
    ) %>%
    dplyr::mutate(
      t_stat = ifelse(is.finite(t_stat), t_stat, NA)
    )

  cat(sprintf("📊 差异分析: %d 个基因的t_stat\n", sum(!is.na(res$t_stat))))
  cat(sprintf("📊 t_stat范围: %.2f 至 %.2f\n",
              min(res$t_stat, na.rm = TRUE),
              max(res$t_stat, na.rm = TRUE)))

  anno <- data_input$annotate_genes(res$GeneID, input$deg_species)

  if (!is.null(anno)) {
    # Normalise the user's IDs: strip whitespace everywhere and, for anything
    # that is not an Ensembl ID, all non-alphanumeric characters.
    clean_geneid <- res$GeneID
    clean_geneid <- trimws(clean_geneid)
    clean_geneid <- gsub("[\t\n\r]", "", clean_geneid)
    non_ensembl <- !grepl("^ENS", clean_geneid, ignore.case = TRUE)
    clean_geneid[non_ensembl] <- gsub("[^[:alnum:]]", "", clean_geneid[non_ensembl])

    # Normalised symbol column: Title-case for mouse ("Mm"), upper-case
    # otherwise.
    anno_clean <- anno
    if ("SYMBOL" %in% colnames(anno_clean)) {
      symbol_clean <- gsub("[^[:alnum:]]", "", anno_clean$SYMBOL)
      if (input$deg_species == "Mm") {
        starts_alpha <- grepl("^[A-Za-z]", symbol_clean)
        to_fix <- symbol_clean[starts_alpha]
        symbol_clean[starts_alpha] <- paste0(
          toupper(substr(to_fix, 1, 1)),
          tolower(substr(to_fix, 2, nchar(to_fix)))
        )
      } else {
        symbol_clean <- toupper(symbol_clean)
      }
      anno_clean$SYMBOL_CLEAN <- symbol_clean
    }

    if (!"SYMBOL" %in% colnames(res)) res$SYMBOL <- res$GeneID
    if (!"ENTREZID" %in% colnames(res)) res$ENTREZID <- NA

    res$Original_GeneID <- res$GeneID

    # Pass 1: Ensembl IDs against the ENSEMBL column.
    if ("ENSEMBL" %in% colnames(anno_clean)) {
      is_ensembl_id <- grepl("^ENS", clean_geneid, ignore.case = TRUE)

      if (any(is_ensembl_id)) {
        ensembl_match_idx <- match(clean_geneid[is_ensembl_id], anno_clean$ENSEMBL)
        matched_ensembl <- !is.na(ensembl_match_idx)

        if (any(matched_ensembl)) {
          indices_to_update <- which(is_ensembl_id)[matched_ensembl]
          anno_rows <- ensembl_match_idx[matched_ensembl]
          res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
          res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
          cat("通过ENSEMBL列匹配成功:", sum(matched_ensembl), "个基因\n")
        }
      }
    }

    if ("SYMBOL" %in% colnames(anno_clean)) {
      # Pass 2: genes still lacking an ENTREZID, matched on normalised symbol.
      unmatched <- is.na(res$ENTREZID) | res$ENTREZID == ""

      if (any(unmatched)) {
        match_idx <- match(clean_geneid[unmatched], anno_clean$SYMBOL_CLEAN)
        matched_genes <- !is.na(match_idx)

        if (any(matched_genes)) {
          indices_to_update <- which(unmatched)[matched_genes]
          anno_rows <- match_idx[matched_genes]
          res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
          res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
          cat("通过SYMBOL列匹配成功:", sum(matched_genes), "个基因\n")
        }
      }

      # Pass 3: the raw ID may itself be an ENTREZID.
      still_unmatched <- is.na(res$ENTREZID) | res$ENTREZID == ""
      if (any(still_unmatched)) {
        entrez_match_idx <- match(res$GeneID[still_unmatched], anno_clean$ENTREZID)
        matched_entrez <- !is.na(entrez_match_idx)

        if (any(matched_entrez)) {
          indices_to_update <- which(still_unmatched)[matched_entrez]
          anno_rows <- entrez_match_idx[matched_entrez]
          res$SYMBOL[indices_to_update] <- anno_clean$SYMBOL[anno_rows]
          res$ENTREZID[indices_to_update] <- anno_clean$ENTREZID[anno_rows]
          cat("通过ENTREZID反向匹配成功:", sum(matched_entrez), "个基因\n")
        }
      }
    }
  } else {
    # No annotation available: fall back to the raw IDs.
    res$SYMBOL <- res$GeneID
    res$ENTREZID <- NA
    res$Original_GeneID <- res$GeneID
  }

  if (!"SYMBOL" %in% colnames(res)) res$SYMBOL <- res$GeneID
  if (!"ENTREZID" %in% colnames(res)) res$ENTREZID <- NA

  res <- data_input$filter_pseudo_genes(res)

  # Keep one row per SYMBOL. The count must be taken BEFORE de-duplicating;
  # afterwards it is always zero and the log line would never be printed.
  # NOTE(review): ties on padj keep the row with the SMALLEST |log2FC|
  # (ascending sort) -- confirm that is intended.
  n_duplicates <- sum(duplicated(res$SYMBOL))
  if (n_duplicates > 0) {
    res <- res %>%
      dplyr::arrange(SYMBOL, padj, abs(log2FoldChange)) %>%
      dplyr::distinct(SYMBOL, .keep_all = TRUE)
    cat(sprintf("移除了 %d 个重复的基因记录\n", n_duplicates))
  }

  cat("最终数据列:", paste(colnames(res), collapse = ", "), "\n")
  cat("log2FoldChange类型:", class(res$log2FoldChange), "\n")
  cat("log2FoldChange范围:", range(res$log2FoldChange, na.rm=TRUE), "\n")

  return(res)
})
|
|
| |
# Differential expression results loaded from a microarray (limma-style)
# result table.
#
# Triggered by the "load_chip" button. Requires the columns `logFC`,
# `P.Value`, `SYMBOL` and `ID`; `adj.P.Val`, `AveExpr` and `t` are used when
# present. Rows whose `ID` cannot be read as a number are dropped (the ID is
# used as ENTREZID), genes are labelled Up / Down / Not DE and one row per
# SYMBOL is kept.
#
# Returns the processed data frame, or NULL when a required column is missing.
chip_results_from_file <- eventReactive(input$load_chip, {
  req(data_input$chip_file_data())

  showNotification("正在加载芯片差异结果...", type = "message")

  df <- data_input$chip_file_data()
  cat("芯片文件列名:", paste(colnames(df), collapse = ", "), "\n")

  required_cols <- c("logFC", "P.Value", "SYMBOL", "ID")
  missing_cols <- setdiff(required_cols, colnames(df))

  if (length(missing_cols) > 0) {
    showNotification(paste("缺少必要的列:", paste(missing_cols, collapse = ", ")), type = "error")
    showNotification("请确保上传的文件包含: logFC, AveExpr, t, P.Value, adj.P.Val, B, SYMBOL, ID", type = "warning")
    return(NULL)
  }

  # Optional columns are not validated above. Exact `[[` lookup with an NA
  # fallback keeps the result rectangular: a missing column would otherwise be
  # dropped by data.frame() and break the Status / de-duplication steps, and
  # `$` could partially match an unrelated column name.
  optional_col <- function(name) {
    if (name %in% colnames(df)) df[[name]] else rep(NA_real_, nrow(df))
  }

  has_padj <- "adj.P.Val" %in% colnames(df)
  if (!has_padj) {
    # Same fallback as the DEG-file loader: use the raw p-value.
    showNotification("⚠️ 警告:未找到adj.P.Val列,将使用原始P.Value代替。", type = "warning")
  }

  res <- data.frame(
    ID = as.character(df[["ID"]]),
    SYMBOL = df[["SYMBOL"]],
    log2FoldChange = df[["logFC"]],
    pvalue = df[["P.Value"]],
    padj = if (has_padj) df[["adj.P.Val"]] else df[["P.Value"]],
    baseMean = optional_col("AveExpr"),
    t = optional_col("t"),
    # The ID column is interpreted as a numeric ENTREZID; non-numeric IDs
    # become NA and are filtered out further down.
    ENTREZID = as.numeric(as.character(df[["ID"]])),
    GeneID = df[["SYMBOL"]],
    Original_GeneID = df[["SYMBOL"]],
    stringsAsFactors = FALSE
  )

  na_count <- sum(is.na(res$ENTREZID))
  if (na_count > 0) {
    cat(sprintf("⚠️ 警告: %d个基因的ENTREZID转换为NA(可能包含非数字ID)\n", na_count))
  }

  pval_col <- if(input$chip_pval_type == "adj.P.Val") "padj" else "pvalue"

  res$Status <- ifelse(res[[pval_col]] < input$chip_pval_cutoff & abs(res$log2FoldChange) > input$chip_log2fc_cutoff,
                       ifelse(res$log2FoldChange > 0, "Up", "Down"), "Not DE")

  # Ranking score: significance times effect size, with p floored at 1e-300
  # so -log10() stays finite.
  res <- res %>%
    dplyr::mutate(
      pvalue_safe = pmax(pvalue, 1e-300),
      t_stat = -log10(pvalue_safe) * log2FoldChange
    ) %>%
    dplyr::mutate(
      t_stat = ifelse(is.finite(t_stat), t_stat, NA)
    )

  cat(sprintf("📊 芯片分析: %d 个基因的t_stat\n", sum(!is.na(res$t_stat))))

  before_filter <- nrow(res)
  res <- res[!is.na(res$ENTREZID), ]
  after_filter <- nrow(res)
  if (before_filter > after_filter) {
    cat(sprintf("⚠️ 过滤了 %d 个ENTREZID为NA的基因\n", before_filter - after_filter))
  }

  # Keep one row per SYMBOL (first after sorting by padj, then |log2FC|).
  if (any(duplicated(res$SYMBOL))) {
    res <- res %>%
      dplyr::arrange(SYMBOL, padj, abs(log2FoldChange)) %>%
      dplyr::distinct(SYMBOL, .keep_all = TRUE)
  }

  cat("✅ 芯片数据加载完成:", nrow(res), "个基因\n")
  showNotification(sprintf("✅ 芯片数据加载完成: %d 个基因", nrow(res)), type = "message")

  return(res)
})
|
|
| |
# IDs of the genes that pass edgeR's filterByExpr() for the current sample
# selection. Only defined for the "counts" data source; any other source
# yields NULL.
get_filtered_expr_genes <- reactive({
  if (input$data_source != "counts") {
    return(NULL)
  }

  req(data_input$raw_data(), input$control_group, input$treat_group)

  control_ids <- input$control_group
  treatment_ids <- input$treat_group
  counts <- data_input$raw_data()[, c(control_ids, treatment_ids)]

  # Group labels in the same order as the selected columns.
  labels <- rep(
    c("Control", "Treatment"),
    times = c(length(control_ids), length(treatment_ids))
  )
  group <- factor(labels, levels = c("Control", "Treatment"))

  # Same filtering steps as the differential analysis itself.
  dge <- calcNormFactors(DGEList(counts = counts, group = group))
  expressed <- filterByExpr(dge)

  return(rownames(dge)[expressed])
})
|
|
| |
# Single access point for the active differential expression result.
#
# Depending on `input$data_source` ("counts", "deg" or "chip") returns a list
# with the elements `deg_df`, `background_genes`, `expr_matrix`,
# `ctrl_samples` and `trt_samples`. Only the "counts" source can provide the
# last four; the file-based sources set them to NULL. Any other source value
# yields NULL.
get_deg_results <- reactive({
  source_type <- input$data_source

  # File-based sources carry only the result table.
  table_only <- function(deg_df) {
    list(
      deg_df = deg_df,
      background_genes = NULL,
      expr_matrix = NULL,
      ctrl_samples = NULL,
      trt_samples = NULL
    )
  }

  if (source_type == "counts") {
    # The rest of this module selects samples through `control_group` /
    # `treat_group`. Fall back to those when `ctrl_samples` / `trt_samples`
    # are not set, so the returned sample lists match the analysed samples.
    # NOTE(review): confirm whether inputs named `ctrl_samples` /
    # `trt_samples` exist in the UI at all.
    ctrl_samples <- input$ctrl_samples
    if (is.null(ctrl_samples)) ctrl_samples <- input$control_group
    trt_samples <- input$trt_samples
    if (is.null(trt_samples)) trt_samples <- input$treat_group

    return(list(
      deg_df = deg_results(),
      background_genes = get_filtered_expr_genes(),
      expr_matrix = data_input$raw_data(),
      ctrl_samples = ctrl_samples,
      trt_samples = trt_samples
    ))
  } else if (source_type == "deg") {
    return(table_only(deg_results_from_file()))
  } else if (source_type == "chip") {
    return(table_only(chip_results_from_file()))
  }

  NULL
})
|
|
| |
# CSV download of the active differential expression table.
output$download_results <- downloadHandler(
  filename = function() {
    paste0("DEG_Results_", Sys.Date(), ".csv")
  },
  content = function(file) {
    # get_deg_results() returns a list, which is always truthy; require the
    # table itself so a failed analysis does not produce an empty file.
    deg_df <- get_deg_results()$deg_df
    req(deg_df)
    write.csv(deg_df, file, row.names = FALSE)
  }
)
|
|
| |
# Four summary cards: total genes, up-regulated, down-regulated and
# non-significant, each with its share of the total.
output$deg_summary <- renderUI({
  # get_deg_results() returns a list, which is always truthy; require the
  # table itself, otherwise nrow(NULL) makes the `if` below fail.
  data <- get_deg_results()$deg_df
  req(data)

  n_total <- nrow(data)
  n_up <- sum(data$Status == "Up", na.rm = TRUE)
  n_down <- sum(data$Status == "Down", na.rm = TRUE)
  n_not_de <- sum(data$Status == "Not DE", na.rm = TRUE)

  # Percentage of the total, guarding against an empty table.
  pct_of_total <- function(n) {
    if (n_total > 0) round(100 * n / n_total, 1) else 0
  }

  # One Bootstrap card; `accent` colours both the left border and the number.
  stat_card <- function(title, value, accent, subtitle) {
    tags$div(
      class = "col-sm-3",
      tags$div(
        class = "card",
        style = paste0("border-left: 4px solid ", accent, ";"),
        tags$div(
          class = "card-body", style = "padding: 15px;",
          tags$h6(class = "card-subtitle mb-2 text-muted", title),
          tags$h3(
            class = "card-title mb-0",
            style = paste0("color: ", accent, ";"),
            format(value, big.mark = ",")
          ),
          tags$small(class = "text-muted", subtitle)
        )
      )
    )
  }

  tagList(
    tags$div(
      class = "row",
      style = "margin-bottom: 20px;",
      stat_card("总基因数", n_total, "#6c757d", paste0("100%")),
      stat_card("上调基因", n_up, "#28a745", paste0(pct_of_total(n_up), "%")),
      stat_card("下调基因", n_down, "#dc3545", paste0(pct_of_total(n_down), "%")),
      stat_card("非显著", n_not_de, "#17a2b8", paste0(pct_of_total(n_not_de), "%"))
    )
  )
})
|
|
# Paged, horizontally scrollable table of the active result. The main numeric
# columns are rounded to four decimals for display.
output$deg_table <- DT::renderDataTable({
  # get_deg_results() returns a list, which is always truthy; require the
  # table itself so a failed analysis does not reach DT::datatable().
  data_to_display <- get_deg_results()$deg_df
  req(data_to_display)

  numeric_cols <- c("log2FoldChange", "pvalue", "padj", "t_stat")
  existing_numeric_cols <- intersect(numeric_cols, colnames(data_to_display))

  # TRUE / FALSE spelled out: T and F are ordinary, reassignable variables.
  table_widget <- DT::datatable(
    data_to_display,
    options = list(scrollX = TRUE, pageLength = 10),
    rownames = FALSE
  )

  if (length(existing_numeric_cols) > 0) {
    table_widget <- table_widget %>%
      formatRound(existing_numeric_cols, 4)
  }

  table_widget
})
|
|
| |
# Genes the user asked to label on the volcano plots (NULL = none).
custom_genes <- reactiveVal(NULL)

# Parse the comma-separated gene list when the "show" button is pressed.
observeEvent(input$show_custom_genes, {
  req(input$custom_genes_input)

  # Split on commas, trim each entry and drop empty ones.
  entries <- trimws(strsplit(input$custom_genes_input, ",")[[1]])
  entries <- entries[nzchar(entries)]

  if (length(entries) == 0) {
    custom_genes(NULL)
    showNotification("请输入有效的基因名称", type = "warning")
    return()
  }

  custom_genes(entries)
  showNotification(paste("已设置显示", length(entries), "个自定义基因"), type = "message")
})
|
|
| |
# Reset the custom gene labels and empty the text box.
observeEvent(input$clear_custom_genes, {
  updateTextInput(session, "custom_genes_input", value = "")
  custom_genes(NULL)
  showNotification("已清除自定义基因", type = "message")
})
|
|
|
|
| |
# Interactive (plotly) volcano plot of the active result.
#
# x = log2 fold change, y = -log10 of the column chosen in
# `input$y_axis_type`, coloured by Status. Genes stored in `custom_genes()`
# are labelled with arrows.
output$interactive_volcano <- renderPlotly({
  # get_deg_results() returns a list, which is always truthy; require the
  # table itself so a failed analysis does not trigger a second round of
  # error notifications from the checks below.
  res <- get_deg_results()$deg_df
  req(res)

  # Console diagnostics.
  cat("火山图数据检查:\n")
  cat("数据类型:", class(res), "\n")
  cat("数据列名:", paste(colnames(res), collapse = ", "), "\n")
  if ("log2FoldChange" %in% colnames(res)) {
    cat("log2FoldChange类型:", class(res$log2FoldChange), "\n")
  }

  y_axis_col <- input$y_axis_type

  if (!("log2FoldChange" %in% colnames(res) && is.numeric(res$log2FoldChange))) {
    showNotification("错误:log2FoldChange列不存在或不是数值类型", type = "error")
    showNotification(paste("当前列名:", paste(colnames(res), collapse = ", ")), type = "message")
    return(NULL)
  }

  if (!(y_axis_col %in% colnames(res) && is.numeric(res[[y_axis_col]]))) {
    showNotification(paste("错误:列", y_axis_col, "不存在或不是数值类型"), type = "error")
    return(NULL)
  }

  # -log10 is undefined for p <= 0: replace such values with the smallest
  # positive double so those genes sit at the top of the plot instead of
  # disappearing. NA stays NA.
  p_values <- res[[y_axis_col]]
  nonpositive <- !is.na(p_values) & p_values <= 0
  p_values[nonpositive] <- .Machine$double.xmin
  res$y_value <- -log10(p_values)

  if (all(is.na(res$y_value))) {
    showNotification(paste("错误:所有", y_axis_col, "值无效(<=0或NA),无法绘制火山图"), type = "error")
    return(NULL)
  }

  n_replaced <- sum(nonpositive)
  if (n_replaced > 0) {
    showNotification(sprintf("注意:有 %d 个p值为0或负值的基因被替换为最小正值", n_replaced), type = "message")
  }

  color_map <- c("Not DE"="#95a5a6", "Up"=input$up_color, "Down"=input$down_color)

  txt_col <- if(input$theme_toggle) "#00e0ff" else "black"

  p <- plot_ly(res,
               x = ~log2FoldChange, y = ~y_value, color = ~Status,
               colors = color_map,
               text = ~SYMBOL, type = 'scatter', mode = 'markers',
               marker = list(size = input$point_size, opacity = input$point_alpha),
               hoverinfo = 'text',
               hovertext = ~paste("Gene:", SYMBOL,
                                  "<br>log2FC:", round(log2FoldChange, 3),
                                  "<br>-log10(", y_axis_col, "):", round(y_value, 3),
                                  "<br>Status:", Status)) %>%
    layout(
      xaxis = list(
        title = "log2(Fold Change)",
        range = c(input$x_axis_min, input$x_axis_max),
        titlefont = list(size = input$axis_title_size),
        tickfont = list(size = input$axis_label_size),
        showgrid = input$show_grid,
        gridcolor = if(input$show_grid) "#ddd" else "transparent",
        gridwidth = 1
      ),
      yaxis = list(
        title = paste0("-log10(", y_axis_col, ")"),
        titlefont = list(size = input$axis_title_size),
        tickfont = list(size = input$axis_label_size),
        showgrid = input$show_grid,
        gridcolor = if(input$show_grid) "#ddd" else "transparent",
        gridwidth = 1
      ),
      font = list(color = txt_col),
      paper_bgcolor = "rgba(0,0,0,0)",
      plot_bgcolor = "rgba(0,0,0,0)"
    )

  # Label user-selected genes, matched on SYMBOL or GeneID.
  if (!is.null(custom_genes())) {
    selected_genes <- custom_genes()
    gene_data <- res[res$SYMBOL %in% selected_genes | res$GeneID %in% selected_genes, ]

    if (nrow(gene_data) > 0) {
      p <- p %>%
        add_annotations(
          x = gene_data$log2FoldChange,
          y = gene_data$y_value,
          text = gene_data$SYMBOL,
          xref = "x",
          yref = "y",
          showarrow = TRUE,
          arrowhead = 2,
          arrowsize = 1,
          arrowwidth = 1,
          arrowcolor = input$gene_label_color,
          ax = 20,
          ay = -40,
          font = list(
            size = input$gene_label_size,
            color = input$gene_label_color,
            family = "Arial",
            weight = if(input$gene_label_bold) "bold" else "normal"
          ),
          bgcolor = "rgba(255,255,255,0.8)",
          bordercolor = input$gene_label_color,
          borderwidth = 1,
          borderpad = 4,
          opacity = 0.8
        )
    }
  }

  p
})
|
|
| |
# Static (ggplot2) volcano plot used for file export.
#
# Same axes and colours as the interactive plot: x = log2 fold change,
# y = -log10 of the column chosen in `input$y_axis_type`, coloured by Status,
# with optional repelled labels for the genes in `custom_genes()`.
# Returns a ggplot object, or NULL (with a notification) when the data cannot
# be plotted.
volcano_static_plot <- reactive({
  # get_deg_results() returns a list, which is always truthy; require the
  # table itself so a failed analysis does not reach the checks below.
  res <- get_deg_results()$deg_df
  req(res)

  y_axis_col <- input$y_axis_type

  if (!("log2FoldChange" %in% colnames(res) && is.numeric(res$log2FoldChange))) {
    showNotification("错误:log2FoldChange列不存在或不是数值类型", type = "error")
    showNotification(paste("当前列名:", paste(colnames(res), collapse = ", ")), type = "message")
    return(NULL)
  }

  if (!(y_axis_col %in% colnames(res) && is.numeric(res[[y_axis_col]]))) {
    showNotification(paste("错误:列", y_axis_col, "不存在或不是数值类型"), type = "error")
    return(NULL)
  }

  # -log10 is undefined for p <= 0: replace such values with the smallest
  # positive double so those genes are still drawn. NA stays NA.
  p_values <- res[[y_axis_col]]
  nonpositive <- !is.na(p_values) & p_values <= 0
  p_values[nonpositive] <- .Machine$double.xmin
  res$y_value <- -log10(p_values)

  if (all(is.na(res$y_value))) {
    showNotification(paste("错误:所有", y_axis_col, "值无效(<=0或NA),无法绘制火山图"), type = "error")
    return(NULL)
  }

  n_replaced <- sum(nonpositive)
  if (n_replaced > 0) {
    showNotification(sprintf("注意:有 %d 个p值为0或负值的基因被替换为最小正值", n_replaced), type = "message")
  }

  show_grid <- input$show_grid
  grid_color <- if (show_grid) "gray" else "transparent"

  p <- ggplot(res, aes(x = log2FoldChange, y = y_value, color = Status)) +
    geom_point(alpha = input$point_alpha, size = input$point_size) +
    scale_color_manual(values = c("Up" = input$up_color, "Down" = input$down_color, "Not DE" = "#95a5a6")) +
    labs(
      x = "log2(Fold Change)",
      y = paste0("-log10(", y_axis_col, ")"),
      title = "Volcano Plot"
    ) +
    theme_minimal() +
    theme(
      axis.title = element_text(size = input$axis_title_size),
      axis.text = element_text(size = input$axis_label_size),
      legend.title = element_text(size = input$axis_title_size),
      legend.text = element_text(size = input$axis_label_size),
      plot.title = element_text(size = input$axis_title_size + 2, hjust = 0.5),
      panel.grid.major = element_line(
        color = grid_color,
        linewidth = if (show_grid) 0.5 else 0
      ),
      panel.grid.minor = element_line(
        color = grid_color,
        linewidth = if (show_grid) 0.25 else 0
      )
    ) +
    xlim(input$x_axis_min, input$x_axis_max)

  # Label user-selected genes, matched on SYMBOL or GeneID.
  if (!is.null(custom_genes())) {
    selected_genes <- custom_genes()
    gene_data <- res[res$SYMBOL %in% selected_genes | res$GeneID %in% selected_genes, ]

    if (nrow(gene_data) > 0) {
      p <- p +
        geom_text_repel(
          data = gene_data,
          aes(label = SYMBOL),
          size = input$gene_label_size,
          color = input$gene_label_color,
          fontface = if(input$gene_label_bold) "bold" else "plain",
          box.padding = 0.5,
          point.padding = 0.3,
          max.overlaps = Inf
        )
    }
  }

  return(p)
})
|
|
| |
# Export the static volcano plot as PNG (300 dpi), PDF or SVG, using the
# width and height (in inches) chosen in the UI.
output$download_volcano <- downloadHandler(
  filename = function() {
    paste0("volcano_plot_", Sys.Date(), ".", input$export_format)
  },
  content = function(file) {
    plot_obj <- volcano_static_plot()
    req(plot_obj)

    export_format <- input$export_format
    export_width <- input$export_width
    export_height <- input$export_height

    # Refuse unknown formats up front: without an opened device, dev.off()
    # would close some unrelated device or fail.
    switch(
      export_format,
      png = png(file, width = export_width, height = export_height,
                units = "in", res = 300),
      pdf = pdf(file, width = export_width, height = export_height),
      svg = svg(file, width = export_width, height = export_height),
      stop("不支持的导出格式: ", export_format, call. = FALSE)
    )
    # Close the device even if printing the plot fails, so no graphics device
    # is left open.
    on.exit(dev.off(), add = TRUE)

    print(plot_obj)
  }
)
|
|
| |
| return(get_deg_results) |
| } |