| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
#' Parse a GEO Series Matrix file into an expression table plus metadata.
#'
#' Reads the whole file, locates the region between the
#' `!series_matrix_table_begin` and `!series_matrix_table_end` marker lines
#' and parses it as a tab-separated table whose first column supplies the
#' row names. The lines that precede the begin marker are handed to
#' `extract_geo_metadata()`.
#'
#' @param file_path Path to the Series Matrix text file.
#' @return A list. On success: `success = TRUE`, `matrix` (the parsed table),
#'   `metadata`, `n_probes`, `n_samples` and `sample_names`. On any failure:
#'   `success = FALSE` and an `error` message; errors are not propagated.
parse_geo_series_matrix <- function(file_path) {
  cat("📂 开始解析 GEO Series Matrix 文件...\n")

  tryCatch({
    lines <- readLines(file_path, warn = FALSE)

    begin_marker <- which(lines == "!series_matrix_table_begin")
    if (length(begin_marker) == 0) {
      return(list(
        success = FALSE,
        error = "未找到 !series_matrix_table_begin 标记,这不是有效的 GEO Series Matrix 文件"
      ))
    }
    begin_marker <- begin_marker[1]
    start_idx <- begin_marker + 1

    # Only an end marker located after the begin marker can close the table;
    # without one the table is assumed to run to the end of the file.
    end_marker <- which(lines == "!series_matrix_table_end")
    end_marker <- end_marker[end_marker > begin_marker]
    end_idx <- if (length(end_marker) > 0) end_marker[1] - 1 else length(lines)

    # Guard against start_idx:end_idx counting backwards on an empty table.
    if (end_idx < start_idx) {
      return(list(
        success = FALSE,
        error = "数据表为空: 开始标记之后没有可解析的内容"
      ))
    }

    cat(sprintf("✅ 找到数据区域: 第 %d - %d 行\n", start_idx, end_idx))

    # `text =` lets read.table own (and always close) its connection, even
    # when parsing fails.
    expr_matrix <- read.table(
      text = lines[start_idx:end_idx],
      header = TRUE,
      row.names = 1,
      sep = "\t",
      quote = "\"",
      comment.char = "",
      stringsAsFactors = FALSE,
      check.names = FALSE
    )

    # Strip any quote characters that survived parsing.
    colnames(expr_matrix) <- gsub('"', '', colnames(expr_matrix))
    rownames(expr_matrix) <- gsub('"', '', rownames(expr_matrix))

    cat(sprintf("✅ 清理引号后样本名示例: %s\n",
                paste(head(colnames(expr_matrix), 3), collapse = ", ")))
    cat(sprintf("✅ 清理引号后探针ID示例: %s\n",
                paste(head(rownames(expr_matrix), 3), collapse = ", ")))

    # seq_len() yields an empty selection when the marker is the first line
    # (1:0 would count backwards and pick up the marker line itself).
    metadata <- extract_geo_metadata(lines[seq_len(begin_marker - 1)])

    cat(sprintf("✅ 解析完成: %d 探针 × %d 样本\n",
                nrow(expr_matrix), ncol(expr_matrix)))

    list(
      success = TRUE,
      matrix = expr_matrix,
      metadata = metadata,
      n_probes = nrow(expr_matrix),
      n_samples = ncol(expr_matrix),
      sample_names = colnames(expr_matrix)
    )
  }, error = function(e) {
    list(
      success = FALSE,
      error = paste("解析文件时出错:", e$message)
    )
  })
}
| |
|
| | |
| | |
| | |
| | |
#' Extract per-sample descriptions and titles from Series Matrix header lines.
#'
#' Looks for lines starting with `!Sample_description` and `!Sample_title`.
#' For each tag only the first matching line is used; it is split on tabs
#' into one value per field and double quotes are removed from every value.
#'
#' @param metadata_lines Character vector of header lines.
#' @return A list with `has_metadata` (TRUE when at least one of the two tags
#'   was found), `sample_descriptions` and `sample_titles` (character vectors,
#'   or NULL when the tag is absent).
extract_geo_metadata <- function(metadata_lines) {
  # Returns the field values of the first line carrying `tag`, or NULL when
  # no line starts with it.
  read_sample_field <- function(tag) {
    hits <- grep(paste0("^", tag), metadata_lines, value = TRUE)
    if (length(hits) == 0) {
      return(NULL)
    }
    payload <- sub(paste0("^", tag, "\t"), "", hits[1])
    fields <- strsplit(payload, "\t", fixed = TRUE)[[1]]
    # strsplit() silently drops one trailing empty field; restore it so the
    # values stay aligned with the sample columns.
    if (endsWith(payload, "\t")) {
      fields <- c(fields, "")
    }
    # Quotes are removed after splitting: stripping them first would turn a
    # trailing `""` value into an empty tail that strsplit() discards.
    gsub('"', "", fields, fixed = TRUE)
  }

  result <- list(
    has_metadata = FALSE,
    sample_descriptions = NULL,
    sample_titles = NULL
  )

  descriptions <- read_sample_field("!Sample_description")
  if (!is.null(descriptions)) {
    result$sample_descriptions <- descriptions
    result$has_metadata <- TRUE
  }

  titles <- read_sample_field("!Sample_title")
  if (!is.null(titles)) {
    result$sample_titles <- titles
    result$has_metadata <- TRUE
  }

  result
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
#' Parse the annotation table of a SOFT platform (GPL) file.
#'
#' Reads the file, takes the lines after `!platform_table_begin` up to (but
#' not including) the first `!platform_table_end` line or the first line
#' starting with `^`, and parses them as a delimited table with a header
#' row. Column detection is left to the caller: the result always requests
#' manual selection.
#'
#' @param file_path Path to the SOFT file.
#' @param separator Field separator of the table (default: tab).
#' @return A list. On success: `success = TRUE`, `raw_table` (data.frame),
#'   `mapping = NULL`, `gene_symbol_col = NULL`,
#'   `needs_manual_selection = TRUE` and a `message`. On failure:
#'   `success = FALSE` and an `error` message; errors are not propagated.
parse_platform_annotation <- function(file_path, separator = "\t") {
  cat("📋 开始解析 SOFT 平台文件...\n")
  cat(sprintf("📋 使用分隔符: %s\n", separator))

  tryCatch({
    lines <- readLines(file_path, warn = FALSE)

    begin_marker <- which(lines == "!platform_table_begin")
    if (length(begin_marker) == 0) {
      return(list(
        success = FALSE,
        error = "未找到 !platform_table_begin 标记"
      ))
    }
    start_idx <- begin_marker[1] + 1

    end_idx <- length(lines)
    if (start_idx <= length(lines)) {
      # The table stops at its own end marker or at the next SOFT section
      # header, whichever comes first; neither line belongs to the table.
      tail_lines <- lines[start_idx:length(lines)]
      stop_at <- which(
        tail_lines == "!platform_table_end" | startsWith(tail_lines, "^")
      )
      if (length(stop_at) > 0) {
        end_idx <- start_idx + stop_at[1] - 2
      }
    }

    if (end_idx < start_idx) {
      return(list(
        success = FALSE,
        error = "平台注释表为空: 开始标记之后没有可解析的内容"
      ))
    }

    # comment.char = "" keeps '#' inside annotation text from truncating a
    # row; `text =` lets read.table close its own connection on failure.
    raw_table <- read.table(
      text = lines[start_idx:end_idx],
      header = TRUE,
      sep = separator,
      quote = "",
      comment.char = "",
      stringsAsFactors = FALSE,
      fill = TRUE,
      check.names = FALSE
    )

    cat(sprintf("📋 平台注释文件解析: %d 列 × %d 行\n",
                ncol(raw_table), nrow(raw_table)))
    cat("📋 列名:", paste(colnames(raw_table), collapse = ", "), "\n")

    # Show up to three non-empty example values per column.
    cat("📋 各列示例数据:\n")
    for (col in colnames(raw_table)) {
      values <- raw_table[[col]]
      sample_vals <- head(values[!is.na(values) & values != ""], 3)
      cat(sprintf(" %s: %s\n", col, paste(sample_vals, collapse = ", ")))
    }

    cat("⚠️ 请用户手动选择基因符号列\n")

    list(
      success = TRUE,
      raw_table = raw_table,
      mapping = NULL,
      gene_symbol_col = NULL,
      needs_manual_selection = TRUE,
      message = "请手动选择ID列和基因列"
    )
  }, error = function(e) {
    list(
      success = FALSE,
      error = paste("解析 SOFT 文件时出错:", e$message)
    )
  })
}
| |
|
| | |
| | |
| | |
| | |
#' Guess which column of an annotation table holds gene symbols.
#'
#' Three stages are tried in order; the first hit wins:
#' 1. a column whose name is exactly one of the well-known symbol names,
#' 2. a column whose name matches one of those names as a case-insensitive
#'    regular expression,
#' 3. any remaining column (a fixed list of ID-like names is skipped) whose
#'    values look most like gene symbols.
#' A column is only considered when it has at least 10 non-empty values.
#' Stages 1 and 2 inspect the first 50 values and need more than 50% of
#' them to match; stage 3 inspects the first 100 and needs more than 30%.
#'
#' @param table A data.frame-like annotation table.
#' @return The name of the detected column, or NULL when nothing qualifies.
detect_gene_symbol_column <- function(table) {
  cat("🔍 开始智能检测基因符号列...\n")

  high_priority_names <- c(
    "GENE_SYMBOL",
    "Gene.Symbol",
    "Gene_Symbol",
    "gene_symbol",
    "SYMBOL",
    "Symbol",
    "Gene Symbol"
  )
  strict_pattern <- "^[A-Z][A-Z0-9\\-]{1,15}$"

  # Share of the first `max_n` non-empty values of a column that match at
  # least one of `patterns`; NA when fewer than 10 values are available.
  match_ratio_for <- function(col_name, patterns, max_n) {
    values <- table[[col_name]]
    values <- values[!is.na(values) & values != ""]
    n_check <- min(max_n, length(values))
    if (n_check < 10) {
      return(NA_real_)
    }
    sampled <- values[seq_len(n_check)]
    # Each value counts once, however many patterns it matches.
    hit <- Reduce(`|`, lapply(patterns, grepl, x = sampled))
    sum(hit) / n_check
  }

  # Stage 1: exact column-name match.
  for (name in high_priority_names) {
    if (name %in% colnames(table)) {
      match_ratio <- match_ratio_for(name, strict_pattern, 50)
      if (!is.na(match_ratio) && match_ratio > 0.5) {
        cat(sprintf("✅ 智能检测到基因列(高优先级): %s (匹配率: %.1f%%)\n",
                    name, match_ratio * 100))
        return(name)
      }
    }
  }

  # Stage 2: case-insensitive match of the known names against column names.
  for (name in high_priority_names) {
    matching_cols <- colnames(table)[
      grepl(name, colnames(table), ignore.case = TRUE)
    ]
    for (col_name in matching_cols) {
      match_ratio <- match_ratio_for(col_name, strict_pattern, 50)
      if (!is.na(match_ratio) && match_ratio > 0.5) {
        cat(sprintf("✅ 智能检测到基因列(模糊匹配): %s (匹配率: %.1f%%)\n",
                    col_name, match_ratio * 100))
        return(col_name)
      }
    }
  }

  # Stage 3: content analysis over the remaining columns.
  skipped_cols <- c(
    "ID", "SPOT_ID", "CONTROL_TYPE", "REFSEQ", "GB_ACC",
    "UNIGENE_ID", "ENSEMBL_ID", "TIGR_ID", "ACCESSION_STRING",
    "CHROMOSOMAL_LOCATION", "CYTOBAND", "DESCRIPTION", "GO_ID",
    "SEQUENCE"
  )
  symbol_patterns <- c(
    "^[A-Z][A-Z0-9]{1,10}$",
    "^[A-Z][A-Z0-9]{1,5}-[0-9]+$",
    "^[A-Z]{2,6}-[A-Z0-9]{1,3}$"
  )

  best_col <- NULL
  best_match_ratio <- 0

  for (col_name in colnames(table)) {
    if (col_name %in% skipped_cols) {
      next
    }
    match_ratio <- match_ratio_for(col_name, symbol_patterns, 100)
    if (is.na(match_ratio)) {
      next
    }
    if (match_ratio > best_match_ratio && match_ratio > 0.3) {
      best_match_ratio <- match_ratio
      best_col <- col_name
    }
  }

  if (!is.null(best_col)) {
    cat(sprintf("✅ 智能检测到基因列(内容分析): %s (匹配率: %.1f%%)\n",
                best_col, best_match_ratio * 100))
    return(best_col)
  }

  cat("⚠️ 无法自动检测基因符号列,需要用户手动选择\n")
  NULL
}
| |
|
| | |
| | |
| | |
| | |
| | |
#' Collapse a probe-level expression table to gene level by averaging.
#'
#' Probes present in both `expr_matrix` (row names) and `probe_gene_map` are
#' assigned their gene symbol; all probes of one gene are averaged per
#' sample, ignoring NA values (a gene/sample cell with no observed value
#' becomes NaN). Probes whose gene symbol is NA or empty are dropped. Only
#' the `probe_id` and `gene_symbol` columns of the map are used, and sample
#' names are kept exactly as given.
#'
#' Implemented in base R, so no package is attached as a side effect.
#'
#' @param expr_matrix Numeric matrix or data.frame, probes in rows (row
#'   names are probe IDs), samples in columns.
#' @param probe_gene_map data.frame with columns `probe_id` and
#'   `gene_symbol`.
#' @return Numeric matrix, one row per gene (row names are gene symbols,
#'   sorted), one column per sample.
annotate_probe_matrix <- function(expr_matrix, probe_gene_map) {
  cat("🔄 开始探针注释...\n")

  if (is.null(probe_gene_map$probe_id) || is.null(probe_gene_map$gene_symbol)) {
    stop("❌ 注释表必须包含 probe_id 和 gene_symbol 两列")
  }
  map_probe <- as.character(probe_gene_map$probe_id)
  map_gene <- as.character(probe_gene_map$gene_symbol)

  common_probes <- intersect(rownames(expr_matrix), map_probe)

  if (length(common_probes) == 0) {
    stop("❌ 探针ID不匹配,请检查平台注释文件是否正确")
  }

  cat(sprintf("📊 共同探针数: %d (表达矩阵: %d, 注释文件: %d)\n",
              length(common_probes),
              nrow(expr_matrix),
              nrow(probe_gene_map)))

  # One row per usable (probe, gene) pair of the map.
  usable <- map_probe %in% common_probes & !is.na(map_gene) & map_gene != ""
  if (!any(usable)) {
    stop("❌ 匹配到的探针均没有有效的基因符号")
  }
  genes <- map_gene[usable]
  probe_rows <- match(map_probe[usable], rownames(expr_matrix))

  values <- as.matrix(expr_matrix[probe_rows, , drop = FALSE])
  storage.mode(values) <- "numeric"

  # Radix sort gives a locale-independent gene order.
  gene_levels <- sort(unique(genes), method = "radix")
  gene_index <- match(genes, gene_levels)

  # Mean with NA removal = (sum of observed values) / (number observed).
  observed <- !is.na(values)
  values[!observed] <- 0
  sums <- rowsum(values, gene_index, reorder = TRUE)
  counts <- rowsum(observed * 1, gene_index, reorder = TRUE)
  expr_annotated <- sums / counts
  rownames(expr_annotated) <- gene_levels

  cat(sprintf("✅ 探针注释完成: %d 探针 → %d 基因\n",
              length(common_probes),
              nrow(expr_annotated)))

  probes_per_gene <- tabulate(gene_index, nbins = length(gene_levels))
  n_multi_probe_genes <- sum(probes_per_gene > 1)

  if (n_multi_probe_genes > 0) {
    cat(sprintf("📊 其中 %d 个基因有多个探针(已取平均)\n",
                n_multi_probe_genes))
  }

  expr_annotated
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
#' Two-group differential expression with limma (Treatment vs Control).
#'
#' Selects the control and treatment columns from `expr_matrix`, fits a
#' no-intercept group-means model with `lmFit`, tests the contrast
#' `Treatment - Control` with `eBayes`, and returns the full `topTable`
#' (BH adjustment, sorted by p-value) plus summary counts.
#'
#' @param expr_matrix Expression table, features in rows, samples in
#'   columns. If it has an `EntrezID` column, that column fills the `ID`
#'   column of the result; otherwise `ID` repeats the row names.
#' @param ctrl_samples,trt_samples Column names of the two groups. The
#'   annotation columns `ProbeID` and `Gene` are removed from both groups.
#' @param pvalue_threshold Significance cut-off used for the counts.
#' @param logfc_threshold Absolute log2 fold-change cut-off for up/down
#'   counts.
#' @param pval_type `"adj.P.Val"` to count on adjusted p-values; any other
#'   value counts on raw `P.Value`.
#' @return A list with `results` (topTable plus `SYMBOL` and `ID` columns),
#'   `n_total`, `n_significant`, `n_up`, `n_down`, `design` and `fit`.
#' Stops when a group is empty or a requested sample is missing; attaches
#' the limma package.
run_limma_analysis <- function(expr_matrix, ctrl_samples, trt_samples,
                               pvalue_threshold = 0.05,
                               logfc_threshold = 1,
                               pval_type = "adj.P.Val") {
  cat("🧬 开始 limma 差异分析...\n")

  empty_group_msg <- "❌ 对照组和处理组样本数不能为0"
  if (length(ctrl_samples) == 0 || length(trt_samples) == 0) {
    stop(empty_group_msg)
  }

  missing_samples <- setdiff(c(ctrl_samples, trt_samples),
                             colnames(expr_matrix))
  if (length(missing_samples) > 0) {
    stop(sprintf("❌ 以下样本不存在于表达矩阵中: %s",
                 paste(missing_samples, collapse = ", ")))
  }

  # Annotation columns are removed from the group definitions themselves so
  # that the design matrix and the data matrix always describe the same
  # columns.
  for (annot_col in c("ProbeID", "Gene")) {
    if (annot_col %in% c(ctrl_samples, trt_samples)) {
      cat(sprintf("📋 移除%s列(非数值)\n", annot_col))
      ctrl_samples <- ctrl_samples[ctrl_samples != annot_col]
      trt_samples <- trt_samples[trt_samples != annot_col]
    }
  }
  if (length(ctrl_samples) == 0 || length(trt_samples) == 0) {
    stop(empty_group_msg)
  }

  shared_samples <- intersect(ctrl_samples, trt_samples)
  if (length(shared_samples) > 0) {
    warning(sprintf("⚠️ 以下样本同时出现在对照组和处理组: %s",
                    paste(shared_samples, collapse = ", ")),
            call. = FALSE)
  }

  sample_order <- c(ctrl_samples, trt_samples)
  expr_ordered <- as.matrix(expr_matrix[, sample_order, drop = FALSE])
  storage.mode(expr_ordered) <- "numeric"

  cat(sprintf("📊 最终分析矩阵: %d 基因 × %d 样本\n",
              nrow(expr_ordered), ncol(expr_ordered)))

  group <- factor(c(rep("Control", length(ctrl_samples)),
                    rep("Treatment", length(trt_samples))))

  cat(sprintf("📊 样本分组: Control=%d, Treatment=%d\n",
              length(ctrl_samples), length(trt_samples)))

  library(limma)

  # No-intercept design: one column of group means per level.
  design <- model.matrix(~0 + group)
  colnames(design) <- levels(group)

  cat("📊 设计矩阵:\n")
  print(design)

  fit <- lmFit(expr_ordered, design)

  contrast.matrix <- makeContrasts(Treatment-Control, levels = design)

  cat("📊 对比矩阵:\n")
  print(contrast.matrix)

  fit2 <- contrasts.fit(fit, contrast.matrix)
  fit2 <- eBayes(fit2)

  results <- topTable(fit2,
                      number = Inf,
                      adjust.method = "BH",
                      sort.by = "P")

  results$SYMBOL <- rownames(results)

  if ("EntrezID" %in% colnames(expr_matrix)) {
    results$ID <- expr_matrix[rownames(results), "EntrezID"]
    cat("✅ ID列使用Entrez Gene ID,SYMBOL列使用基因符号\n")
  } else {
    results$ID <- rownames(results)
    cat("✅ ID列和SYMBOL列都使用基因符号(无EntrezID)\n")
  }

  n_total <- nrow(results)

  pval_col <- if (pval_type == "adj.P.Val") "adj.P.Val" else "P.Value"
  pval_values <- results[[pval_col]]
  is_significant <- pval_values < pvalue_threshold

  n_significant <- sum(is_significant, na.rm = TRUE)
  n_up <- sum(results$logFC > logfc_threshold & is_significant,
              na.rm = TRUE)
  n_down <- sum(results$logFC < -logfc_threshold & is_significant,
                na.rm = TRUE)

  cat(sprintf("✅ limma 分析完成: %d 个基因\n", n_total))
  cat(sprintf(" 使用P值类型: %s\n", pval_type))
  cat(sprintf(" 显著差异基因 (%s < %.3f): %d (%.1f%%)\n",
              pval_type, pvalue_threshold, n_significant,
              n_significant / n_total * 100))
  cat(sprintf(" 上调基因 (log2FC > %.2f): %d\n", logfc_threshold, n_up))
  # The down-regulated cut-off is the negated threshold.
  cat(sprintf(" 下调基因 (log2FC < %.2f): %d\n", -logfc_threshold, n_down))

  list(
    results = results,
    n_total = n_total,
    n_significant = n_significant,
    n_up = n_up,
    n_down = n_down,
    design = design,
    fit = fit2
  )
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
#' Collapse probes to genes by keeping each gene's highest-expressed probe.
#'
#' Probes found in both `probe_matrix` (row names) and `probe_mapping` are
#' labelled with their gene symbol; probes with an NA or empty symbol are
#' dropped. For every gene the probe with the largest row mean is kept
#' (ties: the first probe). A probe whose row mean is NA is only chosen when
#' all probes of that gene have NA means.
#'
#' @param probe_matrix Numeric matrix or data.frame, probes in rows.
#' @param probe_mapping data.frame with columns `probe_id` and `gene_symbol`.
#' @return An object of the same kind as `probe_matrix` with one row per
#'   gene (row names are gene symbols, in order of first appearance), or
#'   NULL when no probe can be mapped to a gene.
aggregate_probe_expression <- function(probe_matrix, probe_mapping) {
  cat("🔄 开始聚合探针表达量...\n")

  probe_ids <- probe_mapping$probe_id
  gene_symbols <- probe_mapping$gene_symbol
  names(gene_symbols) <- probe_ids

  common_probes <- intersect(rownames(probe_matrix), probe_ids)

  if (length(common_probes) == 0) {
    cat("⚠️ 未找到匹配的探针ID\n")
    return(NULL)
  }

  cat(sprintf("✅ 匹配探针: %d / %d\n",
              length(common_probes), nrow(probe_matrix)))

  expr_subset <- probe_matrix[common_probes, , drop = FALSE]
  gene_symbols_subset <- gene_symbols[common_probes]

  # Keep only probes that carry a usable gene symbol.
  valid_mask <- !is.na(gene_symbols_subset) & gene_symbols_subset != ""
  expr_subset <- expr_subset[valid_mask, , drop = FALSE]
  gene_symbols_subset <- as.character(gene_symbols_subset[valid_mask])

  cat("📊 聚合策略: 选择最高表达探针\n")

  unique_genes <- unique(gene_symbols_subset)
  cat(sprintf("📊 唯一基因数: %d\n", length(unique_genes)))

  if (length(unique_genes) == 0) {
    cat("⚠️ 匹配到的探针均没有有效的基因符号\n")
    return(NULL)
  }

  # One stable sort replaces a scan of all probes per gene: order by gene
  # (first-appearance rank), then by descending mean. NA means sort last and
  # ties keep their original order, so the first row of each gene run is the
  # probe which.max() would have picked.
  probe_means <- rowMeans(expr_subset)
  gene_rank <- match(gene_symbols_subset, unique_genes)
  ranked <- order(gene_rank, -probe_means, method = "radix")
  best_rows <- ranked[!duplicated(gene_rank[ranked])]

  gene_expr_matrix <- expr_subset[best_rows, , drop = FALSE]
  rownames(gene_expr_matrix) <- unique_genes

  cat(sprintf("✅ 聚合完成: %d 个基因\n", nrow(gene_expr_matrix)))

  gene_expr_matrix
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
#' Guess control/treatment groups from sample annotations.
#'
#' A fixed list of keyword patterns is matched (case-insensitively, as
#' substrings) against three text sources in order: sample descriptions,
#' sample titles, then the sample names with a leading `GSM<digits>_` prefix
#' removed. The first source/pattern combination that yields at least one
#' control and one treatment sample wins.
#'
#' A sample that matches a control keyword is never placed in the treatment
#' group as well: several treatment keywords are substrings of control
#' keywords ("treated" in "untreated", "active" in "inactive", "dose" in
#' "dose0").
#'
#' @param sample_names Character vector of sample names.
#' @param sample_descriptions,sample_titles Optional character vectors,
#'   positionally aligned with `sample_names`.
#' @return A list with `pattern_name`, `method` (`"auto_description"`,
#'   `"auto_title"`, `"auto_name"` or `"manual"`), `ctrl_samples`,
#'   `trt_samples`, `ctrl_indices`, `trt_indices` (plain integer positions),
#'   `confidence` and `source`. When nothing is detected `method` is
#'   `"manual"` and every other field is NULL.
detect_chip_groups_auto <- function(sample_names,
                                    sample_descriptions = NULL,
                                    sample_titles = NULL) {
  cat("🔍 开始自动检测分组模式...\n")
  cat(sprintf("📊 总样本数: %d\n", length(sample_names)))

  # Patterns are tried in this order; the first one that splits the samples
  # into two non-empty groups is used.
  group_patterns <- list(
    list(
      name = "时间序列",
      ctrl_keywords = c("before", "baseline", "time0", "initial", "visit1"),
      trt_keywords = c("after", "post", "follow", "final", "visit2", "visit3")
    ),
    list(
      name = "处理对照",
      ctrl_keywords = c("control", "ctrl", "untreated", "vehicle", "placebo"),
      trt_keywords = c("treatment", "treated", "drug", "compound",
                       "stimulated")
    ),
    list(
      name = "疾病对照",
      ctrl_keywords = c("normal", "healthy", "control", "wild"),
      trt_keywords = c("disease", "patient", "cancer", "tumor", "sick")
    ),
    list(
      name = "基因型",
      ctrl_keywords = c("wild", "wt", "wildtype", "normal", "control"),
      trt_keywords = c("mutant", "mut", "knockout", "ko", "transgenic", "tg")
    ),
    list(
      name = "剂量反应",
      ctrl_keywords = c("dose0", "dose_0", "control", "vehicle", "untreated"),
      trt_keywords = c("dose", "treatment", "low", "medium", "high")
    ),
    list(
      name = "激活抑制",
      ctrl_keywords = c("inactive", "unstimulated", "resting", "control"),
      trt_keywords = c("active", "stimulated", "induced", "activated")
    )
  )

  # TRUE for every text that contains at least one of the keywords.
  matches_any <- function(texts, keywords) {
    hits <- rep(FALSE, length(texts))
    for (kw in keywords) {
      hits <- hits | grepl(kw, texts, ignore.case = TRUE)
    }
    hits
  }

  # Tries every pattern against one text source; returns the result list of
  # the first pattern that finds both groups, or NULL.
  find_groups <- function(texts, source, confidence, list_samples) {
    for (pattern in group_patterns) {
      is_ctrl <- matches_any(texts, pattern$ctrl_keywords)
      # Control wins when a text matches keywords of both groups.
      is_trt <- matches_any(texts, pattern$trt_keywords) & !is_ctrl
      if (!any(is_ctrl) || !any(is_trt)) {
        next
      }

      ctrl_idx <- which(is_ctrl)
      trt_idx <- which(is_trt)

      cat(sprintf("✅ 检测到 '%s' 分组模式 (from %s)\n",
                  pattern$name, source))
      if (list_samples) {
        cat(sprintf(" 对照组: %d 个样本 (%s)\n",
                    length(ctrl_idx),
                    paste(sample_names[ctrl_idx], collapse = ", ")))
        cat(sprintf(" 处理组: %d 个样本 (%s)\n",
                    length(trt_idx),
                    paste(sample_names[trt_idx], collapse = ", ")))
      } else {
        cat(sprintf(" 对照组: %d 个样本\n", length(ctrl_idx)))
        cat(sprintf(" 处理组: %d 个样本\n", length(trt_idx)))
      }

      return(list(
        pattern_name = pattern$name,
        method = paste0("auto_", source),
        ctrl_samples = sample_names[ctrl_idx],
        trt_samples = sample_names[trt_idx],
        ctrl_indices = ctrl_idx,
        trt_indices = trt_idx,
        confidence = confidence,
        source = source
      ))
    }
    NULL
  }

  if (!is.null(sample_descriptions) && length(sample_descriptions) > 0) {
    cat("📋 尝试从 Sample_description 检测分组...\n")
    found <- find_groups(sample_descriptions, "description", "high", TRUE)
    if (!is.null(found)) {
      return(found)
    }
  }

  if (!is.null(sample_titles) && length(sample_titles) > 0) {
    cat("📋 尝试从 Sample_title 检测分组...\n")
    found <- find_groups(sample_titles, "title", "medium", FALSE)
    if (!is.null(found)) {
      return(found)
    }
  }

  cat("📋 尝试从样本名检测分组...\n")
  # Drop the GEO accession prefix so only the descriptive part is matched.
  simplified_names <- gsub("^GSM\\d+_", "", sample_names)
  found <- find_groups(simplified_names, "name", "medium", FALSE)
  if (!is.null(found)) {
    return(found)
  }

  cat("⚠️ 未能自动检测到分组模式,请手动设置\n")

  list(
    pattern_name = NULL,
    method = "manual",
    ctrl_samples = NULL,
    trt_samples = NULL,
    ctrl_indices = NULL,
    trt_indices = NULL,
    confidence = NULL,
    source = NULL
  )
}
| |
|
| | |
| | |
| | |
| | |
| | |
#' Heuristically detect a paired design from sample names.
#'
#' After removing a leading `GSM<digits>_` prefix, each keyword pair (e.g.
#' before/after) is tried in turn. For a pair where both keywords occur, the
#' keyword is deleted from the names that contain it; the leftovers are
#' treated as subject identifiers, and identifiers present on both sides
#' count as pairs. The first keyword pair with at least one shared
#' identifier is reported.
#'
#' @param sample_names Character vector of sample names.
#' @param metadata Accepted but not used by this function.
#' @return `list(is_paired = TRUE, n_pairs, pairing_pattern)` when a paired
#'   design is found, otherwise `list(is_paired = FALSE)`.
detect_pairing <- function(sample_names, metadata) {
  stripped <- gsub("^GSM\\d+_", "", sample_names)

  # Parallel vectors: first_terms[i] is paired with second_terms[i].
  first_terms <- c("before", "baseline", "control", "time0", "visit1", "pre")
  second_terms <- c("after", "follow", "treated", "time1", "visit2", "post")

  for (i in seq_along(first_terms)) {
    term_a <- first_terms[i]
    term_b <- second_terms[i]

    with_a <- grepl(term_a, stripped, ignore.case = TRUE)
    with_b <- grepl(term_b, stripped, ignore.case = TRUE)
    if (!any(with_a) || !any(with_b)) {
      next
    }

    # What remains after deleting the keyword serves as the subject id.
    subjects_a <- gsub(term_a, "", stripped[with_a], ignore.case = TRUE)
    subjects_b <- gsub(term_b, "", stripped[with_b], ignore.case = TRUE)
    shared <- intersect(subjects_a, subjects_b)
    if (length(shared) == 0) {
      next
    }

    cat(sprintf("💡 检测到配对设计: %d 对样本\n", length(shared)))
    return(list(
      is_paired = TRUE,
      n_pairs = length(shared),
      pairing_pattern = sprintf("%s/%s", term_a, term_b)
    ))
  }

  list(is_paired = FALSE)
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
#' Repackage limma output into the structure used by downstream modules.
#'
#' Renames the limma result columns (`logFC` to `log2FoldChange`, `P.Value`
#' to `pvalue`, `adj.P.Val` to `padj`, `AveExpr` to `baseMean`), copies `ID`
#' into an extra `ENTREZID` column and bundles the table with the expression
#' matrix, the group definitions and the summary counts.
#'
#' @param limma_results List with `results`, `n_significant`, `n_up` and
#'   `n_down`, as returned by `run_limma_analysis()`.
#' @param expr_matrix Expression matrix; its row names become
#'   `background_genes`.
#' @param ctrl_samples,trt_samples Sample names of the two groups.
#' @return A list with `deg_df`, `background_genes`, `expr_matrix`,
#'   `ctrl_samples`, `trt_samples`, `method = "limma"`, `n_significant`,
#'   `n_up` and `n_down`.
format_chip_results_for_pipeline <- function(limma_results, expr_matrix,
                                             ctrl_samples, trt_samples) {
  top_table <- limma_results$results

  deg_df <- data.frame(
    ID = top_table$ID,
    SYMBOL = top_table$SYMBOL,
    log2FoldChange = top_table$logFC,
    pvalue = top_table$P.Value,
    padj = top_table$adj.P.Val,
    baseMean = top_table$AveExpr,
    t = top_table$t,
    row.names = NULL,
    stringsAsFactors = FALSE
  )
  # Downstream modules read the identifier from an ENTREZID column.
  deg_df$ENTREZID <- deg_df$ID

  background <- rownames(expr_matrix)

  list(
    deg_df = deg_df,
    background_genes = background,
    expr_matrix = expr_matrix,
    ctrl_samples = ctrl_samples,
    trt_samples = trt_samples,
    method = "limma",
    n_significant = limma_results$n_significant,
    n_up = limma_results$n_up,
    n_down = limma_results$n_down
  )
}
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
#' Build the Shiny UI of the microarray (chip) analysis module.
#'
#' The UI is an info banner followed by five collapsible Bootstrap panels,
#' all direct children of the `chip_analysis_accordion` container:
#' upload/preview, probe annotation, preprocessing/de-duplication,
#' differential analysis and results. Input and output ids are plain
#' (non-namespaced) strings.
#'
#' @return A `tagList` with the complete module UI.
chip_analysis_ui <- function() {
  # Builds one collapsible panel: a clickable gradient heading that toggles
  # the collapse container `panel_id`, whose children are given in `...`.
  # `expanded` opens the panel initially; `body_style` is an optional inline
  # style for the collapse container.
  accordion_panel <- function(panel_id, gradient, heading_icon, heading_text,
                              ..., expanded = FALSE, body_style = NULL) {
    tags$div(
      class = "panel panel-default",
      tags$div(
        class = "panel-heading",
        style = sprintf(
          "cursor: pointer; background: linear-gradient(135deg, %s); color: white; padding: 15px;",
          gradient
        ),
        `data-toggle` = "collapse",
        `data-target` = paste0("#", panel_id),
        tags$h4(
          class = "panel-title",
          style = "margin: 0;",
          tags$span(icon(heading_icon), heading_text)
        )
      ),
      tags$div(
        id = panel_id,
        class = if (expanded) {
          "panel-collapse collapse in"
        } else {
          "panel-collapse collapse"
        },
        style = body_style,
        ...
      )
    )
  }

  tagList(
    fluidRow(
      column(12,
        div(
          class = "info-box",
          style = paste0(
            "background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); ",
            "color: white; padding: 25px; border-radius: 15px; margin-bottom: 25px;"
          ),
          h4("🧬 芯片数据分析模块", style = "margin-top: 0; color: white;"),
          p("支持 GEO Series Matrix 格式的芯片数据差异分析,自动探针注释,无缝集成下游富集分析。",
            style = "color: rgba(255,255,255,0.9); margin-bottom: 0;")
        )
      )
    ),

    tags$div(
      id = "chip_analysis_accordion",

      # Step 1: file upload and raw previews (open by default).
      accordion_panel(
        "panel_upload", "#667eea 0%, #764ba2 100%",
        "upload", " 📁 步骤1: 数据上传与预览",
        expanded = TRUE,
        wellPanel(
          fluidRow(
            column(6,
              h5("📄 上传数据文件", style = "color: #007AFF;"),
              fileInput("chip_series_matrix",
                        "GEO Series Matrix 文件",
                        accept = c(".txt", ".matrix.txt", "text/plain"),
                        placeholder = "选择文件..."),
              helpText("GEO 数据库下载的 Series Matrix 文件(通常包含样本表达矩阵)")
            ),
            column(6,
              fileInput("chip_soft_platform",
                        "SOFT 平台注释文件 (可选)",
                        accept = c(".txt", ".soft", "annot.txt", "text/plain"),
                        placeholder = "选择文件..."),
              helpText("用于探针注释的 GPL 平台文件。如不上传,系统将尝试自动注释。")
            )
          ),

          tags$hr(style = "border-color: #dee2e6;"),

          h5("📊 数据文件预览", style = "color: #007AFF;"),

          fluidRow(
            column(6,
              h6("Series Matrix 文件(前5行)", style = "color: #666;"),
              DTOutput("chip_series_matrix_preview")
            ),
            column(6,
              h6("SOFT 文件(前10行)", style = "color: #666;"),
              DTOutput("chip_soft_raw_preview")
            )
          )
        ),
        tags$div(
          class = "panel-body",
          style = "padding: 15px;"
        )
      ),

      # Step 2: probe annotation and merge. This panel is closed here so the
      # following steps are its siblings rather than its children.
      accordion_panel(
        "panel_annotation", "#9C27B0 0%, #7B1FA2 100%",
        "cogs", " 🧬 步骤2: 探针注释与数据合并",
        wellPanel(
          uiOutput("chip_soft_columns_list_ui"),

          tags$hr(style = "border-color: #dee2e6;"),

          h5("📋 探针注释配置", style = "color: #9C27B0;"),
          uiOutput("chip_soft_column_selection_panel"),

          tags$hr(style = "border-color: #dee2e6;"),

          fluidRow(
            column(6,
              actionButton("chip_preview_merge", "👁️ 预览合并结果",
                           class = "btn-info", style = "width: 100%;")
            ),
            column(6,
              actionButton("chip_apply_merge", "✅ 应用配置并生成最终矩阵",
                           class = "btn-success", style = "width: 100%;")
            )
          ),

          # Shown once the preview button has been clicked.
          conditionalPanel(
            condition = "input.chip_preview_merge",
            wellPanel(
              style = "background: #e8f5e9; border: 2px solid #4caf50;",
              h5("👁️ 合并结果预览(前5行)", style = "color: #2e7d32;"),
              DTOutput("chip_merge_preview_table")
            )
          ),

          # Shown once the apply button has been clicked.
          conditionalPanel(
            condition = "input.chip_apply_merge",
            wellPanel(
              style = "background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border: 2px solid #4caf50;",
              h5("✅ 最终表达矩阵",
                 style = "color: #2e7d32; font-size: 18px; font-weight: bold;"),
              uiOutput("chip_final_matrix_ui"),
              br(),
              helpText("💡 此矩阵可直接用于后续的差异分析。已将探针ID和基因符号合并到表达数据中。",
                       style = "color: #2e7d33;")
            )
          )
        )
      ),

      # Step 3: preprocessing, batch correction, probe de-duplication and
      # export (open by default).
      accordion_panel(
        "panel_preprocess", "#ff9800 0%, #f57c00 100%",
        "sliders-h", " 🔧 步骤3: 数据预处理与探针去重",
        expanded = TRUE,
        wellPanel(
          h5("📊 数据预处理(log2转换 + 标准化)",
             style = "color: #ff9800; font-weight: bold;"),
          fluidRow(
            column(6,
              checkboxInput("chip_auto_log2", "自动判断并执行log2转换",
                            value = TRUE),
              checkboxInput("chip_normalize_data",
                            "执行limma标准化(normalizeBetweenArrays)",
                            value = TRUE)
            ),
            column(6,
              actionButton("chip_preprocess_data", "⚙️ 执行预处理",
                           class = "btn-warning", style = "width: 100%;")
            )
          ),

          conditionalPanel(
            condition = "input.chip_preprocess_data",
            uiOutput("chip_preprocess_result_ui")
          ),

          tags$hr(style = "border-color: #ffc107;"),

          h5("🎛️ 批次效应矫正(可选)",
             style = "color: #E91E63; font-weight: bold;"),
          fluidRow(
            column(6,
              selectInput("chip_batch_method", "批次矫正方法",
                          choices = c("无" = "none", "ComBat (sva)" = "combat",
                                      "ComBat (limma)" = "limma", "SVA" = "sva"),
                          selected = "none")
            ),
            column(6,
              actionButton("chip_apply_batch_correct", "🎛️ 执行批次矫正",
                           class = "btn-danger", style = "width: 100%;")
            )
          ),

          conditionalPanel(
            condition = "input.chip_apply_batch_correct",
            uiOutput("chip_batch_correct_result_ui")
          ),

          tags$hr(style = "border-color: #ffc107;"),

          h5("✂️ 探针去重(保留表达量最高的探针)",
             style = "color: #ff9800; font-weight: bold;"),
          helpText("当一个基因对应多个探针时,保留表达量最高的探针。这将生成基因级别的表达矩阵。"),

          fluidRow(
            column(6,
              h6("去重前统计:", style = "color: #666;"),
              uiOutput("chip_before_dedupe_stats")
            ),
            column(6,
              h6("去重后统计:", style = "color: #666;"),
              uiOutput("chip_after_dedupe_stats")
            )
          ),

          actionButton("chip_dedupe_probes", "✂️ 执行探针去重",
                       class = "btn-warning btn-lg", style = "width: 100%;"),

          conditionalPanel(
            condition = "input.chip_dedupe_probes",
            wellPanel(
              h6("✅ 去重完成", style = "color: #28a745;"),
              uiOutput("chip_dedupe_result_ui")
            )
          ),

          tags$hr(style = "border-color: #ffc107;"),

          h5("💾 生成标准格式数据",
             style = "color: #ff9800; font-weight: bold;"),
          helpText("将处理后的表达矩阵转换成标准格式,可直接用于后续的差异分析、KEGG、GO等模块。"),

          fluidRow(
            column(12,
              div(
                style = "background: #d4edda; padding: 15px; border-radius: 8px; border: 1px solid #c3e6cb;",
                h6("💡 即可对接现有分析模块:", style = "color: #155724;"),
                tags$ul(style = "margin: 10px 0; padding-left: 20px;",
                  tags$li("差异分析(使用现有的差异分析模块)"),
                  tags$li("KEGG富集分析(使用现有的KEGG模块)"),
                  tags$li("GO富集分析(使用现有的GO模块)"),
                  tags$li("GSEA分析(使用现有的GSEA模块)")
                )
              )
            )
          ),

          actionButton("chip_generate_standard_data", "🚀 生成标准格式数据",
                       class = "btn-success btn-lg",
                       style = "width: 100%; font-size: 16px;")
        )
      ),

      # Step 4: grouping and differential-analysis parameters.
      accordion_panel(
        "panel_diff_analysis", "#34C759 0%, #2e7d32 100%",
        "chart-bar", " 🧬 步骤4: 差异分析",
        body_style = "padding: 15px;",
        wellPanel(
          uiOutput("chip_grouping_ui"),

          tags$hr(style = "border-color: #34C759;"),

          h5("🔬 差异分析参数", style = "color: #34C759; font-weight: bold;"),
          fluidRow(
            column(4,
              sliderInput("chip_logfc_threshold",
                          "log2FoldChange 阈值:",
                          min = 0, max = 5, value = 1, step = 0.1)
            ),
            column(4,
              selectInput("chip_pval_type",
                          "显著性指标:",
                          choices = c("校正P值 (adj.P.Val)" = "adj.P.Val",
                                      "原始P值 (P.Value)" = "P.Value"),
                          selected = "adj.P.Val")
            ),
            column(4,
              sliderInput("chip_pvalue_threshold",
                          "P值 阈值:",
                          min = 0.001, max = 0.1, value = 0.05, step = 0.001)
            )
          ),

          fluidRow(
            column(12,
              checkboxInput("chip_paired_analysis",
                            "配对样本分析(如果适用)",
                            value = FALSE)
            )
          ),

          tags$hr(style = "border-color: #34C759;"),

          actionButton("run_chip_analysis", "🚀 运行差异分析",
                       class = "btn-primary btn-lg",
                       style = "width: 100%; margin-top: 15px;")
        )
      ),

      # Step 5: results and download.
      accordion_panel(
        "panel_results", "#007AFF 0%, #0051D5 100%",
        "table", " 📊 步骤5: 分析结果",
        body_style = "padding: 15px;",
        wellPanel(
          uiOutput("chip_results_ui"),

          tags$hr(style = "border-color: #007AFF;"),

          fluidRow(
            column(12,
              downloadButton("download_chip_results", "📥 下载结果",
                             class = "btn-success")
            )
          )
        )
      )
    )
  )
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | chip_analysis_server <- function(input, output, session, deg_results) { |
| | cat("✅ 芯片分析模块已启动\n") |
| |
|
| | |
| | # Render the chip-analysis page by delegating to chip_analysis_ui(). |
| | output$chip_analysis_ui_output <- renderUI(chip_analysis_ui()) |
| |
|
| | |
| | # Per-session reactive state shared by the observers and outputs below. |
| | # Every field starts as NULL. The last five fields were previously created |
| | # implicitly on first assignment; they are declared here so the full state |
| | # is visible in one place (reading an unset field already yielded NULL). |
| | chip_data <- reactiveValues( |
| |   series_matrix = NULL, |
| |   soft_platform = NULL, |
| |   probe_mapping = NULL, |
| |   expr_matrix = NULL, |
| |   metadata = NULL, |
| |   group_info = NULL, |
| |   manual_ctrl_samples = NULL, |
| |   manual_trt_samples = NULL, |
| |   gene_symbol_col = NULL, |
| |   selected_id_col = NULL, |
| |   selected_gene_col = NULL, |
| |   panel_initialized = NULL, |
| |   merged_preview = NULL |
| | ) |
| |
|
| | |
| | # Parse the pasted control-group sample list when the parse button fires. |
| | # Needs both pasted text and a loaded series matrix; the result is stored |
| | # in chip_data$manual_ctrl_samples. |
| | observeEvent(input$chip_parse_ctrl, { |
| |   req(input$chip_paste_ctrl) |
| |   req(chip_data$series_matrix) |
| |   showNotification("正在解析对照组样本...", type = "message") |
| |   ctrl_samples <- parse_sample_list(input$chip_paste_ctrl, chip_data$series_matrix) |
| |   # length(NULL) is 0, so this covers both a NULL and an empty result. |
| |   if (length(ctrl_samples) == 0) { |
| |     showNotification("解析失败:未找到有效样本", type = "error") |
| |     return(NULL) |
| |   } |
| |   chip_data$manual_ctrl_samples <- ctrl_samples |
| |   success_msg <- sprintf("✅ 成功解析对照组: %d 个样本", length(ctrl_samples)) |
| |   showNotification(success_msg, type = "message") |
| | }) |
| |
|
| | |
| | # Parse the pasted treatment-group sample list when the parse button fires. |
| | # Needs both pasted text and a loaded series matrix; the result is stored |
| | # in chip_data$manual_trt_samples. |
| | observeEvent(input$chip_parse_trt, { |
| |   req(input$chip_paste_trt) |
| |   req(chip_data$series_matrix) |
| |   showNotification("正在解析处理组样本...", type = "message") |
| |   trt_samples <- parse_sample_list(input$chip_paste_trt, chip_data$series_matrix) |
| |   # length(NULL) is 0, so this covers both a NULL and an empty result. |
| |   if (length(trt_samples) == 0) { |
| |     showNotification("解析失败:未找到有效样本", type = "error") |
| |     return(NULL) |
| |   } |
| |   chip_data$manual_trt_samples <- trt_samples |
| |   success_msg <- sprintf("✅ 成功解析处理组: %d 个样本", length(trt_samples)) |
| |   showNotification(success_msg, type = "message") |
| | }) |
| |
|
| | |
| | # Drop both manually entered sample groups and tell the user. |
| | observeEvent(input$chip_clear_groups, { |
| |   chip_data$manual_trt_samples <- NULL |
| |   chip_data$manual_ctrl_samples <- NULL |
| |   showNotification("已清除分组设置", type = "message") |
| | }) |
| |
|
| | |
| | # Parse an uploaded GEO Series Matrix file and store the expression matrix, |
| | # its metadata and the automatically detected sample groups in chip_data. |
| | observeEvent(input$chip_series_matrix, { |
| |   req(input$chip_series_matrix) |
| |   file_path <- input$chip_series_matrix$datapath |
| |   showNotification("正在解析 GEO Series Matrix 文件...", type = "message") |
| |   result <- parse_geo_series_matrix(file_path) |
| |   # isTRUE() also treats a missing or NA success flag as a failure. |
| |   if (!isTRUE(result$success)) { |
| |     showNotification(result$error, type = "error") |
| |     return(NULL) |
| |   } |
| |   chip_data$series_matrix <- result$matrix |
| |   chip_data$metadata <- result$metadata |
| |   chip_data$expr_matrix <- result$matrix |
| |   showNotification( |
| |     sprintf("✅ 成功解析: %d 探针 × %d 样本", |
| |             result$n_probes, result$n_samples), |
| |     type = "message" |
| |   ) |
| |   # Always overwrite group_info: without metadata it is reset to NULL so |
| |   # groups detected for a previously uploaded file do not stick to the new one. |
| |   chip_data$group_info <- if (!is.null(result$metadata)) { |
| |     detect_chip_groups_auto( |
| |       sample_names = result$sample_names, |
| |       sample_descriptions = result$metadata$sample_descriptions, |
| |       sample_titles = result$metadata$sample_titles |
| |     ) |
| |   } else { |
| |     NULL |
| |   } |
| | }, ignoreNULL = TRUE) |
| |
|
| | |
| | # Preview table: first 5 rows of the series matrix, with the first (up to) |
| | # five sample columns rendered in a smaller font. |
| | output$chip_series_matrix_preview <- renderDT({ |
| |   req(chip_data$series_matrix) |
| |   preview_df <- as.data.frame(head(chip_data$series_matrix, 5)) |
| |   preview_table <- datatable( |
| |     preview_df, |
| |     options = list( |
| |       dom = 't', |
| |       paging = FALSE, |
| |       scrollX = TRUE, |
| |       columnDefs = list(list( |
| |         className = 'dt-center', |
| |         targets = "_all" |
| |       )) |
| |     ), |
| |     rownames = TRUE, |
| |     filter = 'none' |
| |   ) |
| |   # 1:min(5, ncol) would yield c(1, 0) for a matrix without columns, so the |
| |   # styling is only applied when there is at least one column to style. |
| |   n_styled <- min(5, ncol(preview_df)) |
| |   if (n_styled > 0) { |
| |     preview_table <- formatStyle(preview_table, columns = seq_len(n_styled), fontSize = '85%') |
| |   } |
| |   preview_table |
| | }) |
| |
|
| | |
| | # Container for the SOFT preview: a placeholder notice until a SOFT file has |
| | # been uploaded, afterwards a heading plus the raw preview table output. |
| | output$chip_soft_preview_ui <- renderUI({ |
| |   if (is.null(input$chip_soft_platform)) { |
| |     cat("⚠️ SOFT文件未上传\n") |
| |     return(div( |
| |       class = "alert alert-info", |
| |       h5("📄 请先上传SOFT平台注释文件"), |
| |       p("上传后将在此处显示文件预览。") |
| |     )) |
| |   } |
| |   cat(sprintf("🔍 渲染SOFT预览UI (文件已上传)\n")) |
| |   # Console diagnostics only: report whether the parsed table is stored yet. |
| |   platform <- chip_data$soft_platform |
| |   if (is.null(platform)) { |
| |     cat(" ⚠️ 数据尚未加载到chip_data\n") |
| |   } else { |
| |     cat(sprintf(" 数据可用: %d rows x %d cols\n", |
| |                 nrow(platform), ncol(platform))) |
| |   } |
| |   tagList( |
| |     h5("📄 SOFT 平台注释文件(前10行)", style = "color: #FF9800;"), |
| |     helpText("这是SOFT注释文件的真实数据。可以看到ID列和多个可能的基因列。"), |
| |     DTOutput("chip_soft_raw_preview") |
| |   ) |
| | }) |
| |
|
| | |
| | # Numbered three-column grid of all SOFT column names, followed by a help |
| | # box explaining how to recognise the gene column. |
| | output$chip_soft_columns_list_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   soft_cols <- colnames(chip_data$soft_platform) |
| |   # One "<index>. <name>" label per column, rendered as a small card. |
| |   column_labels <- sprintf("%d. %s", seq_along(soft_cols), soft_cols) |
| |   column_cards <- lapply(column_labels, function(label) { |
| |     div( |
| |       style = "background: white; padding: 8px; border-radius: 4px; border: 1px solid #FFB74D;", |
| |       tags$span(style = "color: #E65100; font-weight: bold; font-family: monospace;", |
| |                 label) |
| |     ) |
| |   }) |
| |   column_grid <- div( |
| |     style = "background: #fff3e0; padding: 15px; border-radius: 8px; border-left: 4px solid #FF9800;", |
| |     div(style = "display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;", |
| |         column_cards) |
| |   ) |
| |   help_box <- div( |
| |     style = "background: #e3f2fd; padding: 12px; border-radius: 5px; border-left: 4px solid #2196F3;", |
| |     h6("💡 如何选择基因列?", style = "color: #1976D2; margin-top: 0;"), |
| |     tags$ul(style = "padding-left: 20px; margin: 5px 0;", |
| |             tags$li("查看下方的列内容示例,了解每列包含什么数据"), |
| |             tags$li("基因符号列通常包含:TP53, EGFR, BRCA1, MYC 等基因名称"), |
| |             tags$li("ID列通常包含:数字或探针标识符(如 1553601_at)"), |
| |             tags$li("点击下方的'查看列内容'按钮查看每列的前5行数据") |
| |     ) |
| |   ) |
| |   tagList( |
| |     h6("📌 所有列名(共", span(style = "color: #FF9500;", length(soft_cols)), "列):", style = "color: #333;"), |
| |     column_grid, |
| |     br(), |
| |     help_box |
| |   ) |
| | }) |
| |
|
| | |
| | # For every SOFT column, show a card with its first three non-empty values |
| | # inside a scrollable container. |
| | # Fixes over the previous version: |
| | #  * h7() is not a shiny function (headings stop at h6), so rendering failed; |
| | #  * the scroll container div was closed before its children were added; |
| | #  * a for loop inside div() evaluates to NULL, so no example was rendered; |
| | #  * `cond ? a : b` is not an R conditional expression. |
| | output$chip_soft_column_examples_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   soft_cols <- colnames(chip_data$soft_platform) |
| |   column_cards <- lapply(soft_cols, function(col_name) { |
| |     col_data <- as.character(chip_data$soft_platform[[col_name]]) |
| |     col_data <- col_data[!is.na(col_data) & col_data != ""] |
| |     examples <- head(col_data, 3) |
| |     # lapply() returns the tags so that div() below receives them as children. |
| |     example_rows <- lapply(seq_along(examples), function(i) { |
| |       tags$div( |
| |         sprintf(" %d. %s", i, examples[i]), |
| |         # A NULL style omits the attribute; only non-final rows get spacing. |
| |         style = if (i < length(examples)) "margin-bottom: 5px;" else NULL |
| |       ) |
| |     }) |
| |     wellPanel( |
| |       style = "padding: 10px; margin-bottom: 10px;", |
| |       tags$div(style = "color: #FF9800; font-weight: bold; margin-bottom: 5px;", |
| |                sprintf("🔹 %s", col_name)), |
| |       div( |
| |         style = "background: #f5f5f5; padding: 8px; border-radius: 4px; font-family: monospace; font-size: 11px;", |
| |         example_rows |
| |       ) |
| |     ) |
| |   }) |
| |   tagList( |
| |     h6("📊 各列内容示例(前3行)", style = "color: #333;"), |
| |     div(style = "max-height: 400px; overflow-y: auto;", column_cards) |
| |   ) |
| | }) |
| |
|
| | |
| | # Parse a newly uploaded SOFT platform annotation file as tab-separated |
| | # text, store the raw table and clear any mapping derived from an older file. |
| | observeEvent(input$chip_soft_platform, { |
| |   upload <- input$chip_soft_platform |
| |   req(upload) |
| |   showNotification("正在解析 SOFT 平台注释文件...", type = "message") |
| |   parsed <- parse_platform_annotation(upload$datapath, "\t") |
| |   if (!parsed$success) { |
| |     showNotification(parsed$error, type = "error") |
| |     return(NULL) |
| |   } |
| |   chip_data$soft_platform <- parsed$raw_table |
| |   chip_data$probe_mapping <- NULL |
| |   chip_data$gene_symbol_col <- NULL |
| |   platform <- chip_data$soft_platform |
| |   cat(sprintf("💾 SOFT数据已保存: %d rows x %d cols\n", |
| |               nrow(platform), ncol(platform))) |
| |   cat("⚠️ 请用户手动选择ID列和基因列\n") |
| |   showNotification( |
| |     "✅ SOFT文件已加载,请在下方手动选择ID列和基因列", |
| |     type = "message" |
| |   ) |
| | }, ignoreNULL = TRUE) |
| |
|
| | |
| | # Re-parse the already uploaded SOFT file with the user-chosen separator |
| | # (tab when none is given) and clear any previously derived mapping. |
| | observeEvent(input$chip_reparse_soft, { |
| |   req(input$chip_soft_platform) |
| |   file_path <- input$chip_soft_platform$datapath |
| |   separator <- "\t" |
| |   chosen_separator <- input$chip_soft_separator |
| |   if (!is.null(chosen_separator) && chosen_separator != "") { |
| |     separator <- chosen_separator |
| |   } |
| |   showNotification("正在重新解析 SOFT 平台注释文件...", type = "message") |
| |   parsed <- parse_platform_annotation(file_path, separator) |
| |   if (!parsed$success) { |
| |     showNotification(parsed$error, type = "error") |
| |     return(NULL) |
| |   } |
| |   chip_data$soft_platform <- parsed$raw_table |
| |   chip_data$probe_mapping <- NULL |
| |   chip_data$gene_symbol_col <- NULL |
| |   showNotification( |
| |     "✅ 重新解析完成,请手动选择ID列和基因列", |
| |     type = "message" |
| |   ) |
| | }) |
| |
|
| | |
| | # Two-column summary table: probe count, sample count and sample names, |
| | # plus the detected grouping when group_info carries a pattern name. |
| | output$chip_data_summary <- renderDT({ |
| |   req(chip_data$series_matrix) |
| |   expr_data <- chip_data$series_matrix |
| |   sample_list <- paste(colnames(expr_data), collapse = ", ") |
| |   # c() coerces the two counts to character alongside the sample list. |
| |   summary_df <- data.frame( |
| |     项目 = c("探针数", "样本数", "样本名称"), |
| |     值 = c(nrow(expr_data), ncol(expr_data), sample_list) |
| |   ) |
| |   groups <- chip_data$group_info |
| |   if (!is.null(groups) && !is.null(groups$pattern_name)) { |
| |     group_rows <- data.frame( |
| |       项目 = c("检测到分组模式", "对照组样本", "处理组样本"), |
| |       值 = c( |
| |         groups$pattern_name, |
| |         paste(groups$ctrl_samples, collapse = ", "), |
| |         paste(groups$trt_samples, collapse = ", ") |
| |       ) |
| |     ) |
| |     summary_df <- rbind(summary_df, group_rows) |
| |   } |
| |   datatable(summary_df, |
| |             options = list(dom = 't', paging = FALSE), |
| |             rownames = FALSE) |
| | }) |
| |
|
| | |
| | # Annotation status box: a warning while no SOFT file is loaded, otherwise |
| | # the table sizes and, once a probe mapping exists, its coverage. |
| | output$chip_annotation_status <- renderUI({ |
| |   req(chip_data$series_matrix) |
| |   if (is.null(chip_data$soft_platform)) { |
| |     return(div( |
| |       class = "alert alert-warning", |
| |       h5("⚠️ 未加载探针注释文件", style = "color: #ffc107;"), |
| |       p("将直接使用探针ID作为基因符号进行分析。"), |
| |       p("强烈建议上传 SOFT 平台注释文件以获得准确的结果。") |
| |     )) |
| |   } |
| |   n_probes <- nrow(chip_data$series_matrix) |
| |   # Stays NULL (and is dropped from the UI) until a mapping has been built. |
| |   mapping_line <- NULL |
| |   if (!is.null(chip_data$probe_mapping)) { |
| |     n_mapped <- nrow(chip_data$probe_mapping) |
| |     mapping_line <- p(sprintf("成功映射: %d (%.1f%%)", |
| |                               n_mapped, |
| |                               n_mapped / n_probes * 100)) |
| |   } |
| |   div( |
| |     class = "alert alert-info", |
| |     h5("✅ SOFT文件已加载,等待配置", style = "color: #17a2b8;"), |
| |     p(sprintf("总探针数: %d", n_probes)), |
| |     p(sprintf("SOFT平台数据: %d 行 x %d 列", |
| |               nrow(chip_data$soft_platform), |
| |               ncol(chip_data$soft_platform))), |
| |     p("💡 请在下方选择ID列和基因列以建立探针映射。", style = "color: #007bff; font-weight: bold;"), |
| |     mapping_line |
| |   ) |
| | }) |
| |
|
| | |
| | # Panel with the three column selectors (ID, gene, optional Entrez ID) for |
| | # the loaded SOFT table. Current selections are read with isolate() so that |
| | # picking a value does not itself re-render the panel; the previous ID/gene |
| | # choices are re-applied as the selected values when it does re-render. |
| | output$chip_soft_column_selection_panel <- renderUI({ |
| |   if (is.null(chip_data$soft_platform)) { |
| |     return(NULL) |
| |   } |
| |   # Log the table size once, the first time the panel is built. |
| |   if (is.null(chip_data$panel_initialized)) { |
| |     cat(sprintf("✅ 初始化SOFT列选择面板: %d 行 x %d 列\n", |
| |                 nrow(chip_data$soft_platform), |
| |                 ncol(chip_data$soft_platform))) |
| |     chip_data$panel_initialized <- TRUE |
| |   } |
| |   soft_cols <- isolate(colnames(chip_data$soft_platform)) |
| |   # Map a missing or empty selection to "" (the blank choice). |
| |   or_blank <- function(choice) { |
| |     if (is.null(choice) || choice == "") "" else choice |
| |   } |
| |   current_id <- isolate(or_blank(input$chip_soft_id_col)) |
| |   current_gene <- isolate(or_blank(input$chip_soft_gene_col)) |
| |   id_column <- column(4, |
| |     h5("选择ID列", style = "color: #9C27B0; font-weight: bold;"), |
| |     selectInput("chip_soft_id_col", |
| |                 "选择ID列(必须与Series Matrix的探针ID匹配)", |
| |                 choices = c("", soft_cols), |
| |                 selected = current_id), |
| |     helpText("选择SOFT文件中包含探针ID的列(通常为'ID'列)") |
| |   ) |
| |   gene_column <- column(4, |
| |     h5("选择基因列", style = "color: #9C27B0; font-weight: bold;"), |
| |     selectInput("chip_soft_gene_col", |
| |                 "选择基因列(包含基因符号的列)", |
| |                 choices = c("", soft_cols), |
| |                 selected = current_gene), |
| |     helpText("选择包含基因符号的列(如GENE_SYMBOL, SYMBOL)") |
| |   ) |
| |   entrez_column <- column(4, |
| |     h5("选择EntrezID列(可选)", style = "color: #FF5722; font-weight: bold;"), |
| |     selectInput("chip_soft_entrez_col", |
| |                 "选择Entrez Gene ID列", |
| |                 choices = c("", "自动检测", soft_cols), |
| |                 selected = ""), |
| |     helpText("选择包含Entrez Gene ID的列(如GENE, ENTREZID)", style = "font-size: 11px; color: #FF5722;") |
| |   ) |
| |   wellPanel( |
| |     style = "background: linear-gradient(135deg, #fff7e6 0%, #ffe6b3 100%); border: 2px solid #FF9800;", |
| |     h4("📋 SOFT文件列选择", style = "color: #FF9800; margin-top: 0;"), |
| |     helpText("💡 您可以预先浏览和选择SOFT文件的列,即使还未上传Series Matrix文件。"), |
| |     br(), |
| |     fluidRow(id_column, gene_column, entrez_column), |
| |     uiOutput("chip_selection_status") |
| |   ) |
| | }) |
| |
|
| | |
| | # Status box under the column selectors: tells the user whether the ID and |
| | # gene columns are chosen and hints at likely Entrez ID columns. |
| | # Fixes over the previous version: |
| | #  * the hint is kept as a plain string; it used to be replaced by a p() tag |
| | #    and then compared with "", which is not a valid scalar condition; |
| | #  * the stray `collapse = " "` that ended up as an HTML attribute is gone; |
| | #  * colour values now carry the leading "#" required by CSS; |
| | #  * the inputs are read reactively, so the box follows the user's choices |
| | #    (everything was isolated before, so the text never changed). |
| | output$chip_selection_status <- renderUI({ |
| |   id_col <- input$chip_soft_id_col |
| |   gene_col <- input$chip_soft_gene_col |
| |   entrez_col <- input$chip_soft_entrez_col |
| |   is_blank <- function(choice) is.null(choice) || choice == "" |
| |   # Names commonly used for Entrez ID columns; exact-name match only. |
| |   entrez_hint <- "" |
| |   if (!is.null(chip_data$soft_platform)) { |
| |     entrez_candidates <- c("ENTREZ_GENE_ID", "ENTREZID", "EntrezID", "GeneID", |
| |                            "GENE_ID", "ENTREZ_GENE", "GENE") |
| |     found_cols <- intersect(entrez_candidates, colnames(chip_data$soft_platform)) |
| |     if (length(found_cols) > 0) { |
| |       entrez_hint <- sprintf("💡 检测到可能的EntrezID列: %s", |
| |                              paste(found_cols, collapse = ", ")) |
| |     } |
| |   } |
| |   has_hint <- entrez_hint != "" |
| |   if (is_blank(id_col) || is_blank(gene_col)) { |
| |     return(div( |
| |       style = "background: #fff3cd; padding: 10px; border-radius: 5px; margin-top: 15px;", |
| |       h6("⚠️ 未完成选择", style = "color: #856404;"), |
| |       p("请选择ID列和基因列,然后上传Series Matrix文件并点击应用按钮。", style = "font-size: 12px;"), |
| |       if (has_hint) { |
| |         p(entrez_hint, style = "font-size: 11px; color: #FF5722; font-weight: bold;") |
| |       } |
| |     )) |
| |   } |
| |   # Both required columns are set: green by default, amber when a likely |
| |   # Entrez column exists but none has been selected. |
| |   status_color <- "#d4edda" |
| |   status_text <- "✅ 已选择列" |
| |   status_text_color <- "#155724" |
| |   if (is_blank(entrez_col) && has_hint) { |
| |     status_color <- "#fff3cd" |
| |     status_text <- "⚠️ 建议选择EntrezID列" |
| |     status_text_color <- "#856404" |
| |   } |
| |   div( |
| |     style = sprintf("background: %s; padding: 10px; border-radius: 5px; margin-top: 15px;", status_color), |
| |     h6(status_text, style = sprintf("color: %s;", status_text_color)), |
| |     p(sprintf("ID列: %s | 基因列: %s", id_col, gene_col), |
| |       style = "font-size: 12px; font-weight: bold;"), |
| |     if (!is_blank(entrez_col) && entrez_col != "自动检测") { |
| |       p(sprintf("EntrezID列: %s", entrez_col), |
| |         style = "font-size: 12px; color: #FF5722; font-weight: bold;") |
| |     }, |
| |     p("💡 请上传Series Matrix文件,然后点击'✅ 应用配置并生成最终矩阵'按钮。", |
| |       style = "font-size: 12px;"), |
| |     if (has_hint && is_blank(entrez_col)) { |
| |       p(entrez_hint, |
| |         style = "font-size: 11px; color: #FF5722; font-weight: bold; margin-top: 5px;") |
| |     } |
| |   ) |
| | }) |
| |
|
| | |
| | # Mirror the chosen ID and gene columns into chip_data and log the choice. |
| | # req() already treats NULL and "" as missing, so it stops the observer |
| | # until both selectors hold a value. |
| | observe({ |
| |   id_choice <- input$chip_soft_id_col |
| |   req(id_choice) |
| |   gene_choice <- input$chip_soft_gene_col |
| |   req(gene_choice) |
| |   chip_data$selected_id_col <- id_choice |
| |   chip_data$selected_gene_col <- gene_choice |
| |   cat(sprintf("📋 用户已选择: ID列=%s, 基因列=%s\n", |
| |               id_choice, |
| |               gene_choice)) |
| | }) |
| |
|
| | |
| | # Merge workflow panel, shown only when both the series matrix and the SOFT |
| | # table are loaded. It echoes the chosen ID/gene columns and offers the |
| | # "apply" button once both are selected. |
| | output$chip_merge_workflow_panel <- renderUI({ |
| |   has_series <- !is.null(chip_data$series_matrix) |
| |   has_soft <- !is.null(chip_data$soft_platform) |
| |   if (!(has_series && has_soft)) { |
| |     return(NULL) |
| |   } |
| |   has_id_col <- !is.null(input$chip_soft_id_col) && input$chip_soft_id_col != "" |
| |   has_gene_col <- !is.null(input$chip_soft_gene_col) && input$chip_soft_gene_col != "" |
| |   columns_chosen <- has_id_col && has_gene_col |
| |   # Green summary of the selection, or an amber reminder to select columns. |
| |   config_box <- if (columns_chosen) { |
| |     tags$div(style = "background: #d4edda; padding: 10px; border-radius: 5px; border-left: 4px solid #28a745;", |
| |              tags$strong("✅ 当前配置:"), |
| |              tags$ul(style = "margin: 10px 0;", |
| |                      tags$li(sprintf("ID列: %s", input$chip_soft_id_col)), |
| |                      tags$li(sprintf("基因列: %s", input$chip_soft_gene_col)) |
| |              ) |
| |     ) |
| |   } else { |
| |     tags$div(style = "background: #fff3cd; padding: 10px; border-radius: 5px; border-left: 4px solid #ffc107;", |
| |              tags$strong("⚠️ 请先在上方黄色面板选择列!") |
| |     ) |
| |   } |
| |   # The apply button exists only when both columns are selected. |
| |   apply_section <- NULL |
| |   if (columns_chosen) { |
| |     apply_section <- tagList( |
| |       hr(style = "border-color: #667eea;"), |
| |       h5("步骤5: 应用配置并生成最终矩阵", style = "color: #9C27B0; font-weight: bold;"), |
| |       fluidRow( |
| |         column(12, |
| |                actionButton("chip_apply_merge", "✅ 应用配置并生成最终矩阵", |
| |                             class = "btn-success btn-lg btn-block", |
| |                             style = "font-size: 16px; padding: 15px;"), |
| |                helpText("点击后将应用所有配置,生成带基因符号的表达矩阵。", style = "text-align: center;") |
| |         ) |
| |       ) |
| |     ) |
| |   } |
| |   wellPanel( |
| |     style = "background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border: 2px solid #667eea;", |
| |     h4("🔗 探针注释与数据合并工作流", style = "color: #667eea; margin-top: 0;"), |
| |     div( |
| |       style = "background: rgba(255,255,255,0.8); padding: 15px; border-radius: 8px; margin-bottom: 20px;", |
| |       h5("📋 重要提示", style = "color: #dc3545;"), |
| |       helpText("您已在上方黄色面板选择了列,现在可以应用配置生成最终矩阵!"), |
| |       config_box |
| |     ), |
| |     apply_section |
| |   ) |
| | }) |
| |
|
| | |
| | # Selector for the gene-symbol column of the SOFT table plus the button that |
| | # rebuilds the probe mapping from it; preselects chip_data$gene_symbol_col. |
| | output$chip_soft_columns_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   platform <- chip_data$soft_platform |
| |   gene_selector <- selectInput("chip_gene_symbol_col", |
| |                                "选择基因符号列:", |
| |                                choices = colnames(platform), |
| |                                selected = chip_data$gene_symbol_col) |
| |   update_button <- actionButton("chip_update_gene_col", "🔄 更新列选择", |
| |                                 class = "btn-primary", style = "width: 100%;") |
| |   tagList( |
| |     h5("🔍 SOFT文件列信息", style = "color: #9C27B0;"), |
| |     p(sprintf("检测到 %d 列,请确认基因符号列:", ncol(platform))), |
| |     fluidRow( |
| |       column(8, gene_selector), |
| |       column(4, br(), update_button) |
| |     ), |
| |     helpText("💡 提示:系统已自动检测,如不正确可手动选择。", |
| |              class = "text-info") |
| |   ) |
| | }) |
| |
|
| | |
| | # Rebuild the probe -> gene mapping from the first SOFT column (used as the |
| | # probe ID) and the user-selected gene column, dropping rows whose gene |
| | # value is NA or empty. |
| | observeEvent(input$chip_update_gene_col, { |
| |   req(chip_data$soft_platform) |
| |   req(input$chip_gene_symbol_col) |
| |   selected_col <- input$chip_gene_symbol_col |
| |   platform <- chip_data$soft_platform |
| |   gene_values <- platform[, selected_col] |
| |   probe_values <- platform[, 1] |
| |   has_gene <- !is.na(gene_values) & gene_values != "" |
| |   mapping <- data.frame( |
| |     probe_id = as.character(probe_values[has_gene]), |
| |     gene_symbol = as.character(gene_values[has_gene]), |
| |     stringsAsFactors = FALSE |
| |   ) |
| |   chip_data$probe_mapping <- mapping |
| |   chip_data$gene_symbol_col <- selected_col |
| |   showNotification( |
| |     sprintf("✅ 已更新基因列为: %s (%d 个映射)", selected_col, nrow(mapping)), |
| |     type = "message" |
| |   ) |
| | }) |
| |
|
| | |
| | # Raw preview of the SOFT table: first 10 rows, at most 15 columns, text |
| | # cells truncated to 100 characters. |
| | # The table holds user-uploaded file content, so cell values are now |
| | # HTML-escaped (DT's default) instead of rendered with escape = FALSE, which |
| | # would let markup or scripts in the file run in the browser. |
| | output$chip_soft_raw_preview <- renderDT({ |
| |   req(chip_data$soft_platform) |
| |   preview_df <- head(chip_data$soft_platform, 10) |
| |   # Shorten long text cells; non-text columns pass through unchanged. |
| |   truncate_text <- function(text, max_len = 100) { |
| |     if (is.character(text) || is.factor(text)) { |
| |       text <- as.character(text) |
| |       text <- ifelse(nchar(text) > max_len, |
| |                      paste0(substr(text, 1, max_len), "..."), |
| |                      text) |
| |     } |
| |     text |
| |   } |
| |   for (col in colnames(preview_df)) { |
| |     preview_df[[col]] <- truncate_text(preview_df[[col]], max_len = 100) |
| |   } |
| |   if (ncol(preview_df) > 15) { |
| |     preview_df <- preview_df[, seq_len(15), drop = FALSE] |
| |     cat(sprintf("⚠️ SOFT文件有%d列,仅显示前15列\n", ncol(chip_data$soft_platform))) |
| |   } |
| |   preview_table <- datatable( |
| |     preview_df, |
| |     options = list( |
| |       dom = 't', |
| |       paging = FALSE, |
| |       scrollX = TRUE, |
| |       scrollY = "400px", |
| |       columnDefs = list(list( |
| |         className = 'dt-center', |
| |         targets = "_all" |
| |       )) |
| |     ), |
| |     rownames = FALSE, |
| |     filter = 'none' |
| |   ) |
| |   # seq_len() avoids the c(1, 0) that 1:ncol() produces for an empty table. |
| |   if (ncol(preview_df) > 0) { |
| |     preview_table <- formatStyle(preview_table, |
| |                                  columns = seq_len(ncol(preview_df)), |
| |                                  fontSize = '85%', |
| |                                  maxWidth = '200px', |
| |                                  overflow = 'hidden', |
| |                                  textOverflow = 'ellipsis') |
| |   } |
| |   preview_table |
| | }) |
| |
|
| | |
| | # Preview of the first 10 probe -> gene mappings with localized headers. |
| | # formatStyle() resolves column names against the data's own column names, |
| | # not the display labels passed via `colnames`, so the styled columns are |
| | # taken from the data frame itself instead of using the header labels. |
| | output$chip_probe_mapping_preview <- renderDT({ |
| |   req(chip_data$probe_mapping) |
| |   preview_df <- head(chip_data$probe_mapping, 10) |
| |   datatable( |
| |     preview_df, |
| |     options = list(dom = 't', paging = FALSE), |
| |     rownames = FALSE, |
| |     colnames = c("探针ID", "基因符号") |
| |   ) %>% |
| |     formatStyle(columns = head(colnames(preview_df), 2), fontSize = '90%') |
| | }) |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | # Stand-alone selector for the SOFT ID column; starts on the blank choice. |
| | output$chip_soft_id_column_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   id_choices <- c("", colnames(chip_data$soft_platform)) |
| |   id_selector <- selectInput("chip_soft_id_col", |
| |                              "选择ID列(必须与Series Matrix的探针ID匹配)", |
| |                              choices = id_choices, |
| |                              selected = "") |
| |   tagList( |
| |     id_selector, |
| |     helpText("💡 提示:根据上方SOFT文件预览,选择包含探针ID的列(通常为'ID'列)", style = "color: #ff9800;") |
| |   ) |
| | }) |
| |
|
| | |
| | # Stand-alone selector for the SOFT gene column; starts on the blank choice. |
| | output$chip_soft_gene_column_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   gene_choices <- c("", colnames(chip_data$soft_platform)) |
| |   gene_selector <- selectInput("chip_soft_gene_col", |
| |                                "选择基因列(包含基因符号的列)", |
| |                                choices = gene_choices, |
| |                                selected = "") |
| |   tagList( |
| |     gene_selector, |
| |     helpText("⚠️ 请根据上方SOFT文件预览和右侧数据示例,手动选择包含基因符号的列(如 GENE_SYMBOL)。", |
| |              style = "color: #ff9800; font-weight: bold;") |
| |   ) |
| | }) |
| |
|
| | |
| | # Show the first five values of the selected gene column. |
| | # The rows are built with lapply(): the previous for loop inside div() |
| | # evaluated to NULL, so the example box was always empty. |
| | output$chip_gene_column_examples_ui <- renderUI({ |
| |   req(chip_data$soft_platform) |
| |   req(input$chip_soft_gene_col) |
| |   gene_col <- input$chip_soft_gene_col |
| |   examples <- head(chip_data$soft_platform[[gene_col]], 5) |
| |   example_rows <- lapply(seq_along(examples), function(i) { |
| |     tags$div(sprintf("%d. %s", i, as.character(examples[i]))) |
| |   }) |
| |   tagList( |
| |     h6("📋 实际数据示例(前5行):", style = "color: #667eea;"), |
| |     div( |
| |       style = "background: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace; font-size: 11px;", |
| |       example_rows |
| |     ) |
| |   ) |
| | }) |
| |
|
| | |
| | # Test the gene-extraction regex against example text and report the unique |
| | # matches. The text comes from the example input box when filled, otherwise |
| | # from the first three values of the selected gene column. |
| | # Fixes over the previous version: |
| | #  * matches are wrapped in tags$strong(); the old "<strong>%s</strong>" |
| | #    string was HTML-escaped and shown literally; |
| | #  * the fallback path now requires the SOFT table before reading it. |
| | output$chip_regex_test_result_ui <- renderUI({ |
| |   req(input$chip_test_regex) |
| |   example_text <- if (!is.null(input$chip_gene_extract_example) && input$chip_gene_extract_example != "") { |
| |     input$chip_gene_extract_example |
| |   } else { |
| |     req(chip_data$soft_platform) |
| |     req(input$chip_soft_gene_col) |
| |     examples <- head(chip_data$soft_platform[[input$chip_soft_gene_col]], 3) |
| |     paste(examples, collapse = "\n") |
| |   } |
| |   regex_pattern <- input$chip_gene_regex %||% "[A-Z][A-Z0-9]+" |
| |   # An invalid pattern raises an error in gregexpr(); it is shown to the |
| |   # user instead of failing the output. |
| |   tryCatch({ |
| |     matches <- gregexpr(regex_pattern, example_text, perl = TRUE) |
| |     extracted <- regmatches(example_text, matches) |
| |     all_matches <- unique(unlist(extracted)) |
| |     all_matches <- all_matches[all_matches != ""] |
| |     if (length(all_matches) == 0) { |
| |       div( |
| |         class = "alert alert-warning", |
| |         h6("⚠️ 未找到匹配"), |
| |         p("当前正则表达式无法从示例文本中提取基因符号。"), |
| |         p(sprintf("正则表达式: %s", regex_pattern)) |
| |       ) |
| |     } else { |
| |       match_items <- lapply(all_matches, function(m) { |
| |         tags$li(style = "margin: 5px 0; font-family: monospace; color: #667eea;", |
| |                 tags$strong(m)) |
| |       }) |
| |       tagList( |
| |         div( |
| |           class = "alert alert-success", |
| |           h6("✅ 提取成功", style = "color: #28a745;"), |
| |           p(sprintf("找到 %d 个匹配:", length(all_matches))), |
| |           div( |
| |             style = "background: #f8f9fa; padding: 10px; border-radius: 5px; margin-top: 10px;", |
| |             tags$ul(style = "margin: 0; padding-left: 20px;", match_items) |
| |           ) |
| |         ), |
| |         div( |
| |           style = "margin-top: 10px;", |
| |           h6("📊 提取统计:", style = "color: #667eea;"), |
| |           tags$table( |
| |             class = "table table-striped", |
| |             tags$tbody( |
| |               tags$tr( |
| |                 tags$td("原始文本长度"), |
| |                 tags$td(nchar(example_text)) |
| |               ), |
| |               tags$tr( |
| |                 tags$td("提取数量"), |
| |                 tags$td(length(all_matches)) |
| |               ), |
| |               tags$tr( |
| |                 tags$td("平均长度"), |
| |                 tags$td(round(mean(nchar(all_matches)), 1)) |
| |               ) |
| |             ) |
| |           ) |
| |         ) |
| |       ) |
| |     } |
| |   }, error = function(e) { |
| |     div( |
| |       class = "alert alert-danger", |
| |       h6("❌ 正则表达式错误"), |
| |       p(conditionMessage(e)) |
| |     ) |
| |   }) |
| | }) |
| |
|
| | |
| | # Preset buttons: each one writes a ready-made pattern into the regex box. |
| | # Preset 1: a run of uppercase letters. |
| | observeEvent(input$chip_regex_preset1, { |
| |   updateTextInput(session, "chip_gene_regex", value = "[A-Z]+") |
| | }) |
| | # Preset 2: an uppercase letter followed by uppercase letters or digits. |
| | observeEvent(input$chip_regex_preset2, { |
| |   updateTextInput(session, "chip_gene_regex", value = "[A-Z][A-Z0-9]+") |
| | }) |
| | # Preset 3: text enclosed in parentheses. The pattern is now passed as the |
| | # `value` argument; it was previously written as a call to a function named |
| | # value(), which does not exist and made this button fail. |
| | observeEvent(input$chip_regex_preset3, { |
| |   updateTextInput(session, "chip_gene_regex", value = "\\(([^)]+)\\)") |
| | }) |
| |
|
| | |
| | # Build a 5-row preview of the series matrix joined to the selected SOFT |
| | # ID / gene columns and store it in chip_data$merged_preview. |
| | # The ProbeID column is now always derived from the row names: the merge |
| | # below joins on it, so skipping it (chip_convert_rownames = FALSE) made |
| | # merge() fail with an invalid `by` column. |
| | observeEvent(input$chip_preview_merge, { |
| |   req(chip_data$series_matrix) |
| |   req(chip_data$soft_platform) |
| |   req(input$chip_soft_id_col) |
| |   req(input$chip_soft_gene_col) |
| |   series_df <- as.data.frame(chip_data$series_matrix) |
| |   series_df <- data.frame(ProbeID = rownames(series_df), series_df, row.names = NULL) |
| |   # Built column by column so it also works when the same column is chosen |
| |   # for both roles. |
| |   soft_df <- data.frame( |
| |     ID = chip_data$soft_platform[[input$chip_soft_id_col]], |
| |     GeneSymbol = chip_data$soft_platform[[input$chip_soft_gene_col]], |
| |     stringsAsFactors = FALSE |
| |   ) |
| |   # Left join: keep every probe, with NA where no annotation row matches. |
| |   merged_df <- merge(series_df, soft_df, by.x = "ProbeID", by.y = "ID", all.x = TRUE) |
| |   sample_cols <- setdiff(colnames(merged_df), c("ProbeID", "GeneSymbol", "ID")) |
| |   merged_df <- merged_df[, c("ProbeID", "GeneSymbol", sample_cols), drop = FALSE] |
| |   chip_data$merged_preview <- head(merged_df, 5) |
| |   showNotification("✅ 合并预览已生成", type = "message") |
| | }) |
| |
|
| | |
| | # Render the merged preview stored in chip_data$merged_preview. |
| | # The preview built by the chip_preview_merge observer names its annotation |
| | # column "GeneSymbol", not "Gene", so styling a column called "Gene" failed. |
| | # Only identifier columns that are actually present are styled. |
| | output$chip_merge_preview_table <- renderDT({ |
| |   req(chip_data$merged_preview) |
| |   preview_table <- datatable( |
| |     chip_data$merged_preview, |
| |     options = list( |
| |       dom = 't', |
| |       paging = FALSE, |
| |       scrollX = TRUE |
| |     ), |
| |     rownames = FALSE, |
| |     filter = 'none' |
| |   ) |
| |   id_cols <- intersect(c("ProbeID", "GeneSymbol", "Gene"), |
| |                        colnames(chip_data$merged_preview)) |
| |   if (length(id_cols) > 0) { |
| |     preview_table <- formatStyle(preview_table, columns = id_cols, fontSize = '90%') |
| |   } |
| |   preview_table |
| | }) |
| |
|
| | |
| | observeEvent(input$chip_apply_merge, { |
| | req(chip_data$series_matrix) |
| | req(chip_data$soft_platform) |
| | req(input$chip_soft_id_col) |
| | req(input$chip_soft_gene_col) |
| |
|
| | showNotification("🔄 正在应用配置并生成最终矩阵...", type = "message") |
| |
|
| | |
| | series_df <- as.data.frame(chip_data$series_matrix) |
| | if (input$chip_convert_rownames %||% TRUE) { |
| | series_df <- data.frame(ProbeID = rownames(series_df), series_df, row.names = NULL) |
| | } |
| |
|
| | cat(sprintf("🔍 Series探针ID示例: %s\n", paste(head(series_df$ProbeID, 3), collapse = ", "))) |
| |
|
| | |
| | user_id_col <- input$chip_soft_id_col |
| | user_id_sample <- head(chip_data$soft_platform[[user_id_col]][!is.na(chip_data$soft_platform[[user_id_col]])], 10) |
| |
|
| | cat(sprintf("🔍 用户选择的ID列 '%s' 示例: %s\n", user_id_col, paste(head(user_id_sample, 3), collapse = ", "))) |
| |
|
| | |
| | |
| | |
| | |
| | |
| | is_probe_format <- any(grepl(".*_.*_at$|.*_at$|at$", user_id_sample)) || |
| | any(grepl("^[A-Z]+_\\d+_", user_id_sample)) || |
| | any(grepl("^CN_", user_id_sample)) |
| |
|
| | |
| | if (is_probe_format) { |
| | cat("✅ 用户选择的ID列包含探针格式\n") |
| | soft_df <- chip_data$soft_platform[, c(user_id_col, input$chip_soft_gene_col)] |
| | colnames(soft_df) <- c("ID", "Gene_Raw") |
| | } else { |
| | cat("⚠️ 用户选择的ID列不包含探针格式,尝试使用SOFT行名\n") |
| |
|
| | |
| | if (!is.null(rownames(chip_data$soft_platform))) { |
| | |
| | rowname_sample <- head(rownames(chip_data$soft_platform), 10) |
| | cat(sprintf("🔍 SOFT行名示例: %s\n", paste(rowname_sample, collapse = ", "))) |
| |
|
| | |
| | soft_df <- data.frame( |
| | ID = rownames(chip_data$soft_platform), |
| | Gene_Raw = chip_data$soft_platform[[input$chip_soft_gene_col]], |
| | stringsAsFactors = FALSE |
| | ) |
| | } else { |
| | cat("❌ SOFT文件没有行名,无法自动检测\n") |
| | showNotification("⚠️ 请选择包含探针ID的列(如ID、SPOT_ID等)", type = "warning", duration = 10) |
| | return() |
| | } |
| | } |
| |
|
| | |
| | |
| | gene_sample <- head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw) & soft_df$Gene_Raw != ""], 10) |
| |
|
| | cat(sprintf("🔍 用户选择的基因列 '%s' 示例: %s\n", |
| | input$chip_soft_gene_col, paste(head(gene_sample, 3), collapse = ", "))) |
| |
|
| | |
| | is_numeric_id <- all(grepl("^[0-9]+$", gene_sample)) |
| |
|
| | if (is_numeric_id) { |
| | cat("⚠️ 用户选择的基因列包含数字ID而非基因符号!\n") |
| | cat("💡 建议:请选择包含基因符号的列(如GENE_SYMBOL)\n") |
| |
|
| | |
| | soft_cols <- colnames(chip_data$soft_platform) |
| | cat(sprintf("🔍 SOFT文件所有列名: %s\n", paste(soft_cols, collapse = ", "))) |
| |
|
| | possible_symbol_cols <- c("GENE_SYMBOL", "SYMBOL", "GENE_NAME", "NAME", "DESCRIPTION") |
| |
|
| | for (col in possible_symbol_cols) { |
| | if (col %in% soft_cols && col != input$chip_soft_gene_col) { |
| | test_data <- head(chip_data$soft_platform[[col]][!is.na(chip_data$soft_platform[[col]])], 10) |
| | cat(sprintf("🔍 检查列 '%s': 示例=%s\n", col, paste(head(test_data, 3), collapse = ", "))) |
| |
|
| | if (!all(grepl("^[0-9]+$", test_data))) { |
| | cat(sprintf("✅ 自动检测到基因符号列: %s\n", col)) |
| | cat(sprintf(" 示例: %s\n", paste(head(test_data, 3), collapse = ", "))) |
| |
|
| | |
| | soft_df <- chip_data$soft_platform[, c(user_id_col, col)] |
| | colnames(soft_df) <- c("ID", "Gene_Raw") |
| | cat("✅ 已重新创建soft_df,使用正确的基因符号列\n") |
| | cat(sprintf("✅ 验证:soft_df的Gene_Raw列示例: %s\n", paste(head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw)], 3), collapse = ", "))) |
| | break |
| | } |
| | } |
| | } |
| | } |
| |
|
| | |
| | gene_sample <- head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw) & soft_df$Gene_Raw != ""], 10) |
| |
|
| | |
| | |
| | is_pure_symbol <- all(grepl("^[A-Z][A-Z0-9]{1,15}$", gene_sample)) |
| |
|
| | if (is_pure_symbol) { |
| | cat("✅ 基因列已是纯符号格式,直接使用,无需正则提取\n") |
| | cat(sprintf(" 示例: %s\n", paste(head(gene_sample, 3), collapse = ", "))) |
| | |
| | soft_df$GeneSymbol <- soft_df$Gene_Raw |
| | cat(sprintf("✅ GeneSymbol列已创建,示例: %s\n", paste(head(soft_df$GeneSymbol[!is.na(soft_df$GeneSymbol)], 3), collapse = ", "))) |
| | } else { |
| | cat("📋 基因列包含额外文本,应用正则提取\n") |
| | cat(sprintf(" 原始示例: %s\n", paste(head(gene_sample, 2), collapse = ", "))) |
| |
|
| | |
| | regex_pattern <- input$chip_gene_regex %||% "[A-Z][A-Z0-9]+" |
| |
|
| | soft_df$GeneSymbol <- sapply(soft_df$Gene_Raw, function(x) { |
| | matches <- regmatches(x, gregexpr(regex_pattern, as.character(x), perl = TRUE)) |
| | if (length(matches[[1]]) > 0) { |
| | matches[[1]][1] |
| | } else { |
| | NA |
| | } |
| | }) |
| |
|
| | extracted_sample <- head(soft_df$GeneSymbol[!is.na(soft_df$GeneSymbol)], 5) |
| | cat(sprintf(" 提取示例: %s\n", paste(extracted_sample, collapse = ", "))) |
| | } |
| |
|
| | |
| | |
| | clean_entrez_id <- function(entrez_str) { |
| | if (is.na(entrez_str) || is.null(entrez_str) || entrez_str == "") { |
| | return(NA) |
| | } |
| | |
| | entrez_str <- as.character(entrez_str) |
| | |
| | cleaned <- gsub("[^0-9]", "", entrez_str) |
| | |
| | if (cleaned == "" || is.na(cleaned)) { |
| | return(NA) |
| | } |
| | return(cleaned) |
| | } |
| |
|
| | |
| | if (!is.null(input$chip_soft_entrez_col) && input$chip_soft_entrez_col != "" && input$chip_soft_entrez_col != "自动检测") { |
| | user_entrez_col <- input$chip_soft_entrez_col |
| | if (user_entrez_col %in% colnames(chip_data$soft_platform)) { |
| | raw_entrez_ids <- as.character(chip_data$soft_platform[[user_entrez_col]][match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])]) |
| |
|
| | |
| | entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id) |
| |
|
| | soft_df$EntrezID <- entrez_gene_ids |
| |
|
| | |
| | na_count <- sum(is.na(entrez_gene_ids)) |
| | valid_count <- sum(!is.na(entrez_gene_ids)) |
| | entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3) |
| |
|
| | cat(sprintf("✅ 用户指定的EntrezID列 '%s' 已添加\n", user_entrez_col)) |
| | cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count)) |
| | cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", "))) |
| |
|
| | if (na_count > 0) { |
| | cat(sprintf(" ⚠️ %d个ID被清理为NA(包含非数字字符)\n", na_count)) |
| | } |
| | } |
| | } |
| | |
| | else if (!is.null(input$chip_soft_entrez_col) && input$chip_soft_entrez_col == "自动检测") { |
| | |
| | possible_entrez_cols <- c("ENTREZ_GENE_ID", "ENTREZID", "EntrezID", "GeneID", |
| | "GENE_ID", "ENTREZ_GENE", "GENE", "Entrez Gene ID", |
| | "Entrez", "ENTREZ") |
| |
|
| | found_entrez_col <- NULL |
| | for (col in possible_entrez_cols) { |
| | if (col %in% colnames(chip_data$soft_platform)) { |
| | |
| | test_values <- head(chip_data$soft_platform[[col]], 100) |
| | test_values <- test_values[!is.na(test_values) & test_values != ""] |
| |
|
| | |
| | cleaned_test <- sapply(test_values, clean_entrez_id) |
| | cleaned_test <- cleaned_test[!is.na(cleaned_test)] |
| |
|
| | |
| | if (length(cleaned_test) > 0) { |
| | is_numeric_id <- sum(grepl("^[0-9]+$", cleaned_test)) / length(cleaned_test) > 0.8 |
| |
|
| | if (is_numeric_id) { |
| | found_entrez_col <- col |
| | cat(sprintf("✅ 自动检测到EntrezID列: '%s'\n", col)) |
| | break |
| | } |
| | } |
| | } |
| | } |
| |
|
| | if (!is.null(found_entrez_col)) { |
| | raw_entrez_ids <- as.character(chip_data$soft_platform[[found_entrez_col]][match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])]) |
| |
|
| | |
| | entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id) |
| |
|
| | soft_df$EntrezID <- entrez_gene_ids |
| |
|
| | |
| | na_count <- sum(is.na(entrez_gene_ids)) |
| | valid_count <- sum(!is.na(entrez_gene_ids)) |
| | entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3) |
| |
|
| | cat(sprintf("✅ 自动检测的EntrezID列已添加\n", found_entrez_col)) |
| | cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count)) |
| | cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", "))) |
| |
|
| | if (na_count > 0) { |
| | cat(sprintf(" ⚠️ %d个ID被清理为NA(包含非数字字符)\n", na_count)) |
| | } |
| | } else { |
| | cat("⚠️ 自动检测未找到EntrezID列\n") |
| | } |
| | } |
| | |
| | else if ("GENE" %in% colnames(chip_data$soft_platform)) { |
| | raw_entrez_ids <- as.character(chip_data$soft_platform$GENE[match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])]) |
| |
|
| | |
| | entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id) |
| |
|
| | soft_df$EntrezID <- entrez_gene_ids |
| |
|
| | |
| | na_count <- sum(is.na(entrez_gene_ids)) |
| | valid_count <- sum(!is.na(entrez_gene_ids)) |
| | entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3) |
| |
|
| | cat(sprintf("✅ Entrez Gene ID列(GENE)已添加\n")) |
| | cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count)) |
| | cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", "))) |
| |
|
| | if (na_count > 0) { |
| | cat(sprintf(" ⚠️ %d个ID被清理为NA(包含非数字字符)\n", na_count)) |
| | } |
| | } |
| |
|
| | |
| | cat("✅ 使用GeneSymbol列进行探针去重\n") |
| | cat("✅ 使用EntrezID列作为差异分析结果的ID\n") |
| |
|
| | |
| | merged_df <- merge(series_df, soft_df, by.x = "ProbeID", by.y = "ID", all.x = TRUE) |
| |
|
| | |
| | |
| | exclude_cols <- c("ProbeID", "GeneSymbol", "EntrezID", "Gene_Raw", "ID") |
| |
|
| | |
| | sample_cols <- character(0) |
| | for (col in colnames(merged_df)) { |
| | if (!(col %in% exclude_cols)) { |
| | |
| | if (is.numeric(merged_df[[col]])) { |
| | sample_cols <- c(sample_cols, col) |
| | } |
| | } |
| | } |
| |
|
| | cat(sprintf("🔍 识别到 %d 个样本列: %s\n", length(sample_cols), |
| | paste(head(sample_cols, 5), collapse = ", "))) |
| |
|
| | |
| | if ("EntrezID" %in% colnames(merged_df)) { |
| | |
| | merged_df <- merged_df[, c("ProbeID", "GeneSymbol", "EntrezID", sample_cols)] |
| |
|
| | |
| | merged_df$EntrezID <- as.character(merged_df$EntrezID) |
| | cat("✅ EntrezID列已强制转换为字符型\n") |
| | } else { |
| | |
| | merged_df <- merged_df[, c("ProbeID", "GeneSymbol", sample_cols)] |
| | cat("⚠️ 未找到EntrezID列\n") |
| | } |
| |
|
| | |
| | cat(sprintf("🔍 合并后矩阵结构: %d 行 × %d 列\n", nrow(merged_df), ncol(merged_df))) |
| | cat(sprintf("🔍 列名: %s\n", paste(colnames(merged_df), collapse = ", "))) |
| | cat(sprintf("🔍 前3列: %s\n", paste(head(colnames(merged_df), 3), collapse = ", "))) |
| | cat(sprintf("🔍 后3列: %s\n", paste(tail(colnames(merged_df), 3), collapse = ", "))) |
| |
|
| | |
| | cat("🔍 列类型:\n") |
| | for (i in 1:min(5, ncol(merged_df))) { |
| | cat(sprintf(" %s: %s\n", colnames(merged_df)[i], class(merged_df[[i]]))) |
| | } |
| |
|
| | |
| | if ("EntrezID" %in% colnames(merged_df)) { |
| | entrez_col_idx <- which(colnames(merged_df) == "EntrezID") |
| | cat(sprintf("🔍 EntrezID列位置: 第%d列,类型: %s\n", entrez_col_idx, class(merged_df$EntrezID))) |
| | cat(sprintf("🔍 EntrezID示例: %s\n", paste(head(merged_df$EntrezID[!is.na(merged_df$EntrezID)], 3), collapse = ", "))) |
| | } |
| |
|
| | |
| | chip_data$merged_matrix <- merged_df |
| |
|
| | |
| | gene_symbol_sample <- head(merged_df$GeneSymbol[!is.na(merged_df$GeneSymbol)], 5) |
| | cat(sprintf("✅ GeneSymbol列包含基因符号(示例: %s)\n", |
| | paste(gene_symbol_sample, 3), collapse = ", ")) |
| |
|
| | if ("EntrezID" %in% colnames(merged_df)) { |
| | entrez_sample <- head(merged_df$EntrezID[!is.na(merged_df$EntrezID)], 5) |
| | cat(sprintf("✅ EntrezID列包含Entrez Gene ID(示例: %s)\n", |
| | paste(entrez_sample, collapse = ", "))) |
| | } |
| |
|
| | |
| | n_total <- nrow(merged_df) |
| | n_annotated <- sum(!is.na(merged_df$GeneSymbol)) |
| | annotation_rate <- n_annotated / n_total * 100 |
| |
|
| | |
| | na_count <- sum(is.na(merged_df$GeneSymbol) | merged_df$GeneSymbol == "") |
| | cat(sprintf("🔍 诊断: %d个探针的GeneSymbol为NA或空 (%.1f%%)\n", na_count, na_count/n_total*100)) |
| |
|
| | |
| | series_probes <- series_df$ProbeID |
| | soft_ids <- soft_df$ID |
| | matched_probes <- sum(series_probes %in% soft_ids) |
| | cat(sprintf("🔍 ID匹配: %d / %d 个Series探针在SOFT文件中找到 (%.1f%%)\n", |
| | matched_probes, length(series_probes), matched_probes/length(series_probes)*100)) |
| |
|
| | |
| | if (matched_probes / length(series_probes) < 0.5) { |
| | cat("⚠️ 警告:ID匹配率低于50%,请检查ID列选择是否正确!\n") |
| | cat(sprintf(" Series探针示例: %s\n", paste(head(series_probes, 3), collapse = ", "))) |
| | cat(sprintf(" SOFT ID示例: %s\n", paste(head(soft_ids, 3), collapse = ", "))) |
| | } |
| |
|
| | cat(sprintf("✅ 合并完成: %d 个探针, %d 个已注释 (%.1f%%)\n", |
| | n_total, n_annotated, annotation_rate)) |
| |
|
| | showNotification( |
| | sprintf("✅ 合并完成!%d / %d 个探针已注释 (%.1f%%)", |
| | n_annotated, n_total, annotation_rate), |
| | type = "message", |
| | duration = 10 |
| | ) |
| | }) |
| |
|
| | |
# Heading plus table placeholder for the merged-matrix preview. Nothing is
# rendered until chip_data$merged_matrix is available.
output$chip_final_matrix_ui <- renderUI({
  req(chip_data$merged_matrix)

  preview_heading <- h5("📊 最终表达矩阵(前5行)", style = "color: #28a745;")
  preview_table <- DTOutput("chip_final_matrix_table")

  tagList(preview_heading, preview_table)
})
| |
|
# Preview table for the merged matrix: first 5 rows and at most the first 12
# columns, with the identifier columns visually highlighted.
output$chip_final_matrix_table <- renderDT({
  req(chip_data$merged_matrix)

  preview_df <- head(chip_data$merged_matrix, 5)
  if (ncol(preview_df) > 12) {
    # drop = FALSE keeps a data frame whatever the column count.
    preview_df <- preview_df[, seq_len(12), drop = FALSE]
  }

  dt <- datatable(
    preview_df,
    options = list(
      dom = 't',
      paging = FALSE,
      scrollX = TRUE,
      columnDefs = list(list(
        className = 'dt-center',
        targets = "_all"
      ))
    ),
    rownames = FALSE,
    filter = 'none'
  )

  # Highlight whichever identifier columns are actually present. The merge
  # step names the gene column "GeneSymbol"; "Gene" is still accepted so a
  # matrix using the older name is highlighted as before.
  key_cols <- intersect(c("ProbeID", "GeneSymbol", "Gene"), colnames(preview_df))
  if (length(key_cols) > 0) {
    dt <- dt %>%
      formatStyle(columns = key_cols,
                  backgroundColor = '#e8f4f8',
                  fontWeight = 'bold')
  }

  dt
})
| |
|
| | |
| | |
| | |
| |
|
| | |
# Summary list shown before probe de-duplication: probe count, number of
# numeric (sample) columns, annotated probes, and genes hit by several probes.
output$chip_before_dedupe_stats <- renderUI({
  req(chip_data$merged_matrix)

  merged <- chip_data$merged_matrix

  probe_total <- nrow(merged)
  sample_total <- sum(sapply(merged, is.numeric))
  annotated_total <- sum(!is.na(merged$GeneSymbol))

  # Probes per gene symbol; the empty symbol is not a gene and is dropped.
  probes_per_gene <- table(merged$GeneSymbol)
  probes_per_gene <- probes_per_gene[names(probes_per_gene) != ""]
  multi_probe_genes <- sum(probes_per_gene > 1)

  annotated_pct <- annotated_total / probe_total * 100

  stat_list <- tags$ul(
    style = "padding-left: 15px; margin: 5px 0;",
    tags$li(sprintf("总探针数: %d", probe_total)),
    tags$li(sprintf("样本数: %d", sample_total)),
    tags$li(sprintf("有基因注释的探针: %d (%.1f%%)", annotated_total, annotated_pct)),
    tags$li(sprintf("一因多探针的基因数: %d", multi_probe_genes))
  )

  div(style = "font-size: 12px;", stat_list)
})
| |
|
| | |
# Preprocess the merged probe matrix when the preprocess button is clicked:
# extract the numeric sample columns, optionally log2-transform and
# between-array normalise them, store the matrix and a summary report in
# chip_data, and build a before/after boxplot.
observeEvent(input$chip_preprocess_data, {
  req(chip_data$merged_matrix)

  showNotification("🔄 正在进行数据预处理...", type = "message")

  tryCatch({
    merged_df <- chip_data$merged_matrix

    # Every numeric column is treated as a sample column; non-numeric
    # columns are skipped.
    expr_cols <- which(vapply(merged_df, is.numeric, logical(1)))
    if (length(expr_cols) == 0) {
      stop("合并矩阵中没有数值型样本列", call. = FALSE)
    }

    expr_matrix <- as.matrix(merged_df[, expr_cols, drop = FALSE])

    if ("ProbeID" %in% colnames(merged_df)) {
      rownames(expr_matrix) <- merged_df$ProbeID
      cat("✅ 表达矩阵行名使用ProbeID\n")
    } else if ("Gene" %in% colnames(merged_df)) {
      rownames(expr_matrix) <- merged_df$Gene
      cat("⚠️ 警告:使用Gene作为行名(ProbeID列不存在)\n")
    } else {
      rownames(expr_matrix) <- rownames(merged_df)
    }

    cat(sprintf("✅ 提取表达数据: %d 探针 × %d 样本\n",
                nrow(expr_matrix), ncol(expr_matrix)))

    # Untouched copy used for the "before" half of the boxplot.
    chip_data$expr_before_preprocess <- expr_matrix

    # Flags are always rewritten, so a run with an option switched off cannot
    # leave a stale or missing value behind for the report.
    log2_done <- FALSE
    normalize_done <- FALSE

    if (isTRUE(input$chip_auto_log2 %||% TRUE)) {
      ex <- expr_matrix
      # Quantile-based heuristic deciding whether log2 is still needed.
      qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm = TRUE))
      LogC <- (qx[5] > 100) ||
        (qx[6] - qx[1] > 50 && qx[2] > 0) ||
        (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

      # isTRUE(): an all-NA matrix yields NA quantiles and an NA decision.
      if (isTRUE(LogC)) {
        # Non-positive values have no log2; mark them missing first.
        ex[which(ex <= 0)] <- NaN
        expr_matrix <- log2(ex)
        log2_done <- TRUE
        cat("✅ log2转换已完成\n")
      } else {
        cat("ℹ️ 不需要log2转换\n")
      }
    }

    if (isTRUE(input$chip_normalize_data %||% TRUE)) {
      library(limma)
      expr_matrix <- normalizeBetweenArrays(expr_matrix)
      normalize_done <- TRUE
      cat("✅ limma标准化已完成\n")
    }

    chip_data$log2_performed <- log2_done
    chip_data$normalize_performed <- normalize_done
    chip_data$expr_preprocessed <- expr_matrix

    chip_data$preprocess_report <- list(
      log2_performed = log2_done,
      normalize_performed = normalize_done,
      n_probes = nrow(expr_matrix),
      n_samples = ncol(expr_matrix),
      data_range = range(expr_matrix, na.rm = TRUE)
    )

    # The boxplot is best-effort: a plotting failure is logged but does not
    # fail the preprocessing itself.
    tryCatch({
      library(ggplot2)
      library(reshape2)

      # Samples become rows so melt() yields one row per sample/probe pair.
      expr_before <- chip_data$expr_before_preprocess
      df_before <- as.data.frame(t(expr_before))
      df_before$Sample <- rownames(df_before)
      df_before_long <- melt(df_before, id.vars = "Sample", variable.name = "Probe", value.name = "Expression")
      df_before_long$Stage <- "Before"

      df_after <- as.data.frame(t(expr_matrix))
      df_after$Sample <- rownames(df_after)
      df_after_long <- melt(df_after, id.vars = "Sample", variable.name = "Probe", value.name = "Expression")
      df_after_long$Stage <- "After"

      df_combined <- rbind(df_before_long, df_after_long)

      p <- ggplot(df_combined, aes(x = Sample, y = Expression, fill = Stage)) +
        geom_boxplot() +
        theme_bw() +
        theme(
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 8),
          legend.position = "top",
          plot.title = element_text(hjust = 0.5)
        ) +
        labs(
          title = "数据预处理前后对比",
          x = "样本",
          y = "表达值",
          fill = "阶段"
        ) +
        scale_fill_manual(values = c("Before" = "#E69F00", "After" = "#009E73"))

      chip_data$preprocess_boxplot <- p
      cat("✅ 箱线图已生成\n")
    }, error = function(e) {
      cat(sprintf("⚠️ 箱线图生成失败: %s\n", e$message))
    })

    showNotification("✅ 数据预处理完成!", type = "message", duration = 5)

  }, error = function(e) {
    showNotification(sprintf("❌ 预处理失败: %s", e$message), type = "error")
    cat(sprintf("❌ 预处理失败: %s\n", e$message))
  })
})
| |
|
| | |
# Result panel for the preprocessing step: a statistics table built from
# chip_data$preprocess_report and the preprocessed matrix, followed by the
# before/after boxplot placeholder.
output$chip_preprocess_result_ui <- renderUI({
  req(chip_data$preprocess_report)
  req(chip_data$expr_preprocessed)

  report <- chip_data$preprocess_report
  expr_matrix <- chip_data$expr_preprocessed

  expr_mean <- mean(expr_matrix, na.rm = TRUE)
  expr_median <- median(expr_matrix, na.rm = TRUE)
  expr_sd <- sd(expr_matrix, na.rm = TRUE)

  # Yes/no badge for a step flag. isTRUE() treats a missing (NULL) or NA flag
  # as "not performed" instead of raising an error inside `if`.
  status_badge <- function(flag) {
    if (isTRUE(flag)) {
      tags$span("✅ 是", class = "badge badge-success")
    } else {
      tags$span("❌ 否", class = "badge badge-secondary")
    }
  }

  # One two-cell table row.
  stat_row <- function(label, value) {
    tags$tr(tags$td(label), tags$td(value))
  }

  tagList(
    h5("✅ 预处理完成", style = "color: #28a745; font-weight: bold;"),
    br(),

    wellPanel(
      style = "background: #f8f9fa; border: 1px solid #dee2e6;",
      h6("📊 基本统计", style = "color: #495057; margin-top: 0;"),
      tags$table(
        class = "table table-sm table-striped",
        style = "margin-bottom: 0;",
        tags$thead(
          tags$tr(
            tags$th("项目", style = "width: 50%;"),
            tags$th("值")
          )
        ),
        tags$tbody(
          stat_row("log2转换", status_badge(report$log2_performed)),
          stat_row("limma标准化 (quantile)", status_badge(report$normalize_performed)),
          stat_row(tags$strong("探针数"), sprintf("%d", report$n_probes)),
          stat_row(tags$strong("样本数"), sprintf("%d", report$n_samples)),
          stat_row("数据范围",
                   sprintf("%.3f ~ %.3f", report$data_range[1], report$data_range[2])),
          stat_row("平均值", sprintf("%.3f", expr_mean)),
          stat_row("中位数", sprintf("%.3f", expr_median)),
          stat_row("标准差", sprintf("%.3f", expr_sd))
        )
      )
    ),
    br(),

    wellPanel(
      style = "background: white; border: 1px solid #dee2e6;",
      h6("📊 箱线图对比(矫正前后)", style = "color: #ff9800; margin-top: 0;"),
      plotOutput("chip_preprocess_boxplot", height = "500px")
    )
  )
})
| |
|
| | |
# Draw the stored before/after boxplot once one has been generated.
output$chip_preprocess_boxplot <- renderPlot({
  stored_plot <- chip_data$preprocess_boxplot
  req(stored_plot)
  stored_plot
})
| |
|
| | |
| | |
| | |
| |
|
| | |
# Derive a batch label for every sample by matching the user-supplied pattern
# (followed by one or more word characters) against the sample names, then
# store the labels in chip_data$batch_info and report the batch sizes.
observeEvent(input$chip_detect_batch, {
  req(chip_data$expr_preprocessed)
  req(input$chip_batch_pattern)

  batch_regex <- paste0(input$chip_batch_pattern, "\\w+")
  sample_names <- colnames(chip_data$expr_preprocessed)

  # The pattern is free text typed by the user, so an invalid regular
  # expression is reported as a notification rather than left to escape
  # the observer.
  tryCatch({
    # vapply guarantees a character vector even when there are no samples.
    batches <- vapply(sample_names, function(sample_name) {
      hit <- regmatches(
        sample_name,
        regexpr(batch_regex, sample_name, ignore.case = TRUE)
      )
      if (length(hit) > 0) hit else "Unknown"
    }, character(1))

    chip_data$batch_info <- batches
    chip_data$batch_method <- input$chip_batch_method %||% "limma"

    batch_table <- table(batches)

    cat(sprintf("✅ 检测到 %d 个批次:\n", length(batch_table)))
    cat(sprintf(" - %s: %d 个样本\n",
                names(batch_table), as.integer(batch_table)),
        sep = "")

    showNotification(
      sprintf("✅ 检测到 %d 个批次", length(batch_table)),
      type = "message"
    )
  }, error = function(e) {
    showNotification(sprintf("❌ 批次检测失败: %s", e$message), type = "error")
    cat(sprintf("❌ 批次检测失败: %s\n", e$message))
  })
})
| |
|
| | |
# Parse the manually entered batch table (one "sample<TAB>batch" pair per
# line) and assign a batch label to every sample of the preprocessed matrix;
# samples not listed are labelled "Unknown".
observeEvent(input$chip_batch_manual, {
  req(input$chip_batch_manual)
  # Without a preprocessed matrix there are no sample names to label, and
  # batch_info would otherwise be overwritten with an empty value.
  req(chip_data$expr_preprocessed)

  tryCatch({
    lines <- strsplit(input$chip_batch_manual, "\n")[[1]]
    lines <- lines[lines != "" & !grepl("^\\s*$", lines)]

    # Later lines win when a sample is listed more than once.
    batch_mapping <- list()
    for (line in lines) {
      parts <- strsplit(line, "\t")[[1]]
      if (length(parts) >= 2) {
        sample_key <- trimws(parts[1])
        # A blank sample name cannot match any column; skip it.
        if (nzchar(sample_key)) {
          batch_mapping[[sample_key]] <- trimws(parts[2])
        }
      }
    }

    chip_data$batch_manual_mapping <- batch_mapping

    sample_names <- colnames(chip_data$expr_preprocessed)
    batches <- rep("Unknown", length(sample_names))
    names(batches) <- sample_names
    listed <- sample_names %in% names(batch_mapping)
    batches[listed] <- unlist(batch_mapping[sample_names[listed]], use.names = FALSE)

    chip_data$batch_info <- batches

    batch_table <- table(batches)

    cat(sprintf("✅ 手动指定批次: %d 个批次\n", length(batch_table)))

    showNotification(
      sprintf("✅ 手动指定批次: %d 个批次", length(batch_table)),
      type = "message"
    )
  }, error = function(e) {
    showNotification(sprintf("❌ 解析批次信息失败: %s", e$message), type = "error")
  })
})
| |
|
| | |
# Two-column table listing each batch label and the number of samples in it.
output$chip_batch_info_ui <- renderUI({
  req(chip_data$batch_info)

  batch_labels <- chip_data$batch_info
  batch_counts <- table(batch_labels)

  # One table row per batch.
  batch_row <- function(idx) {
    tags$tr(
      tags$td(names(batch_counts)[idx]),
      tags$td(batch_counts[idx])
    )
  }

  header <- tags$thead(
    tags$tr(
      tags$th("批次"),
      tags$th("样本数")
    )
  )
  body <- tags$tbody(lapply(seq_along(batch_counts), batch_row))

  tagList(
    tags$table(class = "table table-striped", header, body)
  )
})
| |
|
| | |
# Apply batch correction to the preprocessed matrix with the selected method
# (limma::removeBatchEffect or sva::ComBat), then store the corrected matrix
# and a summary report in chip_data.
observeEvent(input$chip_apply_batch_correct, {
  req(chip_data$expr_preprocessed)
  req(chip_data$batch_info)

  showNotification("🔧 正在进行批次矫正...", type = "message")

  tryCatch({
    expr_matrix <- chip_data$expr_preprocessed
    # Flatten to a plain character vector whatever shape batch_info was
    # stored in.
    batches <- as.character(unlist(chip_data$batch_info))
    method <- input$chip_batch_method %||% "limma"

    cat(sprintf("📊 批次矫正方法: %s\n", method))

    # Fail early with a readable message instead of an internal error from
    # the correction routines.
    if (length(batches) != ncol(expr_matrix)) {
      stop("批次信息与样本数不一致,请重新检测批次", call. = FALSE)
    }
    batch_factor <- factor(batches)
    if (nlevels(batch_factor) < 2) {
      stop("至少需要 2 个批次才能进行批次矫正", call. = FALSE)
    }

    expr_corrected <- switch(
      method,
      limma = {
        library(limma)
        corrected <- removeBatchEffect(expr_matrix, batch = batch_factor)
        cat("✅ limma::removeBatchEffect 批次矫正完成\n")
        corrected
      },
      combat = {
        library(sva)
        corrected <- ComBat(
          dat = expr_matrix,
          batch = batch_factor,
          mod = NULL,
          par.prior = TRUE,
          prior.plots = FALSE
        )
        cat("✅ sva::ComBat 批次矫正完成\n")
        corrected
      },
      # An unrecognised method would otherwise leave the result undefined.
      stop(sprintf("未知的批次矫正方法: %s", method), call. = FALSE)
    )

    chip_data$expr_batch_corrected <- expr_corrected
    chip_data$batch_correct_method <- method

    # Mean absolute change per value introduced by the correction.
    mean_diff <- mean(abs(expr_matrix - expr_corrected), na.rm = TRUE)

    chip_data$batch_correct_report <- list(
      method = method,
      n_batches = length(unique(batches)),
      batch_distribution = table(batches),
      mean_change = mean_diff,
      n_probes = nrow(expr_corrected),
      n_samples = ncol(expr_corrected)
    )

    showNotification(
      sprintf("✅ 批次矫正完成!方法: %s", method),
      type = "message",
      duration = 10
    )

  }, error = function(e) {
    showNotification(
      sprintf("❌ 批次矫正失败: %s", e$message),
      type = "error",
      duration = 10
    )
    cat(sprintf("❌ 批次矫正错误: %s\n", e$message))
  })
})
| |
|
| | |
# Summary table for the last batch-correction run, read from
# chip_data$batch_correct_report.
output$chip_batch_correct_result_ui <- renderUI({
  req(chip_data$batch_correct_report)

  report <- chip_data$batch_correct_report

  used_limma <- report$method == "limma"
  method_name <- if (used_limma) "limma::removeBatchEffect" else "sva::ComBat"

  # One label/value table row.
  report_row <- function(label, value) {
    tags$tr(tags$td(label), tags$td(value))
  }

  header <- tags$thead(
    tags$tr(
      tags$th("项目"),
      tags$th("值")
    )
  )
  body <- tags$tbody(
    report_row("矫正方法", method_name),
    report_row("批次数", report$n_batches),
    report_row("探针/基因数", report$n_probes),
    report_row("样本数", report$n_samples),
    report_row("平均变化幅度", sprintf("%.4f", report$mean_change))
  )

  tagList(
    h6("✅ 批次矫正完成", style = "color: #E91E63;"),
    tags$table(class = "table table-striped", header, body),
    br(),
    helpText("💡 提示:批次矫正后的数据已保存,将用于后续的探针去重和差异分析。")
  )
})
| |
|
| | |
# Collapse probes to genes: attach GeneSymbol (and EntrezID when available)
# to the expression matrix, drop unannotated probes, and for each gene symbol
# keep the probe with the highest mean expression. The result (row names =
# gene symbols) and a summary report are stored in chip_data.
observeEvent(input$chip_dedupe_probes, {
  req(chip_data$expr_preprocessed)

  showNotification("✂️ 正在进行探针去重...", type = "message")

  tryCatch({
    library(dplyr)
    library(tibble)

    # Prefer the batch-corrected matrix when one exists.
    if (!is.null(chip_data$expr_batch_corrected)) {
      expr_matrix <- chip_data$expr_batch_corrected
      cat("✅ 使用批次矫正后的数据进行探针去重\n")
    } else {
      expr_matrix <- chip_data$expr_preprocessed
      cat("✅ 使用预处理后的数据进行探针去重\n")
    }

    expr_df <- as.data.frame(expr_matrix)
    # Remember the sample columns by name before annotation columns are added,
    # so later steps never mistake an annotation column for a sample.
    sample_cols <- colnames(expr_df)
    expr_df <- expr_df %>%
      rownames_to_column("ProbeID")

    available_cols <- colnames(chip_data$merged_matrix)
    cat(sprintf("📋 merged_matrix可用列: %s\n", paste(available_cols, collapse = ", ")))

    if (!("GeneSymbol" %in% available_cols)) {
      stop("merged_matrix 缺少 GeneSymbol 列,无法去重", call. = FALSE)
    }

    merge_cols <- intersect(c("ProbeID", "GeneSymbol", "EntrezID"), available_cols)
    cat(sprintf("📋 将合并列: %s\n", paste(merge_cols, collapse = ", ")))

    expr_df <- expr_df %>%
      inner_join(chip_data$merged_matrix[, merge_cols, drop = FALSE], by = "ProbeID")

    has_entrez <- "EntrezID" %in% colnames(expr_df)

    if (has_entrez) {
      na_before <- sum(is.na(expr_df$EntrezID))

      # Vectorised clean-up: keep the first run of digits of each value;
      # values without any digit become NA.
      raw_ids <- as.character(expr_df$EntrezID)
      cleaned_ids <- sub("^[^0-9]*([0-9]+).*$", "\\1", raw_ids)
      cleaned_ids[is.na(raw_ids) | !grepl("[0-9]", raw_ids)] <- NA_character_
      expr_df$EntrezID <- cleaned_ids

      na_after <- sum(is.na(expr_df$EntrezID))

      cat(sprintf("🔧 EntrezID清理: NA前=%d, NA后=%d, 新增NA=%d\n",
                  na_before, na_after, na_after - na_before))

      if (na_after > na_before) {
        cat(sprintf("⚠️ %d个EntrezID包含非数字字符,已清理为NA\n", na_after - na_before))
      }
    }

    cat(sprintf("🔍 去重前: %d 行,GeneSymbol NA=%d, 空字符串=%d\n",
                nrow(expr_df),
                sum(is.na(expr_df$GeneSymbol)),
                sum(expr_df$GeneSymbol == "", na.rm = TRUE)))

    expr_df <- expr_df[!is.na(expr_df$GeneSymbol) & expr_df$GeneSymbol != "", ]

    cat(sprintf("🔍 移除NA后: %d 行剩余\n", nrow(expr_df)))

    if (nrow(expr_df) == 0) {
      cat("⚠️ 警告:移除NA基因后没有数据!请检查合并步骤的GeneSymbol列\n")
      showNotification("❌ 去重失败:所有基因都是NA,请检查ID列和基因列是否匹配", type = "error", duration = 10)
      return(invisible(NULL))
    }

    # Rank probes by mean expression over the sample columns only.
    # drop = FALSE keeps a data frame even with a single sample.
    annot_cols <- intersect(c("GeneSymbol", "EntrezID"), colnames(expr_df))
    expr_df$rowMean <- rowMeans(as.matrix(expr_df[, sample_cols, drop = FALSE]))

    # Column order: annotation first, then samples. ProbeID is dropped here.
    expr_df <- expr_df[, c(annot_cols, sample_cols, "rowMean"), drop = FALSE] %>%
      arrange(desc(rowMean)) %>%
      distinct(GeneSymbol, .keep_all = TRUE) %>%
      select(-rowMean) %>%
      column_to_rownames("GeneSymbol")

    if (has_entrez) {
      cat("✅ 去重使用GeneSymbol,行名=GeneSymbol,EntrezID保留为列供差异分析使用\n")
    } else {
      cat("✅ 去重使用GeneSymbol,最终结果使用GeneSymbol作为行名\n")
    }

    chip_data$expr_deduped <- expr_df

    n_before <- nrow(chip_data$expr_preprocessed)
    n_after <- nrow(expr_df)
    reduction_rate <- (n_before - n_after) / n_before * 100

    chip_data$dedupe_report <- list(
      n_probes_before = n_before,
      n_genes_after = n_after,
      n_removed = n_before - n_after,
      reduction_rate = reduction_rate,
      # Count samples only; the retained EntrezID column is not a sample.
      n_samples = length(sample_cols)
    )

    cat(sprintf("✅ 探针去重完成: %d 探针 → %d 基因 (%.1f%% 减少)\n",
                n_before, n_after, reduction_rate))

    showNotification(
      sprintf("✅ 去重完成!%d 探针 → %d 基因", n_before, n_after),
      type = "message",
      duration = 5
    )

  }, error = function(e) {
    showNotification(sprintf("❌ 去重失败: %s", e$message), type = "error")
    cat(sprintf("❌ 去重失败: %s\n", e$message))
  })
})
| |
|
| | |
# Compact bullet list with the post-deduplication figures taken from
# chip_data$dedupe_report.
output$chip_after_dedupe_stats <- renderUI({
  req(chip_data$dedupe_report)

  dedupe <- chip_data$dedupe_report

  gene_line <- sprintf("基因数: %d", dedupe$n_genes_after)
  sample_line <- sprintf("样本数: %d", dedupe$n_samples)
  removed_line <- sprintf("减少探针数: %d (%.1f%%)", dedupe$n_removed, dedupe$reduction_rate)

  div(
    style = "font-size: 12px; color: #28a745;",
    tags$ul(
      style = "padding-left: 15px; margin: 5px 0;",
      tags$li(gene_line),
      tags$li(sample_line),
      tags$li(removed_line)
    )
  )
})
| |
|
| | |
# Result panel for probe de-duplication: a table of the report figures
# followed by the placeholder for the de-duplicated matrix preview.
output$chip_dedupe_result_ui <- renderUI({
  req(chip_data$dedupe_report)

  dedupe <- chip_data$dedupe_report

  # One label/value table row.
  report_row <- function(label, value) {
    tags$tr(tags$td(label), tags$td(value))
  }

  header <- tags$thead(
    tags$tr(
      tags$th("项目"),
      tags$th("值")
    )
  )
  body <- tags$tbody(
    report_row("去重前探针数", dedupe$n_probes_before),
    report_row("去重后基因数", dedupe$n_genes_after),
    report_row("减少的探针数", dedupe$n_removed),
    report_row("减少比例", sprintf("%.1f%%", dedupe$reduction_rate)),
    report_row("样本数", dedupe$n_samples)
  )

  tagList(
    tags$table(class = "table table-striped", header, body),
    br(),
    h6("📊 去重后表达矩阵预览(前5行 × 前10列):", style = "color: #666;"),
    DTOutput("chip_deduped_matrix_preview")
  )
})
| |
|
| | |
# Preview of the de-duplicated matrix: first 5 rows, at most the first 10
# columns, shown with row names and a slightly reduced font size.
output$chip_deduped_matrix_preview <- renderDT({
  req(chip_data$expr_deduped)

  preview_df <- head(chip_data$expr_deduped, 5)
  too_wide <- ncol(preview_df) > 10
  if (too_wide) {
    preview_df <- preview_df[, 1:10]
  }

  table_options <- list(
    dom = 't',
    paging = FALSE,
    scrollX = TRUE
  )

  preview_widget <- datatable(
    as.data.frame(preview_df),
    options = table_options,
    rownames = TRUE,
    filter = 'none'
  )

  formatStyle(preview_widget, columns = 1:ncol(preview_df), fontSize = '85%')
})
| |
|
| | |
# Publish the de-duplicated matrix as the standard expression data set and
# record its sample names, gene names and a ready flag in chip_data.
observeEvent(input$chip_generate_standard_data, {
  req(chip_data$expr_deduped)

  showNotification("🚀 正在生成标准格式数据...", type = "message")

  tryCatch({
    std_expr <- chip_data$expr_deduped

    # The de-duplication step may keep an "EntrezID" annotation column next to
    # the sample columns; it is not a sample and must not be listed or
    # counted as one.
    all_cols <- colnames(std_expr)
    sample_cols <- all_cols[all_cols != "EntrezID"]

    chip_data$standard_expression <- std_expr
    chip_data$sample_names <- sample_cols
    chip_data$gene_names <- rownames(std_expr)
    chip_data$ready_for_analysis <- TRUE

    n_genes <- nrow(std_expr)
    n_samples <- length(sample_cols)

    cat(sprintf("✅ 标准格式数据已生成: %d 基因 × %d 样本\n",
                n_genes, n_samples))

    showNotification(
      sprintf("✅ 标准格式数据已生成!%d 基因 × %d 样本",
              n_genes, n_samples),
      type = "message",
      duration = 10
    )

  }, error = function(e) {
    showNotification(sprintf("❌ 生成失败: %s", e$message), type = "error")
  })
})
| |
|
| | |
# Summary of the standard expression data (gene count, sample count, first
# sample names) followed by a list of suggested next steps.
output$chip_standard_data_summary <- renderUI({
  req(chip_data$standard_expression)

  std_expr <- chip_data$standard_expression

  # An "EntrezID" annotation column may sit next to the sample columns; leave
  # it out of the sample count and of the sample-name preview.
  n_samples <- ncol(std_expr) - sum(colnames(std_expr) == "EntrezID")
  shown_names <- chip_data$sample_names
  shown_names <- shown_names[shown_names != "EntrezID"]

  tagList(
    h6("📊 数据摘要", style = "color: #155724;"),
    tags$table(
      class = "table table-striped",
      tags$thead(
        tags$tr(
          tags$th("项目"),
          tags$th("值")
        )
      ),
      tags$tbody(
        tags$tr(
          tags$td("基因数"),
          tags$td(nrow(std_expr))
        ),
        tags$tr(
          tags$td("样本数"),
          tags$td(n_samples)
        ),
        tags$tr(
          tags$td("样本名称"),
          tags$td(paste(head(shown_names, 5), collapse = ", "))
        )
      )
    ),
    br(),
    h6("✅ 现在可以进行以下分析:", style = "color: #155724;"),
    tags$ul(style = "padding-left: 20px;",
            tags$li("切换到“差异分析”模块进行limma分析"),
            tags$li("使用样本分组功能设置对照组和处理组"),
            tags$li("进行KEGG和GO富集分析"),
            tags$li("生成火山图和其他可视化")
    )
  )
})
| |
|
| | |
| | |
| | |
| |
|
| | |
# Grouping panel. When an automatic grouping pattern was detected it is shown
# with a checkbox to accept it, and the manual pickers appear only when the
# checkbox is cleared; otherwise a warning and the manual pickers are shown.
output$chip_grouping_ui <- renderUI({
  req(chip_data$series_matrix)

  sample_names <- colnames(chip_data$series_matrix)

  paste_hint <- "💡 提示:可以点击输入框后,直接粘贴样本名称(用逗号或空格分隔)"

  # Side-by-side control / treatment pickers; free-text entries are allowed.
  sample_picker_row <- function() {
    fluidRow(
      column(6,
             selectizeInput("chip_ctrl_samples",
                            "对照组样本:",
                            choices = sample_names,
                            multiple = TRUE,
                            options = list(create = TRUE))
      ),
      column(6,
             selectizeInput("chip_trt_samples",
                            "处理组样本:",
                            choices = sample_names,
                            multiple = TRUE,
                            options = list(create = TRUE))
      )
    )
  }

  auto_detected <- !is.null(chip_data$group_info) &&
    !is.null(chip_data$group_info$pattern_name)

  if (auto_detected) {
    detected <- chip_data$group_info

    auto_box <- div(
      class = "alert alert-info",
      h5("✅ 自动检测到分组模式"),
      p(sprintf("模式: %s", detected$pattern_name)),
      p(sprintf("对照组: %s", paste(detected$ctrl_samples, collapse = ", "))),
      p(sprintf("处理组: %s", paste(detected$trt_samples, collapse = ", "))),
      checkboxInput("chip_use_auto_groups",
                    "使用自动检测的分组",
                    value = TRUE)
    )

    manual_panel <- conditionalPanel(
      condition = "!input.chip_use_auto_groups",
      h5("手动选择分组:"),
      helpText(paste_hint),
      sample_picker_row()
    )

    tagList(auto_box, manual_panel)
  } else {
    warning_box <- div(
      class = "alert alert-warning",
      h5("⚠️ 未能自动检测分组模式"),
      p("请使用上方的快速粘贴功能,或手动指定对照组和处理组样本。")
    )

    tagList(warning_box, helpText(paste_hint), sample_picker_row())
  }
})
| |
|
| | |
# Status box describing the grouping currently in effect: manual groups take
# precedence, then the automatically detected ones, else a reminder to set
# groups.
output$chip_current_groups_ui <- renderUI({
  req(chip_data$series_matrix)

  manual_set <- !(is.null(chip_data$manual_ctrl_samples) ||
                    is.null(chip_data$manual_trt_samples))
  auto_set <- !(is.null(chip_data$group_info) ||
                  is.null(chip_data$group_info$pattern_name))

  # Paragraph of the form "<format with count and names>" for one group.
  group_line <- function(fmt, members) {
    p(sprintf(fmt, length(members), paste(members, collapse = ", ")))
  }

  if (manual_set) {
    div(
      class = "alert alert-success",
      h5("✅ 当前分组(手动设置)"),
      group_line("对照组 (%d个): %s", chip_data$manual_ctrl_samples),
      group_line("处理组 (%d个): %s", chip_data$manual_trt_samples)
    )
  } else if (auto_set) {
    detected <- chip_data$group_info
    div(
      class = "alert alert-info",
      h5("🤖 当前分组(自动检测)"),
      p(sprintf("模式: %s", detected$pattern_name)),
      group_line("对照组 (%d个): %s", detected$ctrl_samples),
      group_line("处理组 (%d个): %s", detected$trt_samples)
    )
  } else {
    div(
      class = "alert alert-warning",
      h5("⚠️ 尚未设置分组"),
      p("请使用上方的快速粘贴功能设置分组,或使用手动选择。")
    )
  }
})
| |
|
| | |
# Fallback grouping form: two multi-select lists populated with the series
# matrix column names, plus a button that applies the selection.
output$chip_manual_grouping_ui <- renderUI({
  req(chip_data$series_matrix)

  sample_names <- colnames(chip_data$series_matrix)

  ctrl_picker <- selectInput("chip_ctrl_samples_manual",
                             "对照组样本:",
                             choices = sample_names,
                             multiple = TRUE)
  trt_picker <- selectInput("chip_trt_samples_manual",
                            "处理组样本:",
                            choices = sample_names,
                            multiple = TRUE)
  apply_button <- actionButton("chip_apply_manual_groups",
                               "✅ 应用手动选择的分组",
                               class = "btn-success",
                               style = "width: 100%; margin-top: 10px;")

  tagList(
    h5("📝 手动选择样本(备选方案)", style = "color: #6E6E73;"),
    helpText("如果快速粘贴不方便,可以在这里手动选择样本:"),
    fluidRow(
      column(6, ctrl_picker),
      column(6, trt_picker)
    ),
    apply_button
  )
})
| |
|
| | |
# Store the manually selected control/treatment samples on chip_data when the
# "apply" button is clicked.
#
# The selections are validated explicitly rather than with req(): req() aborts
# the handler silently on an empty selection, which made the warning below
# unreachable and left the user without any feedback.
observeEvent(input$chip_apply_manual_groups, {
  ctrl_selected <- input$chip_ctrl_samples_manual
  trt_selected <- input$chip_trt_samples_manual

  # An empty multi-select yields NULL, which has length 0.
  if (length(ctrl_selected) == 0 || length(trt_selected) == 0) {
    showNotification("请至少为每组选择一个样本!", type = "warning")
    return(NULL)
  }

  chip_data$manual_ctrl_samples <- ctrl_selected
  chip_data$manual_trt_samples <- trt_selected

  showNotification(
    sprintf("✅ 已应用手动分组: %d 对照 + %d 处理",
            length(ctrl_selected),
            length(trt_selected)),
    type = "message"
  )
})
| |
|
| | |
# Run the two-group differential expression analysis when the "run" button is
# clicked: resolve the sample grouping, pick the most processed expression
# matrix available on chip_data, call run_limma_analysis(), and store both the
# raw and the pipeline-formatted results back on chip_data.
observeEvent(input$run_chip_analysis, {
  req(chip_data$series_matrix)

  # Grouping priority: groups stored on chip_data by the user, then the
  # auto-detected groups (only if the user opted in), then the dropdowns.
  if (!is.null(chip_data$manual_ctrl_samples) &&
      !is.null(chip_data$manual_trt_samples)) {
    ctrl_samples <- chip_data$manual_ctrl_samples
    trt_samples <- chip_data$manual_trt_samples
    cat("✅ 使用手动粘贴的分组\n")
  } else if (isTRUE(input$chip_use_auto_groups) &&
             !is.null(chip_data$group_info) &&
             !is.null(chip_data$group_info$pattern_name)) {
    ctrl_samples <- chip_data$group_info$ctrl_samples
    trt_samples <- chip_data$group_info$trt_samples
    cat("✅ 使用自动检测的分组\n")
  } else {
    ctrl_samples <- input$chip_ctrl_samples
    trt_samples <- input$chip_trt_samples
    cat("✅ 使用下拉选择的分组\n")
  }

  # NULL has length 0, so this also covers groups that were never set.
  if (length(ctrl_samples) == 0 || length(trt_samples) == 0) {
    showNotification("请先设置对照组和处理组样本!", type = "error")
    return(NULL)
  }

  showNotification("正在运行差异分析...", type = "message")

  # Expression-matrix priority: standard format > de-duplicated > merged
  # (annotated) table > raw series matrix (aggregated when a mapping exists).
  # isTRUE() keeps a NULL/NA ready_for_analysis flag from raising an error
  # inside `&&`.
  if (!is.null(chip_data$standard_expression) &&
      isTRUE(chip_data$ready_for_analysis)) {
    expr_matrix <- chip_data$standard_expression
    cat("✅ 使用标准格式数据(已探针注释和去重)\n")
    cat(sprintf("   表达矩阵: %d 基因 × %d 样本\n",
                nrow(expr_matrix), ncol(expr_matrix)))

  } else if (!is.null(chip_data$expr_deduped)) {
    expr_matrix <- chip_data$expr_deduped
    cat("✅ 使用去重后的表达数据\n")

  } else if (!is.null(chip_data$merged_matrix)) {
    merged_df <- chip_data$merged_matrix
    # Keep only the numeric columns; vapply() is type-stable even when the
    # table has no columns, unlike sapply().
    numeric_cols <- vapply(merged_df, is.numeric, logical(1))
    expr_matrix <- as.matrix(merged_df[, numeric_cols, drop = FALSE])
    # Prefer probe IDs as row names; fall back to the Gene column.
    if ("ProbeID" %in% colnames(merged_df)) {
      rownames(expr_matrix) <- merged_df$ProbeID
    } else {
      rownames(expr_matrix) <- merged_df$Gene
    }
    cat("✅ 使用合并后的数据(探针已注释)\n")

  } else {
    probe_mapping <- chip_data$probe_mapping

    if (is.null(probe_mapping)) {
      # No annotation loaded: analyse the raw matrix keyed by probe ID.
      expr_matrix <- chip_data$series_matrix
      cat("⚠️ 未加载注释文件,使用探针ID作为基因符号\n")
    } else {
      expr_matrix <- aggregate_probe_expression(
        chip_data$series_matrix,
        probe_mapping
      )

      if (is.null(expr_matrix)) {
        showNotification("探针注释失败!", type = "error")
        return(NULL)
      }
    }
  }

  limma_res <- run_limma_analysis(
    expr_matrix = expr_matrix,
    ctrl_samples = ctrl_samples,
    trt_samples = trt_samples,
    logfc_threshold = input$chip_logfc_threshold,
    pvalue_threshold = input$chip_pvalue_threshold,
    pval_type = input$chip_pval_type
  )

  if (is.null(limma_res)) {
    showNotification("差异分析失败!", type = "error")
    return(NULL)
  }

  formatted_results <- format_chip_results_for_pipeline(
    limma_res,
    expr_matrix,
    ctrl_samples,
    trt_samples
  )

  chip_data$limma_results <- limma_res
  chip_data$formatted_results <- formatted_results

  showNotification(
    sprintf("✅ 分析完成: %d 个显著差异基因",
            limma_res$n_significant),
    type = "message"
  )
})
| |
|
| | |
# Results panel: four summary cards (total / significant / up / down), the
# differential-expression table, and a download button.
output$chip_results_ui <- renderUI({
  req(chip_data$limma_results)

  stats <- chip_data$limma_results

  # One entry per summary card. The first card has no icon.
  card_specs <- list(
    list(
      value = stats$n_total,
      panel_style = "background: #f8f9fa; border: 2px solid #dee2e6; text-align: center; padding: 20px;",
      value_style = "color: #495057; margin: 10px 0;",
      label = "总基因数",
      label_style = "color: #6c757d; margin: 0;"
    ),
    list(
      value = stats$n_significant,
      panel_style = "background: #e7f3ff; border: 2px solid #007bff; text-align: center; padding: 20px;",
      value_style = "color: #007bff; margin: 10px 0;",
      icon_name = "star",
      icon_style = "color: #007bff; font-size: 24px;",
      label = "显著差异",
      label_style = "color: #007bff; margin: 5px 0 0 0;"
    ),
    list(
      value = stats$n_up,
      panel_style = "background: #d4edda; border: 2px solid #28a745; text-align: center; padding: 20px;",
      value_style = "color: #28a745; margin: 10px 0;",
      icon_name = "arrow-up",
      icon_style = "color: #28a745; font-size: 24px;",
      label = "上调",
      label_style = "color: #28a745; margin: 5px 0 0 0;"
    ),
    list(
      value = stats$n_down,
      panel_style = "background: #f8d7da; border: 2px solid #dc3545; text-align: center; padding: 20px;",
      value_style = "color: #dc3545; margin: 10px 0;",
      icon_name = "arrow-down",
      icon_style = "color: #dc3545; font-size: 24px;",
      label = "下调",
      label_style = "color: #dc3545; margin: 5px 0 0 0;"
    )
  )

  # Build one quarter-width card: value, optional icon, then the label.
  build_card <- function(spec) {
    children <- list(h3(spec[["value"]], style = spec[["value_style"]]))
    if (!is.null(spec[["icon_name"]])) {
      children <- c(
        children,
        list(icon(spec[["icon_name"]], style = spec[["icon_style"]]))
      )
    }
    children <- c(
      children,
      list(h6(spec[["label"]], style = spec[["label_style"]]))
    )
    panel <- do.call(
      wellPanel,
      c(list(style = spec[["panel_style"]]), children)
    )
    column(3, panel)
  }

  summary_row <- do.call(fluidRow, lapply(card_specs, build_card))

  tagList(
    summary_row,
    hr(),
    h4("差异分析结果"),
    DTOutput("chip_results_table"),
    br(),
    downloadButton("download_chip_results", "📥 下载结果", class = "btn-success")
  )
})
| |
|
| | |
# Interactive table of the limma results. A Significant flag is recomputed
# from the current threshold inputs, columns are put in a fixed order, and the
# identifier and significance columns are highlighted.
output$chip_results_table <- renderDT({
  req(chip_data$limma_results)

  de_table <- chip_data$limma_results$results

  # Which p-value column drives the significance flag.
  pval_col <- "P.Value"
  if (input$chip_pval_type == "adj.P.Val") {
    pval_col <- "adj.P.Val"
  }

  below_cutoff <- de_table[[pval_col]] < input$chip_pvalue_threshold
  large_effect <- abs(de_table$logFC) >= input$chip_logfc_threshold
  de_table$Significant <- ifelse(below_cutoff & large_effect, "Yes", "No")

  shown_cols <- c("ID", "SYMBOL", "logFC", "AveExpr", "t",
                  "P.Value", "adj.P.Val", "B", "Significant")
  shown <- de_table[, shown_cols]

  # Console diagnostics about the ID column.
  cat(sprintf("📊 差异分析结果ID列示例: %s\n",
              paste(head(shown$ID, 5), collapse = ", ")))
  cat(sprintf("📊 ID列类型: %s\n", class(shown$ID)[1]))

  # Warn when any non-missing ID is not purely numeric.
  known_ids <- shown$ID[!is.na(shown$ID)]
  if (!all(grepl("^[0-9]+$", known_ids))) {
    cat("⚠️ 警告: ID列包含基因符号而非Entrez Gene ID!\n")
    cat("💡 这可能是因为SOFT文件中缺少EntrezID列\n")
  }

  # Column indices are 0-based on the JS side and rownames are hidden, so
  # index 6 is adj.P.Val in `shown_cols`.
  widget <- datatable(
    shown,
    options = list(
      pageLength = 25,
      scrollX = TRUE,
      order = list(list(6, "asc"))
    ),
    filter = "top",
    rownames = FALSE
  )
  widget <- formatRound(
    widget,
    columns = c("logFC", "AveExpr", "t", "P.Value", "adj.P.Val"),
    digits = 4
  )
  widget <- formatStyle(
    widget,
    columns = c("ID", "SYMBOL"),
    backgroundColor = "#e8f4f8",
    fontWeight = "bold"
  )
  formatStyle(
    widget,
    "Significant",
    color = styleEqual(c("Yes", "No"), c("green", "grey")),
    fontWeight = "bold"
  )
})
| |
|
| | |
# CSV download of the full limma results table; the file name carries today's
# date.
output$download_chip_results <- downloadHandler(
  filename = function() {
    today <- Sys.Date()
    sprintf("chip_analysis_results_%s.csv", today)
  },
  content = function(file) {
    req(chip_data$limma_results)

    write.csv(chip_data$limma_results$results, file, row.names = FALSE)
  }
)
| | } |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
#' Parse a pasted list of sample names (one per line)
#'
#' Splits the pasted text into lines, normalises each entry (surrounding
#' whitespace and double quotes removed, blanks and duplicates dropped) and
#' keeps the entries that are column names of `expr_matrix`. Progress and
#' mismatches are logged to the console with `cat()`.
#'
#' @param pasted_text Character text with one sample name per line. `NULL`,
#'   `NA`, empty or non-character input is treated as an empty paste.
#' @param expr_matrix Matrix or data frame whose column names are the valid
#'   sample names.
#' @return Character vector of matched sample names in pasted order, or `NULL`
#'   when the paste is empty or nothing matches.
parse_sample_list <- function(pasted_text, expr_matrix) {
  cat("🔍 开始解析样本列表...\n")

  # Guard against NULL / NA / non-character input instead of letting
  # strsplit() or `[[1]]` fail.
  if (!is.character(pasted_text)) {
    pasted_text <- character(0)
  }
  pasted_text <- pasted_text[!is.na(pasted_text)]

  entries <- as.character(
    unlist(strsplit(pasted_text, "\r?\n"), use.names = FALSE)
  )
  # Quotes are dropped so that names copied together with their quotes can
  # still match column names that carry none.
  entries <- trimws(gsub("\"", "", entries, fixed = TRUE))
  entries <- unique(entries[nzchar(entries)])

  if (length(entries) == 0) {
    cat("⚠️ 粘贴内容为空\n")
    return(NULL)
  }

  cat(sprintf("📊 读取到 %d 行\n", length(entries)))

  matrix_samples <- colnames(expr_matrix)
  is_known <- entries %in% matrix_samples
  valid_samples <- entries[is_known]

  if (length(valid_samples) == 0) {
    cat("⚠️ 未找到匹配的样本\n")
    cat(sprintf("   粘贴的样本: %s\n", paste(head(entries, 3), collapse = ", ")))
    cat(sprintf("   可用样本: %s\n", paste(head(matrix_samples, 3), collapse = ", ")))
    return(NULL)
  }

  cat(sprintf("✅ 匹配成功: %d / %d 个样本\n",
              length(valid_samples), length(entries)))

  unmatched <- entries[!is_known]
  if (length(unmatched) > 0) {
    cat(sprintf("⚠️ %d 个样本未匹配: %s\n",
                length(unmatched),
                paste(head(unmatched, 3), collapse = ", ")))
  }

  valid_samples
}
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
#' Parse a pasted, tab-separated sample table into control/treatment groups
#'
#' The pasted text is read as a tab-separated table with a header row. Sample
#' IDs are taken from whichever column (or the row names) overlaps most with
#' the column names of `expr_matrix`, so group labels are always paired with
#' the sample on the same row.
#' NOTE(review): this assumes pasted tables have one sample per row; confirm
#' against the UI that feeds this function.
#'
#' With `group_col_name` given, that column supplies the group labels. The
#' first label encountered becomes the control group and the second the
#' treatment group; further groups are ignored with a console note.
#'
#' Without it, two layouts are recognised:
#' * sample names in the header row: the samples are split in half by
#'   position and a `warning` element is returned;
#' * sample names in a column: the first other column holding exactly two
#'   distinct labels is used as the grouping, with the same positional
#'   fallback when none is found.
#'
#' @param pasted_text Character text pasted by the user.
#' @param group_col_name Name of the grouping column, or `NULL` / `""` to
#'   auto-detect.
#' @param expr_matrix Matrix or data frame whose column names are the valid
#'   sample names.
#' @return A list with `success`. On success it also has `ctrl_samples` and
#'   `trt_samples` (plus `warning` for the positional fallback); on failure it
#'   has `error`. Parsing errors are returned this way, never raised.
parse_pasted_groups <- function(pasted_text, group_col_name, expr_matrix) {
  cat("🔍 开始解析粘贴的分组信息...\n")

  fail <- function(msg) list(success = FALSE, error = msg)
  clean <- function(x) trimws(as.character(x))

  if (!is.character(pasted_text) || length(pasted_text) == 0) {
    return(fail("粘贴内容为空"))
  }
  raw_text <- paste(pasted_text[!is.na(pasted_text)], collapse = "\n")
  lines <- strsplit(raw_text, "\r?\n")[[1]]
  # Drop blank lines but keep leading tabs, which mark empty first cells.
  lines <- lines[nzchar(trimws(lines))]
  if (length(lines) == 0) {
    return(fail("粘贴内容为空"))
  }

  has_group_col <- is.character(group_col_name) &&
    length(group_col_name) == 1 &&
    !is.na(group_col_name) &&
    nzchar(trimws(group_col_name))

  matrix_samples <- colnames(expr_matrix)
  if (is.null(matrix_samples)) {
    matrix_samples <- character(0)
  }

  # Find the column (or row names) that best matches the matrix sample names.
  # Returns NULL when nothing overlaps; `index` is ncol(df) + 1 for row names.
  locate_sample_ids <- function(df, skip = integer(0)) {
    candidates <- c(unname(lapply(df, clean)), list(clean(rownames(df))))
    hits <- vapply(
      candidates,
      function(v) sum(unique(v) %in% matrix_samples),
      integer(1)
    )
    hits[skip] <- 0L
    best <- which.max(hits)
    if (length(best) == 0 || hits[best] == 0L) {
      return(NULL)
    }
    list(index = best, ids = candidates[[best]])
  }

  # Turn per-sample labels into control (first label) / treatment (second).
  split_by_labels <- function(ids, labels) {
    group_names <- unique(labels)
    if (length(group_names) < 2) {
      return(fail("分组列中只有一个组,需要至少2个组"))
    }
    if (length(group_names) > 2) {
      cat(sprintf("⚠️ 检测到 %d 个组,将使用前两个: %s\n",
                  length(group_names),
                  paste(group_names[1:2], collapse = ", ")))
      group_names <- group_names[1:2]
    }
    ctrl_name <- group_names[1]
    trt_name <- group_names[2]
    ctrl_samples <- unique(ids[labels == ctrl_name])
    trt_samples <- unique(ids[labels == trt_name])

    cat(sprintf("✅ 解析成功: %d 对照 (%s) + %d 处理 (%s)\n",
                length(ctrl_samples), ctrl_name,
                length(trt_samples), trt_name))

    list(success = TRUE, ctrl_samples = ctrl_samples, trt_samples = trt_samples)
  }

  resolve <- function() {
    con <- textConnection(lines)
    # Close the connection even when read.table() fails.
    on.exit(close(con), add = TRUE)
    df <- read.table(con, header = TRUE, sep = "\t",
                     stringsAsFactors = FALSE, check.names = FALSE,
                     quote = "\"", comment.char = "")
    col_names <- clean(colnames(df))

    if (has_group_col) {
      wanted <- trimws(group_col_name)
      cat(sprintf("📋 使用分组列名: %s\n", wanted))

      group_idx <- match(wanted, col_names)
      if (is.na(group_idx)) {
        return(fail(sprintf("未找到分组列 '%s',可用列: %s",
                            wanted,
                            paste(col_names, collapse = ", "))))
      }

      id_info <- locate_sample_ids(df, skip = group_idx)
      if (is.null(id_info)) {
        return(fail("未在粘贴内容中找到与表达矩阵列名匹配的样本"))
      }

      labels <- clean(df[[group_idx]])
      # Only rows with a known sample and a usable label take part.
      keep <- id_info$ids %in% matrix_samples & !is.na(labels) & nzchar(labels)
      return(split_by_labels(id_info$ids[keep], labels[keep]))
    }

    cat("🤖 自动检测分组模式...\n")

    # Layout 1: the header row itself carries the sample names.
    samples <- unique(col_names[col_names %in% matrix_samples])

    if (length(samples) >= 2) {
      cat(sprintf("📊 找到 %d 个有效样本\n", length(samples)))
    } else {
      # Layout 2: one sample per row, IDs in some column.
      id_info <- locate_sample_ids(df)
      matched <- if (is.null(id_info)) {
        logical(0)
      } else {
        id_info$ids %in% matrix_samples & !duplicated(id_info$ids)
      }

      if (sum(matched) < 2) {
        return(fail(sprintf("在粘贴内容中只找到 %d 个有效样本,需要至少2个",
                            max(length(samples), sum(matched)))))
      }

      samples <- id_info$ids[matched]
      cat(sprintf("📊 找到 %d 个有效样本\n", length(samples)))

      # The first other column with exactly two distinct, complete labels is
      # taken as the grouping.
      for (j in setdiff(seq_along(df), id_info$index)) {
        labels <- clean(df[[j]])[matched]
        if (!anyNA(labels) && all(nzchar(labels)) &&
            length(unique(labels)) == 2) {
          group_names <- unique(labels)
          ctrl_samples <- samples[labels == group_names[1]]
          trt_samples <- samples[labels == group_names[2]]

          cat(sprintf("✅ 自动检测分组: %d 对照 + %d 处理\n",
                      length(ctrl_samples), length(trt_samples)))

          return(list(
            success = TRUE,
            ctrl_samples = ctrl_samples,
            trt_samples = trt_samples
          ))
        }
      }
    }

    # No grouping found: split by position so the user can adjust afterwards.
    cat("⚠️ 无法自动检测分组,返回所有样本供手动选择\n")

    n_ctrl <- ceiling(length(samples) / 2)
    ctrl_samples <- samples[seq_len(n_ctrl)]
    trt_samples <- samples[-seq_len(n_ctrl)]

    cat(sprintf("⚠️ 默认分组: 前 %d 个为对照,后 %d 个为处理\n",
                length(ctrl_samples), length(trt_samples)))

    list(
      success = TRUE,
      ctrl_samples = ctrl_samples,
      trt_samples = trt_samples,
      warning = "无法自动检测分组模式,已按位置默认分组,请手动调整"
    )
  }

  # The handler's value is the function's value, so a failure in either mode
  # is reported to the caller instead of being dropped.
  error_prefix <- if (has_group_col) "解析表格失败:" else "解析失败:"
  tryCatch(
    resolve(),
    error = function(e) fail(paste(error_prefix, conditionMessage(e)))
  )
}
| |
|
| |
|