Export YuanSeq to Hugging Face without binary assets

7e6a9d1 7 days ago

140 kB

	# =====================================================
	# 芯片数据分析模块 (Microarray Data Analysis)
	# =====================================================
	# 功能：
	# 1. 解析 GEO Series Matrix 文件
	# 2. 解析 SOFT 平台注释文件
	# 3. 探针注释 (探针ID → 基因符号)
	# 4. limma 差异分析
	# 5. 与现有下游模块集成 (KEGG/GO/GSEA/TF/通路活性)
	# =====================================================

	# =====================================================
	# 1. GEO Series Matrix 文件解析
	# =====================================================

	#' 解析 GEO Series Matrix 文件
	#'
	#' @param file_path Series Matrix 文件路径
	#' @return list 包含表达矩阵和元数据
	parse_geo_series_matrix <- function(file_path) {
	cat("📂 开始解析 GEO Series Matrix 文件...\n")

	tryCatch({
	# 读取所有行
	lines <- readLines(file_path, warn = FALSE)

	# 查找数据起始标记
	start_idx <- which(lines == "!series_matrix_table_begin")

	if (length(start_idx) == 0) {
	return(list(
	success = FALSE,
	error = "未找到 !series_matrix_table_begin 标记，这不是有效的 GEO Series Matrix 文件"
	))
	}

	start_idx <- start_idx[1] + 1 # 跳过标记行本身

	# 查找数据结束标记（可选）
	end_idx <- which(lines == "!series_matrix_table_end")
	if (length(end_idx) > 0) {
	end_idx <- end_idx[1] - 1
	} else {
	# 如果没有结束标记，读取到文件末尾
	end_idx <- length(lines)
	}

	cat(sprintf("✅ 找到数据区域: 第 %d - %d 行\n", start_idx, end_idx))

	# 提取矩阵数据
	matrix_lines <- lines[start_idx:end_idx]

	# 读取为数据框
	text_connection <- textConnection(matrix_lines)
	expr_matrix <- read.table(
	text_connection,
	header = TRUE,
	row.names = 1,
	sep = "\t",
	quote = "\"", # 允许引号
	comment.char = "",
	stringsAsFactors = FALSE,
	check.names = FALSE
	)
	close(text_connection)

	# 去除列名中的引号
	colnames(expr_matrix) <- gsub('"', '', colnames(expr_matrix))
	rownames(expr_matrix) <- gsub('"', '', rownames(expr_matrix))

	cat(sprintf("✅ 清理引号后样本名示例: %s\n", paste(head(colnames(expr_matrix), 3), collapse = ", ")))
	cat(sprintf("✅ 清理引号后探针ID示例: %s\n", paste(head(rownames(expr_matrix), 3), collapse = ", ")))

	# 提取元数据（用于样本分组提示）
	metadata <- extract_geo_metadata(lines[1:(start_idx-2)])

	cat(sprintf("✅ 解析完成: %d 探针 × %d 样本\n",
	nrow(expr_matrix), ncol(expr_matrix)))

	return(list(
	success = TRUE,
	matrix = expr_matrix,
	metadata = metadata,
	n_probes = nrow(expr_matrix),
	n_samples = ncol(expr_matrix),
	sample_names = colnames(expr_matrix)
	))

	}, error = function(e) {
	return(list(
	success = FALSE,
	error = paste("解析文件时出错:", e$message)
	))
	})
	}

	#' 提取 GEO 元数据
	#'
	#' @param metadata_lines 元数据行
	#' @return list 包含样本描述等信息
	extract_geo_metadata <- function(metadata_lines) {
	result <- list(
	has_metadata = FALSE,
	sample_descriptions = NULL,
	sample_titles = NULL
	)

	# 查找样本描述行
	desc_line <- grep("^!Sample_description", metadata_lines, value = TRUE)

	if (length(desc_line) > 0) {
	# 提取描述信息
	descriptions <- gsub("^!Sample_description\t", "", desc_line)
	descriptions <- gsub('"', '', descriptions)
	# 分割成向量
	descriptions <- strsplit(descriptions[1], "\t")[[1]]
	result$sample_descriptions <- descriptions
	result$has_metadata <- TRUE
	}

	# 查找样本标题行
	title_line <- grep("^!Sample_title", metadata_lines, value = TRUE)

	if (length(title_line) > 0) {
	# 提取标题信息
	titles <- gsub("^!Sample_title\t", "", title_line)
	titles <- gsub('"', '', titles)
	# 分割成向量
	titles <- strsplit(titles[1], "\t")[[1]]
	result$sample_titles <- titles
	result$has_metadata <- TRUE
	}

	return(result)
	}

	# =====================================================
	# 2. SOFT 平台文件解析
	# =====================================================

	#' 解析 SOFT 平台注释文件
	#'
	#' @param file_path SOFT 文件路径
	#' @param separator 字段分隔符（正则表达式，默认为Tab）
	#' @return list 包含原始表格和探针-基因映射
	parse_platform_annotation <- function(file_path, separator = "\t") {
	cat("📋 开始解析 SOFT 平台文件...\n")
	cat(sprintf("📋 使用分隔符: %s\n", separator))

	tryCatch({
	lines <- readLines(file_path, warn = FALSE)

	# 查找平台表起始标记
	start_idx <- which(lines == "!platform_table_begin")

	if (length(start_idx) == 0) {
	return(list(
	success = FALSE,
	error = "未找到 !platform_table_begin 标记"
	))
	}

	start_idx <- start_idx[1] + 1

	# 读取到文件末尾或下一个 ^ 标记
	remaining_lines <- lines[start_idx:length(lines)]
	next_section <- which(grepl("^\\^", remaining_lines))

	if (length(next_section) > 0) {
	end_idx <- start_idx + next_section[1] - 2
	} else {
	end_idx <- length(lines)
	}

	table_lines <- lines[start_idx:end_idx]

	# 读取表头（使用用户指定的分隔符）
	header <- strsplit(table_lines[1], separator)[[1]]

	# 读取数据（使用用户指定的分隔符）
	text_conn <- textConnection(table_lines)
	raw_table <- read.table(
	text_conn,
	header = TRUE,
	sep = separator,
	quote = "",
	stringsAsFactors = FALSE,
	fill = TRUE,
	check.names = FALSE
	)
	close(text_conn)

	cat(sprintf("📋 平台注释文件解析: %d 列 × %d 行\n",
	ncol(raw_table), nrow(raw_table)))
	cat("📋 列名:", paste(colnames(raw_table), collapse = ", "), "\n")

	# 🔍 显示各列的示例数据，帮助用户选择
	cat("📋 各列示例数据:\n")
	for (col in colnames(raw_table)) {
	sample_vals <- head(raw_table[[col]][!is.na(raw_table[[col]]) & raw_table[[col]] != ""], 3)
	cat(sprintf(" %s: %s\n", col, paste(sample_vals, collapse = ", ")))
	}

	# ❌ 不再自动检测基因符号列，由用户手动选择
	cat("⚠️ 请用户手动选择基因符号列\n")

	# 返回原始表格，不进行自动检测和映射
	return(list(
	success = TRUE,
	raw_table = raw_table,
	mapping = NULL, # 不提供自动映射
	gene_symbol_col = NULL, # 不提供自动检测的列名
	needs_manual_selection = TRUE, # 标记需要手动选择
	message = "请手动选择ID列和基因列"
	))

	}, error = function(e) {
	return(list(
	success = FALSE,
	error = paste("解析 SOFT 文件时出错:", e$message)
	))
	})
	}

	#' 智能检测基因符号列（改进版 - 基于内容分析）
	#'
	#' @param table 平台注释数据框
	#' @return 字符列名或 NULL
	detect_gene_symbol_column <- function(table) {
	cat("🔍 开始智能检测基因符号列...\n")

	# 常见的基因符号列名（优先级高）
	high_priority_names <- c(
	"GENE_SYMBOL",
	"Gene.Symbol",
	"Gene_Symbol",
	"gene_symbol",
	"SYMBOL",
	"Symbol",
	"Gene Symbol"
	)

	# 方法1: 优先级列名匹配
	for (name in high_priority_names) {
	if (name %in% colnames(table)) {
	# 验证：检查该列是否真的包含基因符号格式
	col_data <- table[[name]]
	col_data <- col_data[!is.na(col_data) & col_data != ""]
	n_check <- min(50, length(col_data))

	if (n_check >= 10) {
	# 检查是否包含典型基因符号格式
	# 基因符号：字母开头，可能包含数字和连字符，长度2-20
	pattern <- "^[A-Z][A-Z0-9\\-]{1,15}$"
	match_ratio <- sum(grepl(pattern, col_data[1:n_check])) / n_check

	if (match_ratio > 0.5) { # 超过50%匹配
	cat(sprintf("✅ 智能检测到基因列(高优先级): %s (匹配率: %.1f%%)\n",
	name, match_ratio*100))
	return(name)
	}
	}
	}
	}

	# 方法2: 列名部分匹配
	for (name in high_priority_names) {
	matching_cols <- colnames(table)[sapply(colnames(table), function(col) {
	grepl(name, col, ignore.case = TRUE)
	})]

	if (length(matching_cols) > 0) {
	# 验证内容
	for (col_name in matching_cols) {
	col_data <- table[[col_name]]
	col_data <- col_data[!is.na(col_data) & col_data != ""]
	n_check <- min(50, length(col_data))

	if (n_check >= 10) {
	pattern <- "^[A-Z][A-Z0-9\\-]{1,15}$"
	match_ratio <- sum(grepl(pattern, col_data[1:n_check])) / n_check

	if (match_ratio > 0.5) {
	cat(sprintf("✅ 智能检测到基因列(模糊匹配): %s (匹配率: %.1f%%)\n",
	col_name, match_ratio*100))
	return(col_name)
	}
	}
	}
	}
	}

	# 方法3: 检查所有列的内容特征
	best_col <- NULL
	best_match_ratio <- 0

	for (col_name in colnames(table)) {
	# 跳过明显不是基因的列
	if (col_name %in% c("ID", "SPOT_ID", "CONTROL_TYPE", "REFSEQ", "GB_ACC",
	"UNIGENE_ID", "ENSEMBL_ID", "TIGR_ID", "ACCESSION_STRING",
	"CHROMOSOMAL_LOCATION", "CYTOBAND", "DESCRIPTION", "GO_ID",
	"SEQUENCE")) {
	next
	}

	col_data <- table[[col_name]]
	col_data <- col_data[!is.na(col_data) & col_data != ""]
	n_check <- min(100, length(col_data))

	if (n_check < 10) next

	# 检查基因符号特征
	# 典型基因符号：TP53, EGFR, BRCA1, MYC, IL-6, TNF-alpha
	patterns <- c(
	"^[A-Z][A-Z0-9]{1,10}$", # 简单基因符号：TP53, EGFR
	"^[A-Z][A-Z0-9]{1,5}-[0-9]+$", # 带数字的：BRCA1-001
	"^[A-Z]{2,6}-[A-Z0-9]{1,3}$" # 带连字符的：IL-6, TNF-a
	)

	match_count <- sum(sapply(patterns, function(p) {
	sum(grepl(p, col_data[1:n_check]))
	}))

	match_ratio <- match_count / n_check

	if (match_ratio > best_match_ratio && match_ratio > 0.3) {
	best_match_ratio <- match_ratio
	best_col <- col_name
	}
	}

	if (!is.null(best_col)) {
	cat(sprintf("✅ 智能检测到基因列(内容分析): %s (匹配率: %.1f%%)\n",
	best_col, best_match_ratio*100))
	return(best_col)
	}

	cat("⚠️ 无法自动检测基因符号列，需要用户手动选择\n")
	return(NULL)
	}

	#' 执行探针注释（合并表达矩阵和基因映射）
	#'
	#' @param expr_matrix 探针表达矩阵
	#' @param probe_gene_map 探针-基因映射数据框
	#' @return 基因表达矩阵
	annotate_probe_matrix <- function(expr_matrix, probe_gene_map) {
	cat("🔄 开始探针注释...\n")

	# 找到共同的探针
	common_probes <- intersect(rownames(expr_matrix), probe_gene_map$probe_id)

	if (length(common_probes) == 0) {
	stop("❌ 探针ID不匹配，请检查平台注释文件是否正确")
	}

	cat(sprintf("📊 共同探针数: %d (表达矩阵: %d, 注释文件: %d)\n",
	length(common_probes),
	nrow(expr_matrix),
	nrow(probe_gene_map)))

	# 提取共同探针的表达数据
	expr_subset <- expr_matrix[common_probes, , drop = FALSE]

	# 添加探针ID列以便合并
	expr_subset <- data.frame(probe_id = rownames(expr_subset), expr_subset,
	stringsAsFactors = FALSE)

	# 合并基因符号
	merged <- merge(expr_subset, probe_gene_map, by = "probe_id")

	# 移除探针ID列
	probe_id_col <- which(names(merged) == "probe_id")
	if (length(probe_id_col) > 0) {
	merged <- merged[, -probe_id_col]
	}

	# 按基因分组，对表达值取平均值
	# 如果一个基因对应多个探针，取平均值
	library(dplyr)

	expr_annotated <- merged %>%
	group_by(gene_symbol) %>%
	summarise(across(everything(), mean, na.rm = TRUE)) %>%
	column_to_rownames("gene_symbol")

	# 转换为矩阵
	expr_annotated <- as.matrix(expr_annotated)

	cat(sprintf("✅ 探针注释完成: %d 探针 → %d 基因\n",
	length(common_probes),
	nrow(expr_annotated)))

	# 检查是否有基因被重复注释
	n_probes_per_gene <- table(merged$gene_symbol)
	n_multi_probe_genes <- sum(n_probes_per_gene > 1)

	if (n_multi_probe_genes > 0) {
	cat(sprintf("📊 其中 %d 个基因有多个探针(已取平均)\n",
	n_multi_probe_genes))
	}

	return(expr_annotated)
	}

	# =====================================================
	# 3. limma 差异分析
	# =====================================================

	#' 使用 limma 进行芯片数据差异分析
	#'
	#' @param expr_matrix 基因表达矩阵
	#' @param ctrl_samples 对照组样本名
	#' @param trt_samples 处理组样本名
	#' @param pvalue_threshold P值阈值
	#' @param logfc_threshold log2FC阈值
	#' @param pval_type P值类型："adj.P.Val" 或 "P.Value"
	#' @return 差异分析结果
	run_limma_analysis <- function(expr_matrix, ctrl_samples, trt_samples,
	pvalue_threshold = 0.05,
	logfc_threshold = 1,
	pval_type = "adj.P.Val") {
	cat("🧬 开始 limma 差异分析...\n")

	# 检查样本
	if (length(ctrl_samples) == 0 \|\| length(trt_samples) == 0) {
	stop("❌ 对照组和处理组样本数不能为0")
	}

	# 重新排序矩阵（按照 Control -> Treatment）
	sample_order <- c(ctrl_samples, trt_samples)

	# 检查样本是否存在
	missing_samples <- setdiff(sample_order, colnames(expr_matrix))
	if (length(missing_samples) > 0) {
	stop(sprintf("❌ 以下样本不存在于表达矩阵中: %s",
	paste(missing_samples, collapse = ", ")))
	}

	expr_ordered <- expr_matrix[, sample_order, drop = FALSE]

	# 🆕 检查并移除非数值列（如ProbeID和Gene）
	# limma需要纯数值表达矩阵
	if ("ProbeID" %in% colnames(expr_ordered)) {
	cat("📋 移除ProbeID列（非数值）\n")
	expr_ordered <- expr_ordered[, colnames(expr_ordered) != "ProbeID", drop = FALSE]
	}

	if ("Gene" %in% colnames(expr_ordered)) {
	cat("📋 移除Gene列（非数值）\n")
	expr_ordered <- expr_ordered[, colnames(expr_ordered) != "Gene", drop = FALSE]
	}

	# 确保所有列都是数值
	expr_ordered <- as.matrix(expr_ordered)
	storage.mode(expr_ordered) <- "numeric"

	cat(sprintf("📊 最终分析矩阵: %d 基因 × %d 样本\n",
	nrow(expr_ordered), ncol(expr_ordered)))

	# 创建分组因子
	group <- factor(c(rep("Control", length(ctrl_samples)),
	rep("Treatment", length(trt_samples))))

	cat(sprintf("📊 样本分组: Control=%d, Treatment=%d\n",
	length(ctrl_samples), length(trt_samples)))

	# 加载 limma
	library(limma)

	# 设计矩阵
	design <- model.matrix(~0 + group)
	colnames(design) <- levels(group)

	cat("📊 设计矩阵:\n")
	print(design)

	# 线性模型拟合
	fit <- lmFit(expr_ordered, design)

	# 设置对比
	contrast.matrix <- makeContrasts(Treatment-Control, levels=design)

	cat("📊 对比矩阵:\n")
	print(contrast.matrix)

	fit2 <- contrasts.fit(fit, contrast.matrix)
	fit2 <- eBayes(fit2)

	# 提取结果
	results <- topTable(fit2,
	number = Inf,
	adjust.method = "BH",
	sort.by = "P")

	# 🔧 修复：正确设置ID和SYMBOL列
	# rownames(results) 是基因符号（如"FAM174B", "TP53"）
	# expr_matrix现在包含EntrezID列（如果有），用于ID列
	results$SYMBOL <- rownames(results) # SYMBOL = 基因符号（行名）

	# ID列使用EntrezID（如果存在），否则使用基因符号
	if ("EntrezID" %in% colnames(expr_matrix)) {
	# expr_matrix的行名对应results的行名
	results$ID <- expr_matrix[rownames(results), "EntrezID"] # ID = Entrez Gene ID
	cat("✅ ID列使用Entrez Gene ID，SYMBOL列使用基因符号\n")
	} else {
	results$ID <- rownames(results) # 没有EntrezID时，ID也用基因符号
	cat("✅ ID列和SYMBOL列都使用基因符号（无EntrezID）\n")
	}

	# 计算统计信息 - 根据用户选择的P值类型
	n_total <- nrow(results)

	# 🔧 使用用户选择的P值类型
	pval_col <- if (pval_type == "adj.P.Val") "adj.P.Val" else "P.Value"
	pval_values <- results[[pval_col]]

	n_significant <- sum(pval_values < pvalue_threshold, na.rm = TRUE)
	n_up <- sum(results$logFC > logfc_threshold & pval_values < pvalue_threshold,
	na.rm = TRUE)
	n_down <- sum(results$logFC < -logfc_threshold & pval_values < pvalue_threshold,
	na.rm = TRUE)

	cat(sprintf("✅ limma 分析完成: %d 个基因\n", n_total))
	cat(sprintf(" 使用P值类型: %s\n", pval_type))
	cat(sprintf(" 显著差异基因 (%s < %.3f): %d (%.1f%%)\n",
	pval_type, pvalue_threshold, n_significant, n_significant/n_total*100))
	cat(sprintf(" 上调基因 (log2FC > %.2f): %d\n", logfc_threshold, n_up))
	cat(sprintf(" 下调基因 (log2FC < %.2f): %d\n", logfc_threshold, n_down))

	return(list(
	results = results,
	n_total = n_total,
	n_significant = n_significant,
	n_up = n_up,
	n_down = n_down,
	design = design,
	fit = fit2
	))
	}

	# =====================================================
	# 4. 探针表达量聚合
	# =====================================================

	#' 聚合探针表达量到基因水平
	#'
	#' @param probe_matrix 探针表达矩阵（行为探针，列为样本）
	#' @param probe_mapping 探针-基因映射数据框（从 parse_soft_platform 返回）
	#' @return 基因表达矩阵（行为基因，列为样本）
	aggregate_probe_expression <- function(probe_matrix, probe_mapping) {
	cat("🔄 开始聚合探针表达量...\n")

	# 提取映射关系
	probe_ids <- probe_mapping$probe_id
	gene_symbols <- probe_mapping$gene_symbol

	# 创建映射向量（探针ID -> 基因符号）
	names(gene_symbols) <- probe_ids

	# 过滤掉未映射的探针
	common_probes <- intersect(rownames(probe_matrix), probe_ids)

	if (length(common_probes) == 0) {
	cat("⚠️ 未找到匹配的探针ID\n")
	return(NULL)
	}

	cat(sprintf("✅ 匹配探针: %d / %d\n", length(common_probes), nrow(probe_matrix)))

	# 子集表达矩阵和映射
	expr_subset <- probe_matrix[common_probes, , drop = FALSE]
	gene_symbols_subset <- gene_symbols[common_probes]

	# 移除NA和空字符串
	valid_mask <- !is.na(gene_symbols_subset) & gene_symbols_subset != ""
	expr_subset <- expr_subset[valid_mask, , drop = FALSE]
	gene_symbols_subset <- gene_symbols_subset[valid_mask]

	# 聚合方法：对于每个基因，选择表达量最高的探针
	cat("📊 聚合策略: 选择最高表达探针\n")

	# 获取唯一基因
	unique_genes <- unique(gene_symbols_subset)
	cat(sprintf("📊 唯一基因数: %d\n", length(unique_genes)))

	# 为每个基因选择表达量最高的探针
	gene_expr_list <- lapply(unique_genes, function(gene) {
	# 找到该基因的所有探针
	gene_probes <- which(gene_symbols_subset == gene)

	if (length(gene_probes) == 1) {
	# 只有一个探针，直接使用
	return(expr_subset[gene_probes, , drop = FALSE])
	} else {
	# 多个探针，选择平均表达量最高的
	avg_expr <- rowMeans(expr_subset[gene_probes, , drop = FALSE])
	best_probe <- gene_probes[which.max(avg_expr)]
	return(expr_subset[best_probe, , drop = FALSE])
	}
	})

	# 合并为基因表达矩阵
	gene_expr_matrix <- do.call(rbind, gene_expr_list)
	rownames(gene_expr_matrix) <- unique_genes

	cat(sprintf("✅ 聚合完成: %d 个基因\n", nrow(gene_expr_matrix)))

	return(gene_expr_matrix)
	}

	# =====================================================
	# 5. 智能样本分组系统（通用版本）
	# =====================================================

	#' 从 Series Matrix 元数据中自动检测分组
	#'
	#' @param sample_names 样本名向量
	#' @param sample_descriptions 样本描述向量（可选）
	#' @param sample_titles 样本标题向量（可选）
	#' @return list 包含分组建议和分组方法
	detect_chip_groups_auto <- function(sample_names,
	sample_descriptions = NULL,
	sample_titles = NULL) {
	cat("🔍 开始自动检测分组模式...\n")
	cat(sprintf("📊 总样本数: %d\n", length(sample_names)))

	# 定义常见的分组模式
	group_patterns <- list(
	# 模式1: 时间序列 (before/after, baseline/followup)
	list(
	name = "时间序列",
	ctrl_keywords = c("before", "baseline", "time0", "initial", "visit1"),
	trt_keywords = c("after", "post", "follow", "final", "visit2", "visit3")
	),

	# 模式2: 处理对照 (control/treated)
	list(
	name = "处理对照",
	ctrl_keywords = c("control", "ctrl", "untreated", "vehicle", "placebo"),
	trt_keywords = c("treatment", "treated", "drug", "compound", "stimulated")
	),

	# 模式3: 疾病对照 (normal/disease)
	list(
	name = "疾病对照",
	ctrl_keywords = c("normal", "healthy", "control", "wild"),
	trt_keywords = c("disease", "patient", "cancer", "tumor", "sick")
	),

	# 模式4: 基因型 (wildtype/mutant)
	list(
	name = "基因型",
	ctrl_keywords = c("wild", "wt", "wildtype", "normal", "control"),
	trt_keywords = c("mutant", "mut", "knockout", "ko", "transgenic", "tg")
	),

	# 模式5: 剂量反应 (control/dose)
	list(
	name = "剂量反应",
	ctrl_keywords = c("dose0", "dose_0", "control", "vehicle", "untreated"),
	trt_keywords = c("dose", "treatment", "low", "medium", "high")
	),

	# 模式6: 激活/抑制
	list(
	name = "激活抑制",
	ctrl_keywords = c("inactive", "unstimulated", "resting", "control"),
	trt_keywords = c("active", "stimulated", "induced", "activated")
	)
	)

	# 方法1: 从 sample_descriptions 检测
	if (!is.null(sample_descriptions) && length(sample_descriptions) > 0) {
	cat("📋 尝试从 Sample_description 检测分组...\n")

	for (pattern in group_patterns) {
	ctrl_match <- sapply(sample_descriptions, function(d) {
	any(sapply(pattern$ctrl_keywords, function(kw) {
	grepl(kw, d, ignore.case = TRUE)
	}))
	})

	trt_match <- sapply(sample_descriptions, function(d) {
	any(sapply(pattern$trt_keywords, function(kw) {
	grepl(kw, d, ignore.case = TRUE)
	}))
	})

	# 检查是否找到匹配
	if (sum(ctrl_match) > 0 && sum(trt_match) > 0) {
	ctrl_idx <- which(ctrl_match)
	trt_idx <- which(trt_match)

	cat(sprintf("✅ 检测到 '%s' 分组模式 (from description)\n", pattern$name))
	cat(sprintf(" 对照组: %d 个样本 (%s)\n",
	length(ctrl_idx),
	paste(sample_names[ctrl_idx], collapse = ", ")))
	cat(sprintf(" 处理组: %d 个样本 (%s)\n",
	length(trt_idx),
	paste(sample_names[trt_idx], collapse = ", ")))

	return(list(
	pattern_name = pattern$name,
	method = "auto_description",
	ctrl_samples = sample_names[ctrl_idx],
	trt_samples = sample_names[trt_idx],
	ctrl_indices = ctrl_idx,
	trt_indices = trt_idx,
	confidence = "high",
	source = "description"
	))
	}
	}
	}

	# 方法2: 从 sample_titles 检测
	if (!is.null(sample_titles) && length(sample_titles) > 0) {
	cat("📋 尝试从 Sample_title 检测分组...\n")

	for (pattern in group_patterns) {
	ctrl_match <- sapply(sample_titles, function(t) {
	any(sapply(pattern$ctrl_keywords, function(kw) {
	grepl(kw, t, ignore.case = TRUE)
	}))
	})

	trt_match <- sapply(sample_titles, function(t) {
	any(sapply(pattern$trt_keywords, function(kw) {
	grepl(kw, t, ignore.case = TRUE)
	}))
	})

	if (sum(ctrl_match) > 0 && sum(trt_match) > 0) {
	ctrl_idx <- which(ctrl_match)
	trt_idx <- which(trt_match)

	cat(sprintf("✅ 检测到 '%s' 分组模式 (from title)\n", pattern$name))
	cat(sprintf(" 对照组: %d 个样本\n", length(ctrl_idx)))
	cat(sprintf(" 处理组: %d 个样本\n", length(trt_idx)))

	return(list(
	pattern_name = pattern$name,
	method = "auto_title",
	ctrl_samples = sample_names[ctrl_idx],
	trt_samples = sample_names[trt_idx],
	ctrl_indices = ctrl_idx,
	trt_indices = trt_idx,
	confidence = "medium",
	source = "title"
	))
	}
	}
	}

	# 方法3: 从样本名本身检测
	cat("📋 尝试从样本名检测分组...\n")

	# 简化样本名（移除GSM前缀）
	simplified_names <- gsub("^GSM\\d+_", "", sample_names)

	for (pattern in group_patterns) {
	ctrl_match <- sapply(simplified_names, function(n) {
	any(sapply(pattern$ctrl_keywords, function(kw) {
	grepl(kw, n, ignore.case = TRUE)
	}))
	})

	trt_match <- sapply(simplified_names, function(n) {
	any(sapply(pattern$trt_keywords, function(kw) {
	grepl(kw, n, ignore.case = TRUE)
	}))
	})

	if (sum(ctrl_match) > 0 && sum(trt_match) > 0) {
	ctrl_idx <- which(ctrl_match)
	trt_idx <- which(trt_match)

	cat(sprintf("✅ 检测到 '%s' 分组模式 (from name)\n", pattern$name))
	cat(sprintf(" 对照组: %d 个样本\n", length(ctrl_idx)))
	cat(sprintf(" 处理组: %d 个样本\n", length(trt_idx)))

	return(list(
	pattern_name = pattern$name,
	method = "auto_name",
	ctrl_samples = sample_names[ctrl_idx],
	trt_samples = sample_names[trt_idx],
	ctrl_indices = ctrl_idx,
	trt_indices = trt_idx,
	confidence = "medium",
	source = "name"
	))
	}
	}

	# 所有方法都失败
	cat("⚠️ 未能自动检测到分组模式，请手动设置\n")

	return(list(
	pattern_name = NULL,
	method = "manual",
	ctrl_samples = NULL,
	trt_samples = NULL,
	ctrl_indices = NULL,
	trt_indices = NULL,
	confidence = NULL,
	source = NULL
	))
	}

	#' 检测配对关系
	#'
	#' @param sample_names 样本名
	#' @param metadata 元数据
	#' @return list 配对关系或NULL
	detect_pairing <- function(sample_names, metadata) {
	# 尝试从样本名中提取配对信息
	# 例如: Patient1_before, Patient1_after

	# 提取共同的配对标识符
	# 方法：移除已知的对照组/处理组关键词后，看剩余部分是否匹配

	# 简化样本名
	simplified <- gsub("^GSM\\d+_", "", sample_names)

	# 定义配对关键词
	pairing_keywords <- list(
	before = "after",
	baseline = "follow",
	control = "treated",
	time0 = "time1",
	visit1 = "visit2",
	pre = "post"
	)

	# 尝试检测配对模式
	for (kw1 in names(pairing_keywords)) {
	kw2 <- pairing_keywords[[kw1]]

	# 检查是否有样本包含这些关键词
	has_kw1 <- grepl(kw1, simplified, ignore.case = TRUE)
	has_kw2 <- grepl(kw2, simplified, ignore.case = TRUE)

	if (sum(has_kw1) > 0 && sum(has_kw2) > 0) {
	# 提取配对标识符
	ids_kw1 <- gsub(kw1, "", simplified[has_kw1], ignore.case = TRUE)
	ids_kw2 <- gsub(kw2, "", simplified[has_kw2], ignore.case = TRUE)

	# 找到共同的ID
	common_ids <- intersect(ids_kw1, ids_kw2)

	if (length(common_ids) > 0) {
	cat(sprintf("💡 检测到配对设计: %d 对样本\n", length(common_ids)))

	return(list(
	is_paired = TRUE,
	n_pairs = length(common_ids),
	pairing_pattern = sprintf("%s/%s", kw1, kw2)
	))
	}
	}
	}

	return(list(is_paired = FALSE))
	}

	# =====================================================
	# 5. 转换为标准格式（与现有模块兼容）
	# =====================================================

	#' 将芯片差异分析结果转换为标准格式
	#'
	#' @param limma_results limma 分析结果
	#' @param expr_matrix 表达矩阵
	#' @param ctrl_samples 对照组样本
	#' @param trt_samples 处理组样本
	#' @return 标准格式的差异分析结果
	format_chip_results_for_pipeline <- function(limma_results, expr_matrix,
	ctrl_samples, trt_samples) {
	# 🔧 修复：正确使用ID（EntrezID）和SYMBOL（基因符号）列
	limma_res <- limma_results$results

	deg_df <- data.frame(
	ID = limma_res$ID, # Entrez Gene ID（如果有的话）
	SYMBOL = limma_res$SYMBOL, # 基因符号
	log2FoldChange = limma_res$logFC, # log2倍数变化
	pvalue = limma_res$P.Value, # 原始p值
	padj = limma_res$adj.P.Val, # BH校正p值
	baseMean = limma_res$AveExpr, # 平均表达
	t = limma_res$t, # t统计量（用于TF活性分析）
	row.names = NULL,
	stringsAsFactors = FALSE
	)

	# ENTREZID列与ID列相同（都是Entrez Gene ID）
	deg_df$ENTREZID <- deg_df$ID

	return(list(
	deg_df = deg_df,
	background_genes = rownames(expr_matrix), # 背景基因（用于富集分析）
	expr_matrix = expr_matrix, # 完整表达矩阵（用于 AUCell/GSVA）
	ctrl_samples = ctrl_samples,
	trt_samples = trt_samples,
	method = "limma",
	n_significant = limma_results$n_significant,
	n_up = limma_results$n_up,
	n_down = limma_results$n_down
	))
	}

	# =====================================================
	# 6. Shiny Server 函数
	# =====================================================

	#' 芯片数据分析模块 UI 生成函数
	#'
	#' @return UI 元素
	chip_analysis_ui <- function() {
	tagList(
	fluidRow(
	column(12,
	div(
	class = "info-box",
	style = "background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
	color: white; padding: 25px; border-radius: 15px; margin-bottom: 25px;",
	h4("🧬 芯片数据分析模块", style = "margin-top: 0; color: white;"),
	p("支持 GEO Series Matrix 格式的芯片数据差异分析，自动探针注释，无缝集成下游富集分析。",
	style = "color: rgba(255,255,255,0.9); margin-bottom: 0;")
	)
	)
	),

	# 🎨 使用可折叠面板组织所有步骤
	tags$div(
	id = "chip_analysis_accordion",

	# ===== 面板1: 数据上传 =====
	tags$div(
	class = "panel panel-default",
	tags$div(
	class = "panel-heading",
	style = "cursor: pointer; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 15px;",
	`data-toggle` = "collapse",
	`data-target` = "#panel_upload",
	tags$h4(
	class = "panel-title",
	style = "margin: 0;",
	tags$span(icon("upload"), " 📁 步骤1: 数据上传与预览")
	)
	),
	tags$div(
	id = "panel_upload",
	class = "panel-collapse collapse in", # in = 默认展开

	wellPanel(
	# 文件上传
	fluidRow(
	column(6,
	h5("📄 上传数据文件", style = "color: #007AFF;"),
	fileInput("chip_series_matrix",
	"GEO Series Matrix 文件",
	accept = c(".txt", ".matrix.txt", "text/plain"),
	placeholder = "选择文件..."),
	helpText("GEO 数据库下载的 Series Matrix 文件（通常包含样本表达矩阵）")
	),
	column(6,
	fileInput("chip_soft_platform",
	"SOFT 平台注释文件 (可选)",
	accept = c(".txt", ".soft", "annot.txt", "text/plain"),
	placeholder = "选择文件..."),
	helpText("用于探针注释的 GPL 平台文件。如不上传，系统将尝试自动注释。")
	)
	),

	tags$hr(style = "border-color: #dee2e6;"),

	# 数据预览
	h5("📊 数据文件预览", style="color: #007AFF;"),

	fluidRow(
	column(6,
	h6("Series Matrix 文件（前5行）", style="color: #666;"),
	DTOutput("chip_series_matrix_preview")
	),
	column(6,
	h6("SOFT 文件（前10行）", style="color: #666;"),
	DTOutput("chip_soft_raw_preview")
	)
	)
	),
	tags$div(
	class = "panel-body",
	style = "padding: 15px;"
	)
	)
	),

	# ===== 面板2: 探针注释配置 =====
	tags$div(
	class = "panel panel-default",
	tags$div(
	class = "panel-heading",
	style = "cursor: pointer; background: linear-gradient(135deg, #9C27B0 0%, #7B1FA2 100%); color: white; padding: 15px;",
	`data-toggle` = "collapse",
	`data-target` = "#panel_annotation",
	tags$h4(
	class = "panel-title",
	style = "margin: 0;",
	tags$span(icon("cogs"), " 🧬 步骤2: 探针注释与数据合并")
	)
	),
	tags$div(
	id = "panel_annotation",
	class = "panel-collapse collapse", # 默认折叠

	wellPanel(
	# SOFT文件列名清单
	uiOutput("chip_soft_columns_list_ui"),

	tags$hr(style = "border-color: #dee2e6;"),

	# 探针注释配置
	h5("📋 探针注释配置", style = "color: #9C27B0;"),
	uiOutput("chip_soft_column_selection_panel"),

	tags$hr(style = "border-color: #dee2e6;"),

	# 合并操作按钮
	fluidRow(
	column(6,
	actionButton("chip_preview_merge", "👁️ 预览合并结果",
	class = "btn-info", style = "width: 100%;")
	),
	column(6,
	actionButton("chip_apply_merge", "✅ 应用配置并生成最终矩阵",
	class = "btn-success", style = "width: 100%;")
	)
	),

	# 合并预览
	conditionalPanel(
	condition = "input.chip_preview_merge",
	wellPanel(
	style = "background: #e8f5e9; border: 2px solid #4caf50;",
	h5("👁️ 合并结果预览（前5行）", style = "color: #2e7d32;"),
	DTOutput("chip_merge_preview_table")
	)
	),

	# 最终矩阵显示
	conditionalPanel(
	condition = "input.chip_apply_merge",
	wellPanel(
	style = "background: linear-gradient(135deg, #e8f5e9 0%, #c8e6c9 100%); border: 2px solid #4caf50;",
	h5("✅ 最终表达矩阵", style = "color: #2e7d32; font-size: 18px; font-weight: bold;"),
	uiOutput("chip_final_matrix_ui"),
	br(),
	helpText("💡 此矩阵可直接用于后续的差异分析。已将探针ID和基因符号合并到表达数据中。", style = "color: #2e7d33;")
	)
	)
	)
	),

	# ===== 面板3: 数据预处理与探针去重 =====
	tags$div(
	class = "panel panel-default",
	tags$div(
	class = "panel-heading",
	style = "cursor: pointer; background: linear-gradient(135deg, #ff9800 0%, #f57c00 100%); color: white; padding: 15px;",
	`data-toggle` = "collapse",
	`data-target` = "#panel_preprocess",
	tags$h4(
	class = "panel-title",
	style = "margin: 0;",
	tags$span(icon("sliders-h"), " 🔧 步骤3: 数据预处理与探针去重")
	)
	),
	tags$div(
	id = "panel_preprocess",
	class = "panel-collapse collapse in", # 默认展开

	wellPanel(
	# 预处理
	h5("📊 数据预处理（log2转换 + 标准化）", style = "color: #ff9800; font-weight: bold;"),
	fluidRow(
	column(6,
	checkboxInput("chip_auto_log2", "自动判断并执行log2转换", value = TRUE),
	checkboxInput("chip_normalize_data", "执行limma标准化（normalizeBetweenArrays）", value = TRUE)
	),
	column(6,
	actionButton("chip_preprocess_data", "⚙️ 执行预处理",
	class = "btn-warning", style = "width: 100%;")
	)
	),

	# 预处理结果
	conditionalPanel(
	condition = "input.chip_preprocess_data",
	uiOutput("chip_preprocess_result_ui")
	),

	tags$hr(style = "border-color: #ffc107;"),

	# 批次矫正
	h5("🎛️ 批次效应矫正（可选）", style = "color: #E91E63; font-weight: bold;"),
	fluidRow(
	column(6,
	selectInput("chip_batch_method", "批次矫正方法",
	choices = c("无" = "none", "ComBat (sva)" = "combat",
	"ComBat (limma)" = "limma", "SVA" = "sva"),
	selected = "none")
	),
	column(6,
	actionButton("chip_apply_batch_correct", "🎛️ 执行批次矫正",
	class = "btn-danger", style = "width: 100%;")
	)
	),

	# 批次矫正结果
	conditionalPanel(
	condition = "input.chip_apply_batch_correct",
	uiOutput("chip_batch_correct_result_ui")
	),

	tags$hr(style = "border-color: #ffc107;"),

	# 探针去重
	h5("✂️ 探针去重（保留表达量最高的探针）", style = "color: #ff9800; font-weight: bold;"),
	helpText("当一个基因对应多个探针时，保留表达量最高的探针。这将生成基因级别的表达矩阵。"),

	fluidRow(
	column(6,
	h6("去重前统计：", style = "color: #666;"),
	uiOutput("chip_before_dedupe_stats")
	),
	column(6,
	h6("去重后统计：", style = "color: #666;"),
	uiOutput("chip_after_dedupe_stats")
	)
	),

	actionButton("chip_dedupe_probes", "✂️ 执行探针去重",
	class = "btn-warning btn-lg", style = "width: 100%;"),

	# 去重结果
	conditionalPanel(
	condition = "input.chip_dedupe_probes",
	wellPanel(
	h6("✅ 去重完成", style = "color: #28a745;"),
	uiOutput("chip_dedupe_result_ui")
	)
	),

	tags$hr(style = "border-color: #ffc107;"),

	# 生成标准格式数据
	h5("💾 生成标准格式数据", style = "color: #ff9800; font-weight: bold;"),
	helpText("将处理后的表达矩阵转换成标准格式，可直接用于后续的差异分析、KEGG、GO等模块。"),

	fluidRow(
	column(12,
	div(
	style = "background: #d4edda; padding: 15px; border-radius: 8px; border: 1px solid #c3e6cb;",
	h6("💡 即可对接现有分析模块：", style = "color: #155724;"),
	tags$ul(style="margin: 10px 0; padding-left: 20px;",
	tags$li("差异分析（使用现有的差异分析模块）"),
	tags$li("KEGG富集分析（使用现有的KEGG模块）"),
	tags$li("GO富集分析（使用现有的GO模块）"),
	tags$li("GSEA分析（使用现有的GSEA模块）")
	)
	)
	)
	),

	actionButton("chip_generate_standard_data", "🚀 生成标准格式数据",
	class = "btn-success btn-lg", style = "width: 100%; font-size: 16px;")
	)
	)
	),

	# ===== 面板4: 差异分析 =====
	tags$div(
	class = "panel panel-default",
	tags$div(
	class = "panel-heading",
	style = "cursor: pointer; background: linear-gradient(135deg, #34C759 0%, #2e7d32 100%); color: white; padding: 15px;",
	`data-toggle` = "collapse",
	`data-target` = "#panel_diff_analysis",
	tags$h4(
	class = "panel-title",
	style = "margin: 0;",
	tags$span(icon("chart-bar"), " 🧬 步骤4: 差异分析")
	)
	),
	tags$div(
	id = "panel_diff_analysis",
	class = "panel-collapse collapse", # 默认折叠
	style = "padding: 15px;",

	wellPanel(
	# 样本分组
	uiOutput("chip_grouping_ui"),

	tags$hr(style = "border-color: #34C759;"),

	# 差异分析参数
	h5("🔬 差异分析参数", style = "color: #34C759; font-weight: bold;"),
	fluidRow(
	column(4,
	sliderInput("chip_logfc_threshold",
	"log2FoldChange 阈值:",
	min = 0, max = 5, value = 1, step = 0.1)
	),
	column(4,
	selectInput("chip_pval_type",
	"显著性指标:",
	choices = c("校正P值 (adj.P.Val)" = "adj.P.Val",
	"原始P值 (P.Value)" = "P.Value"),
	selected = "adj.P.Val")
	),
	column(4,
	sliderInput("chip_pvalue_threshold",
	"P值阈值:",
	min = 0.001, max = 0.1, value = 0.05, step = 0.001)
	)
	),

	fluidRow(
	column(12,
	checkboxInput("chip_paired_analysis",
	"配对样本分析（如果适用）",
	value = FALSE)
	)
	),

	tags$hr(style = "border-color: #34C759;"),

	actionButton("run_chip_analysis", "🚀 运行差异分析",
	class = "btn-primary btn-lg",
	style = "width: 100%; margin-top: 15px;")
	)
	)
	)
	),

	# ===== 面板5: 分析结果 =====
	tags$div(
	class = "panel panel-default",
	tags$div(
	class = "panel-heading",
	style = "cursor: pointer; background: linear-gradient(135deg, #007AFF 0%, #0051D5 100%); color: white; padding: 15px;",
	`data-toggle` = "collapse",
	`data-target` = "#panel_results",
	tags$h4(
	class = "panel-title",
	style = "margin: 0;",
	tags$span(icon("table"), " 📊 步骤5: 分析结果")
	)
	),
	tags$div(
	id = "panel_results",
	class = "panel-collapse collapse", # 默认折叠
	style = "padding: 15px;",

	wellPanel(
	# 结果统计和表格（chip_results_ui 已包含表格）
	uiOutput("chip_results_ui"),

	tags$hr(style = "border-color: #007AFF;"),

	# 下载按钮
	fluidRow(
	column(12,
	downloadButton("download_chip_results", "📥 下载结果", class = "btn-success")
	)
	)
	)
	)
	)
	)
	)
	}

	#' 芯片数据分析模块 Server 函数
	#'
	#' @param input Shiny input
	#' @param output Shiny output
	#' @param session Shiny session
	#' @param deg_results 差异分析结果容器（用于更新）
	chip_analysis_server <- function(input, output, session, deg_results) {
	cat("✅ 芯片分析模块已启动\n")

	# 渲染 UI
	output$chip_analysis_ui_output <- renderUI({
	chip_analysis_ui()
	})

	# 存储芯片数据
	chip_data <- reactiveValues(
	series_matrix = NULL,
	soft_platform = NULL,
	probe_mapping = NULL,
	expr_matrix = NULL,
	metadata = NULL,
	group_info = NULL,
	manual_ctrl_samples = NULL,
	manual_trt_samples = NULL
	)

	# 解析对照组样本
	observeEvent(input$chip_parse_ctrl, {
	req(input$chip_paste_ctrl)
	req(chip_data$series_matrix)

	pasted_text <- input$chip_paste_ctrl

	# 显示进度
	showNotification("正在解析对照组样本...", type = "message")

	# 解析样本列表
	samples <- parse_sample_list(pasted_text, chip_data$series_matrix)

	if (is.null(samples) \|\| length(samples) == 0) {
	showNotification("解析失败：未找到有效样本", type = "error")
	return(NULL)
	}

	# 保存对照组
	chip_data$manual_ctrl_samples <- samples

	showNotification(
	sprintf("✅ 成功解析对照组: %d 个样本", length(samples)),
	type = "message"
	)
	})

	# 解析处理组样本
	observeEvent(input$chip_parse_trt, {
	req(input$chip_paste_trt)
	req(chip_data$series_matrix)

	pasted_text <- input$chip_paste_trt

	# 显示进度
	showNotification("正在解析处理组样本...", type = "message")

	# 解析样本列表
	samples <- parse_sample_list(pasted_text, chip_data$series_matrix)

	if (is.null(samples) \|\| length(samples) == 0) {
	showNotification("解析失败：未找到有效样本", type = "error")
	return(NULL)
	}

	# 保存处理组
	chip_data$manual_trt_samples <- samples

	showNotification(
	sprintf("✅ 成功解析处理组: %d 个样本", length(samples)),
	type = "message"
	)
	})

	# 清除分组设置
	observeEvent(input$chip_clear_groups, {
	chip_data$manual_ctrl_samples <- NULL
	chip_data$manual_trt_samples <- NULL
	showNotification("已清除分组设置", type = "message")
	})

	# 解析 Series Matrix 文件
	observeEvent(input$chip_series_matrix, {
	req(input$chip_series_matrix)

	file_path <- input$chip_series_matrix$datapath

	# 显示进度
	showNotification("正在解析 GEO Series Matrix 文件...", type = "message")

	# 解析文件
	result <- parse_geo_series_matrix(file_path)

	if (!result$success) {
	showNotification(result$error, type = "error")
	return(NULL)
	}

	# 保存数据
	chip_data$series_matrix <- result$matrix
	chip_data$metadata <- result$metadata
	chip_data$expr_matrix <- result$matrix

	showNotification(
	sprintf("✅ 成功解析: %d 探针 × %d 样本",
	result$n_probes, result$n_samples),
	type = "message"
	)

	# 尝试自动检测分组
	if (!is.null(result$metadata)) {
	group_info <- detect_chip_groups_auto(
	sample_names = result$sample_names,
	sample_descriptions = result$metadata$sample_descriptions,
	sample_titles = result$metadata$sample_titles
	)
	chip_data$group_info <- group_info
	}
	}, ignoreNULL = TRUE)

	# Series Matrix 文件预览
	output$chip_series_matrix_preview <- renderDT({
	req(chip_data$series_matrix)

	# 提取前5行
	preview_matrix <- head(chip_data$series_matrix, 5)

	# 转换为数据框（保留行名）
	preview_df <- as.data.frame(preview_matrix)

	datatable(
	preview_df,
	options = list(
	dom = 't',
	paging = FALSE,
	scrollX = TRUE,
	columnDefs = list(list(
	className = 'dt-center',
	targets = "_all"
	))
	),
	rownames = TRUE, # 显示行名（探针ID）
	filter = 'none'
	) %>%
	formatStyle(columns = 1:min(5, ncol(preview_df)), fontSize = '85%')
	})

	# SOFT 文件预览UI
	output$chip_soft_preview_ui <- renderUI({
	# 只要有文件上传记录就显示预览
	if (!is.null(input$chip_soft_platform)) {
	cat(sprintf("🔍 渲染SOFT预览UI (文件已上传)\n"))

	if (!is.null(chip_data$soft_platform)) {
	cat(sprintf(" 数据可用: %d rows x %d cols\n",
	nrow(chip_data$soft_platform), ncol(chip_data$soft_platform)))
	} else {
	cat(" ⚠️ 数据尚未加载到chip_data\n")
	}

	tagList(
	h5("📄 SOFT 平台注释文件（前10行）", style = "color: #FF9800;"),
	helpText("这是SOFT注释文件的真实数据。可以看到ID列和多个可能的基因列。"),
	DTOutput("chip_soft_raw_preview")
	)
	} else {
	cat("⚠️ SOFT文件未上传\n")
	div(
	class = "alert alert-info",
	h5("📄 请先上传SOFT平台注释文件"),
	p("上传后将在此处显示文件预览。")
	)
	}
	})

	# 🆕 SOFT文件列名列表展示
	output$chip_soft_columns_list_ui <- renderUI({
	req(chip_data$soft_platform)

	soft_cols <- colnames(chip_data$soft_platform)

	tagList(
	h6("📌 所有列名（共", span(style = "color: #FF9500;", length(soft_cols)), "列）:", style = "color: #333;"),

	# 以表格形式展示所有列名
	div(
	style = "background: #fff3e0; padding: 15px; border-radius: 8px; border-left: 4px solid #FF9800;",

	# 每行显示3列
	div(style = "display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;",
	lapply(seq_along(soft_cols), function(i) {
	col_name <- soft_cols[i]
	div(
	style = "background: white; padding: 8px; border-radius: 4px; border: 1px solid #FFB74D;",
	tags$span(style = "color: #E65100; font-weight: bold; font-family: monospace;",
	sprintf("%d. %s", i, col_name))
	)
	})
	)
	),

	br(),

	# 重要提示
	div(
	style = "background: #e3f2fd; padding: 12px; border-radius: 5px; border-left: 4px solid #2196F3;",
	h6("💡 如何选择基因列？", style = "color: #1976D2; margin-top: 0;"),
	tags$ul(style = "padding-left: 20px; margin: 5px 0;",
	tags$li("查看下方的列内容示例，了解每列包含什么数据"),
	tags$li("基因符号列通常包含：TP53, EGFR, BRCA1, MYC 等基因名称"),
	tags$li("ID列通常包含：数字或探针标识符（如 1553601_at）"),
	tags$li("点击下方的'查看列内容'按钮查看每列的前5行数据")
	)
	)
	)
	})

	# 🆕 显示各列内容示例
	output$chip_soft_column_examples_ui <- renderUI({
	req(chip_data$soft_platform)

	soft_cols <- colnames(chip_data$soft_platform)

	# 为每一列生成示例展示
	tagList(
	h6("📊 各列内容示例（前3行）", style = "color: #333;"),

	div(style = "max-height: 400px; overflow-y: auto;"),
	lapply(soft_cols, function(col_name) {
	# 获取该列的前3个非空值
	col_data <- chip_data$soft_platform[[col_name]]
	col_data <- col_data[!is.na(col_data) & col_data != ""]
	examples <- head(col_data, 3)

	wellPanel(
	style = "padding: 10px; margin-bottom: 10px;",
	h7(style = "color: #FF9800; font-weight: bold; margin-bottom: 5px;",
	sprintf("🔹 %s", col_name)),
	div(
	style = "background: #f5f5f5; padding: 8px; border-radius: 4px; font-family: monospace; font-size: 11px;",
	for (i in seq_along(examples)) {
	tags$div(
	sprintf(" %d. %s", i, as.character(examples[i])),
	style = i < length(examples) ? "margin-bottom: 5px;" : ""
	)
	}
	)
	)
	})
	)
	})

	# 解析 SOFT 平台文件
	observeEvent(input$chip_soft_platform, {
	req(input$chip_soft_platform)

	file_path <- input$chip_soft_platform$datapath

	showNotification("正在解析 SOFT 平台注释文件...", type = "message")

	result <- parse_platform_annotation(file_path, "\t") # 使用Tab作为分隔符

	if (!result$success) {
	showNotification(result$error, type = "error")
	return(NULL)
	}

	chip_data$soft_platform <- result$raw_table
	chip_data$probe_mapping <- NULL # 不使用自动映射
	chip_data$gene_symbol_col <- NULL # 不使用自动检测的列名

	cat(sprintf("💾 SOFT数据已保存: %d rows x %d cols\n",
	nrow(chip_data$soft_platform), ncol(chip_data$soft_platform)))
	cat("⚠️ 请用户手动选择ID列和基因列\n")

	showNotification(
	"✅ SOFT文件已加载，请在下方手动选择ID列和基因列",
	type = "message"
	)
	}, ignoreNULL = TRUE)

	# 重新解析SOFT文件（修改分隔符后）
	observeEvent(input$chip_reparse_soft, {
	req(input$chip_soft_platform)

	file_path <- input$chip_soft_platform$datapath

	# 获取分隔符，默认为Tab
	separator <- if (is.null(input$chip_soft_separator) \|\| input$chip_soft_separator == "") {
	"\t"
	} else {
	input$chip_soft_separator
	}

	showNotification("正在重新解析 SOFT 平台注释文件...", type = "message")

	result <- parse_platform_annotation(file_path, separator)

	if (!result$success) {
	showNotification(result$error, type = "error")
	return(NULL)
	}

	chip_data$soft_platform <- result$raw_table
	chip_data$probe_mapping <- NULL # 不使用自动映射
	chip_data$gene_symbol_col <- NULL # 不使用自动检测的列名

	showNotification(
	"✅ 重新解析完成，请手动选择ID列和基因列",
	type = "message"
	)
	})

	# 数据概览
	output$chip_data_summary <- renderDT({
	req(chip_data$series_matrix)

	matrix <- chip_data$series_matrix

	# 创建摘要表
	summary_df <- data.frame(
	项目 = c("探针数", "样本数", "样本名称"),
	值 = c(
	nrow(matrix),
	ncol(matrix),
	paste(colnames(matrix), collapse = ", ")
	)
	)

	# 添加分组信息
	if (!is.null(chip_data$group_info) && !is.null(chip_data$group_info$pattern_name)) {
	summary_df <- rbind(summary_df, data.frame(
	项目 = c("检测到分组模式", "对照组样本", "处理组样本"),
	值 = c(
	chip_data$group_info$pattern_name,
	paste(chip_data$group_info$ctrl_samples, collapse = ", "),
	paste(chip_data$group_info$trt_samples, collapse = ", ")
	)
	))
	}

	datatable(summary_df,
	options = list(dom = 't', paging = FALSE),
	rownames = FALSE)
	})

	# 注释状态显示
	output$chip_annotation_status <- renderUI({
	req(chip_data$series_matrix)

	# 检查是否加载了SOFT文件
	soft_loaded <- !is.null(chip_data$soft_platform)

	if (soft_loaded) {
	# 已加载SOFT文件但尚未配置映射
	div(
	class = "alert alert-info",
	h5("✅ SOFT文件已加载，等待配置", style = "color: #17a2b8;"),
	p(sprintf("总探针数: %d", nrow(chip_data$series_matrix))),
	p(sprintf("SOFT平台数据: %d 行 x %d 列",
	nrow(chip_data$soft_platform),
	ncol(chip_data$soft_platform))),
	p("💡 请在下方选择ID列和基因列以建立探针映射。", style = "color: #007bff; font-weight: bold;"),
	if (!is.null(chip_data$probe_mapping)) {
	p(sprintf("成功映射: %d (%.1f%%)",
	nrow(chip_data$probe_mapping),
	nrow(chip_data$probe_mapping) / nrow(chip_data$series_matrix) * 100))
	}
	)
	} else {
	# 未加载SOFT文件
	div(
	class = "alert alert-warning",
	h5("⚠️ 未加载探针注释文件", style = "color: #ffc107;"),
	p("将直接使用探针ID作为基因符号进行分析。"),
	p("强烈建议上传 SOFT 平台注释文件以获得准确的结果。")
	)
	}
	})

	# 🆕 SOFT文件列选择面板（使用renderUI而不是conditionalPanel）
	output$chip_soft_column_selection_panel <- renderUI({
	# 检查SOFT文件是否加载
	if (is.null(chip_data$soft_platform)) {
	return(NULL)
	}

	# 只打印一次日志，避免刷屏
	if (is.null(chip_data$panel_initialized)) {
	cat(sprintf("✅ 初始化SOFT列选择面板: %d 行 x %d 列\n",
	nrow(chip_data$soft_platform),
	ncol(chip_data$soft_platform)))
	chip_data$panel_initialized <- TRUE
	}

	# 使用isolate防止input变化触发重新渲染
	soft_cols <- isolate(colnames(chip_data$soft_platform))

	# 获取当前选择值（使用isolate避免建立依赖）
	current_id <- isolate({
	if (!is.null(input$chip_soft_id_col) && input$chip_soft_id_col != "") {
	input$chip_soft_id_col
	} else {
	""
	}
	})

	current_gene <- isolate({
	if (!is.null(input$chip_soft_gene_col) && input$chip_soft_gene_col != "") {
	input$chip_soft_gene_col
	} else {
	""
	}
	})

	wellPanel(
	style = "background: linear-gradient(135deg, #fff7e6 0%, #ffe6b3 100%); border: 2px solid #FF9800;",

	h4("📋 SOFT文件列选择", style = "color: #FF9800; margin-top: 0;"),

	helpText("💡 您可以预先浏览和选择SOFT文件的列，即使还未上传Series Matrix文件。"),

	br(),

	fluidRow(
	column(4,
	h5("选择ID列", style = "color: #9C27B0; font-weight: bold;"),
	selectInput("chip_soft_id_col",
	"选择ID列（必须与Series Matrix的探针ID匹配）",
	choices = c("", soft_cols),
	selected = current_id),
	helpText("选择SOFT文件中包含探针ID的列（通常为'ID'列）")
	),
	column(4,
	h5("选择基因列", style = "color: #9C27B0; font-weight: bold;"),
	selectInput("chip_soft_gene_col",
	"选择基因列（包含基因符号的列）",
	choices = c("", soft_cols),
	selected = current_gene),
	helpText("选择包含基因符号的列（如GENE_SYMBOL, SYMBOL）")
	),
	column(4,
	h5("选择EntrezID列（可选）", style = "color: #FF5722; font-weight: bold;"),
	selectInput("chip_soft_entrez_col",
	"选择Entrez Gene ID列",
	choices = c("", "自动检测", soft_cols),
	selected = ""),
	helpText("选择包含Entrez Gene ID的列（如GENE, ENTREZID）", style = "font-size: 11px; color: #FF5722;")
	)
	),

	# 状态提示单独的uiOutput，避免循环
	uiOutput("chip_selection_status")
	)
	})

	# 🆕 状态提示单独渲染（避免导致父级重新渲染）
	output$chip_selection_status <- renderUI({
	# 使用isolate避免触发父级renderUI
	id_col <- isolate(input$chip_soft_id_col)
	gene_col <- isolate(input$chip_soft_gene_col)
	entrez_col <- isolate(input$chip_soft_entrez_col)

	# 🔍 自动检测可能的EntrezID列
	possible_entrez_hint <- ""
	if (!is.null(chip_data$soft_platform)) {
	soft_cols <- colnames(chip_data$soft_platform)
	# 检查是否有常见的EntrezID列名
	entrez_candidates <- c("ENTREZ_GENE_ID", "ENTREZID", "EntrezID", "GeneID",
	"GENE_ID", "ENTREZ_GENE", "GENE")
	found_cols <- intersect(entrez_candidates, soft_cols)

	if (length(found_cols) > 0) {
	possible_entrez_hint <- p(sprintf("💡 检测到可能的EntrezID列: %s",
	paste(found_cols, collapse = ", ")),
	collapse = " ")
	}
	}

	if (is.null(id_col) \|\| id_col == "" \|\| is.null(gene_col) \|\| gene_col == "") {
	div(
	style = "background: #fff3cd; padding: 10px; border-radius: 5px; margin-top: 15px;",
	h6("⚠️ 未完成选择", style = "color: #856404;"),
	p("请选择ID列和基因列，然后上传Series Matrix文件并点击应用按钮。", style = "font-size: 12px;"),
	if (possible_entrez_hint != "") {
	p(possible_entrez_hint, style = "font-size: 11px; color: #FF5722; font-weight: bold;")
	}
	)
	} else {
	status_color <- "d4edda"
	status_text <- "✅ 已选择列"
	status_text_color <- "155724"

	# 检查是否选择了EntrezID列
	if (is.null(entrez_col) \|\| entrez_col == "") {
	if (possible_entrez_hint != "") {
	status_color <- "fff3cd"
	status_text <- "⚠️ 建议选择EntrezID列"
	status_text_color <- "856404"
	}
	}

	div(
	style = sprintf("background: %s; padding: 10px; border-radius: 5px; margin-top: 15px;", status_color),
	h6(status_text, style = sprintf("color: %s;", status_text_color)),
	p(sprintf("ID列: %s \| 基因列: %s", id_col, gene_col),
	style = "font-size: 12px; font-weight: bold;"),
	if (!is.null(entrez_col) && entrez_col != "" && entrez_col != "自动检测") {
	p(sprintf("EntrezID列: %s", entrez_col),
	style = "font-size: 12px; color: #FF5722; font-weight: bold;")
	},
	p("💡 请上传Series Matrix文件，然后点击'✅ 应用配置并生成最终矩阵'按钮。",
	style = "font-size: 12px;"),
	if (possible_entrez_hint != "" && (is.null(entrez_col) \|\| entrez_col == "")) {
	p(possible_entrez_hint,
	style = "font-size: 11px; color: #FF5722; font-weight: bold; margin-top: 5px;"
	)
	}
	)
	}
	})

	# 🆕 监听列选择（只在日志中记录，不触发UI更新）
	observe({
	# 只有两个列都选择了才处理
	req(input$chip_soft_id_col)
	req(input$chip_soft_gene_col)
	req(input$chip_soft_id_col != "")
	req(input$chip_soft_gene_col != "")

	# 保存选择到chip_data
	chip_data$selected_id_col <- input$chip_soft_id_col
	chip_data$selected_gene_col <- input$chip_soft_gene_col

	# 使用isolate避免触发UI重新渲染
	isolate({
	cat(sprintf("📋 用户已选择: ID列=%s, 基因列=%s\n",
	input$chip_soft_id_col,
	input$chip_soft_gene_col))
	})
	})

	# 🆕 合并工作流面板renderUI（替代conditionalPanel）
	output$chip_merge_workflow_panel <- renderUI({
	# 检查两个文件是否都已上传
	has_series <- !is.null(chip_data$series_matrix)
	has_soft <- !is.null(chip_data$soft_platform)

	if (!has_series \|\| !has_soft) {
	return(NULL)
	}

	# 检查是否已选择列
	has_id_col <- !is.null(input$chip_soft_id_col) && input$chip_soft_id_col != ""
	has_gene_col <- !is.null(input$chip_soft_gene_col) && input$chip_soft_gene_col != ""

	wellPanel(
	style = "background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); border: 2px solid #667eea;",

	h4("🔗 探针注释与数据合并工作流", style = "color: #667eea; margin-top: 0;"),

	# 工作流说明
	div(
	style = "background: rgba(255,255,255,0.8); padding: 15px; border-radius: 8px; margin-bottom: 20px;",
	h5("📋 重要提示", style = "color: #dc3545;"),
	helpText("您已在上方黄色面板选择了列，现在可以应用配置生成最终矩阵！"),
	if (has_id_col && has_gene_col) {
	tags$div(style = "background: #d4edda; padding: 10px; border-radius: 5px; border-left: 4px solid #28a745;",
	tags$strong("✅ 当前配置："),
	tags$ul(style = "margin: 10px 0;",
	tags$li(sprintf("ID列: %s", input$chip_soft_id_col)),
	tags$li(sprintf("基因列: %s", input$chip_soft_gene_col))
	)
	)
	} else {
	tags$div(style = "background: #fff3cd; padding: 10px; border-radius: 5px; border-left: 4px solid #ffc107;",
	tags$strong("⚠️ 请先在上方黄色面板选择列！")
	)
	}
	),

	# 应用按钮
	if (has_id_col && has_gene_col) {
	tagList(
	hr(style = "border-color: #667eea;"),
	h5("步骤5: 应用配置并生成最终矩阵", style = "color: #9C27B0; font-weight: bold;"),
	fluidRow(
	column(12,
	actionButton("chip_apply_merge", "✅ 应用配置并生成最终矩阵",
	class = "btn-success btn-lg btn-block",
	style = "font-size: 16px; padding: 15px;"),
	helpText("点击后将应用所有配置，生成带基因符号的表达矩阵。", style = "text-align: center;")
	)
	)
	)
	}
	)
	})

	# SOFT文件列选择UI
	output$chip_soft_columns_ui <- renderUI({
	req(chip_data$soft_platform)

	soft_cols <- colnames(chip_data$soft_platform)

	tagList(
	h5("🔍 SOFT文件列信息", style = "color: #9C27B0;"),
	p(sprintf("检测到 %d 列，请确认基因符号列：", ncol(chip_data$soft_platform))),
	fluidRow(
	column(8,
	selectInput("chip_gene_symbol_col",
	"选择基因符号列:",
	choices = soft_cols,
	selected = chip_data$gene_symbol_col)
	),
	column(4,
	br(),
	actionButton("chip_update_gene_col", "🔄 更新列选择",
	class = "btn-primary", style = "width: 100%;")
	)
	),
	helpText("💡 提示：系统已自动检测，如不正确可手动选择。",
	class = "text-info")
	)
	})

	# 更新基因符号列
	observeEvent(input$chip_update_gene_col, {
	req(chip_data$soft_platform)
	req(input$chip_gene_symbol_col)

	selected_col <- input$chip_gene_symbol_col

	# 重新提取映射
	probe_col <- chip_data$soft_platform[, 1] # 假设第一列是探针ID
	gene_col <- chip_data$soft_platform[, selected_col]

	# 移除NA和空
	valid_mask <- !is.na(gene_col) & gene_col != ""
	probe_col <- probe_col[valid_mask]
	gene_col <- gene_col[valid_mask]

	# 创建映射
	mapping <- data.frame(
	probe_id = as.character(probe_col),
	gene_symbol = as.character(gene_col),
	stringsAsFactors = FALSE
	)

	chip_data$probe_mapping <- mapping
	chip_data$gene_symbol_col <- selected_col

	showNotification(
	sprintf("✅ 已更新基因列为: %s (%d 个映射)", selected_col, nrow(mapping)),
	type = "message"
	)
	})

	# SOFT文件原始数据预览
	output$chip_soft_raw_preview <- renderDT({
	req(chip_data$soft_platform)

	# 显示前10行，但限制显示的列数
	preview_df <- head(chip_data$soft_platform, 10)

	# 🔧 截断过长的文本以避免渲染问题
	truncate_text <- function(text, max_len = 100) {
	if (is.character(text) \|\| is.factor(text)) {
	text <- as.character(text)
	text <- ifelse(nchar(text) > max_len,
	paste0(substr(text, 1, max_len), "..."),
	text)
	}
	return(text)
	}

	# 对所有列应用截断
	for (col in colnames(preview_df)) {
	preview_df[[col]] <- truncate_text(preview_df[[col]], max_len = 100)
	}

	# 限制显示的列数（最多显示15列，避免表格过宽）
	if (ncol(preview_df) > 15) {
	preview_df <- preview_df[, 1:15]
	cat(sprintf("⚠️ SOFT文件有%d列，仅显示前15列\n", ncol(chip_data$soft_platform)))
	}

	datatable(
	preview_df,
	options = list(
	dom = 't',
	paging = FALSE,
	scrollX = TRUE,
	scrollY = "400px",
	columnDefs = list(list(
	className = 'dt-center',
	targets = "_all"
	))
	),
	rownames = FALSE,
	filter = 'none',
	escape = FALSE # 允许HTML渲染（用于显示省略号）
	) %>%
	formatStyle(columns = 1:ncol(preview_df),
	fontSize = '85%',
	maxWidth = '200px',
	overflow = 'hidden',
	textOverflow = 'ellipsis')
	})

	# 探针-基因映射预览
	output$chip_probe_mapping_preview <- renderDT({
	req(chip_data$probe_mapping)

	# 显示前10行
	preview_df <- head(chip_data$probe_mapping, 10)

	datatable(
	preview_df,
	options = list(dom = 't', paging = FALSE),
	rownames = FALSE,
	colnames = c("探针ID", "基因符号")
	) %>%
	formatStyle(columns = c("探针ID", "基因符号"), fontSize = '90%')
	})

	# ============================================
	# 🆕 探针注释与合并工作流 - 服务端逻辑
	# ============================================

	# 步骤2: 渲染SOFT文件ID列选择器
	output$chip_soft_id_column_ui <- renderUI({
	req(chip_data$soft_platform)

	soft_cols <- colnames(chip_data$soft_platform)

	tagList(
	selectInput("chip_soft_id_col",
	"选择ID列（必须与Series Matrix的探针ID匹配）",
	choices = c("", soft_cols), # 添加空选项
	selected = ""),
	helpText("💡 提示：根据上方SOFT文件预览，选择包含探针ID的列（通常为'ID'列）", style = "color: #ff9800;")
	)
	})

	# 步骤3: 渲染SOFT文件基因列选择器
	output$chip_soft_gene_column_ui <- renderUI({
	req(chip_data$soft_platform)

	soft_cols <- colnames(chip_data$soft_platform)

	tagList(
	selectInput("chip_soft_gene_col",
	"选择基因列（包含基因符号的列）",
	choices = c("", soft_cols), # ✅ 添加空选项
	selected = ""), # ✅ 默认不选中，强制用户手动选择
	helpText("⚠️ 请根据上方SOFT文件预览和右侧数据示例，手动选择包含基因符号的列（如 GENE_SYMBOL）。",
	style = "color: #ff9800; font-weight: bold;")
	)
	})

	# 步骤4: 显示选中基因列的实际示例数据
	output$chip_gene_column_examples_ui <- renderUI({
	req(chip_data$soft_platform)
	req(input$chip_soft_gene_col)

	gene_col <- input$chip_soft_gene_col
	examples <- head(chip_data$soft_platform[[gene_col]], 5)

	tagList(
	h6("📋 实际数据示例（前5行）:", style = "color: #667eea;"),
	div(
	style = "background: #f8f9fa; padding: 10px; border-radius: 5px; font-family: monospace; font-size: 11px;",
	for (i in seq_along(examples)) {
	tags$div(
	sprintf("%d. %s", i, as.character(examples[i]))
	)
	}
	)
	)
	})

	# 步骤4: 正则表达式测试
	output$chip_regex_test_result_ui <- renderUI({
	req(input$chip_test_regex)

	# 获取测试数据
	example_text <- if (!is.null(input$chip_gene_extract_example) && input$chip_gene_extract_example != "") {
	input$chip_gene_extract_example
	} else {
	# 使用实际数据示例
	req(input$chip_soft_gene_col)
	examples <- head(chip_data$soft_platform[[input$chip_soft_gene_col]], 3)
	paste(examples, collapse = "\n")
	}

	regex_pattern <- input$chip_gene_regex %\|\|% "[A-Z][A-Z0-9]+"

	# 测试提取
	tryCatch({
	matches <- gregexpr(regex_pattern, example_text, perl = TRUE)
	extracted <- regmatches(example_text, matches)

	# 提取所有匹配项
	all_matches <- unique(unlist(extracted))
	all_matches <- all_matches[all_matches != ""]

	if (length(all_matches) == 0) {
	div(
	class = "alert alert-warning",
	h6("⚠️ 未找到匹配"),
	p("当前正则表达式无法从示例文本中提取基因符号。"),
	p(sprintf("正则表达式: %s", regex_pattern))
	)
	} else {
	tagList(
	div(
	class = "alert alert-success",
	h6("✅ 提取成功", style = "color: #28a745;"),
	p(sprintf("找到 %d 个匹配:", length(all_matches))),
	div(
	style = "background: #f8f9fa; padding: 10px; border-radius: 5px; margin-top: 10px;",
	tags$ul(style = "margin: 0; padding-left: 20px;",
	tagList(lapply(all_matches, function(m) {
	tags$li(style = "margin: 5px 0; font-family: monospace; color: #667eea;",
	sprintf("<strong>%s</strong>", m))
	}))
	)
	)
	),
	div(
	style = "margin-top: 10px;",
	h6("📊 提取统计:", style = "color: #667eea;"),
	tags$table(
	class = "table table-striped",
	tags$tbody(
	tags$tr(
	tags$td("原始文本长度"),
	tags$td(nchar(example_text))
	),
	tags$tr(
	tags$td("提取数量"),
	tags$td(length(all_matches))
	),
	tags$tr(
	tags$td("平均长度"),
	tags$td(round(mean(nchar(all_matches)), 1))
	)
	)
	)
	)
	)
	}
	}, error = function(e) {
	div(
	class = "alert alert-danger",
	h6("❌ 正则表达式错误"),
	p(e$message)
	)
	})
	})

	# 正则预设按钮逻辑
	observeEvent(input$chip_regex_preset1, {
	updateTextInput(session, "chip_gene_regex", value = "[A-Z]+")
	})

	observeEvent(input$chip_regex_preset2, {
	updateTextInput(session, "chip_gene_regex", value = "[A-Z][A-Z0-9]+")
	})

	observeEvent(input$chip_regex_preset3, {
	updateTextInput(session, "chip_gene_regex", value("\$([^)]+)\$"))
	})

	# 步骤5: 预览合并结果
	observeEvent(input$chip_preview_merge, {
	req(chip_data$series_matrix)
	req(chip_data$soft_platform)
	req(input$chip_soft_id_col)
	req(input$chip_soft_gene_col)

	# 转换Series Matrix行名为列
	series_df <- as.data.frame(chip_data$series_matrix)
	if (input$chip_convert_rownames %\|\|% TRUE) {
	series_df <- data.frame(ProbeID = rownames(series_df), series_df, row.names = NULL)
	}

	# 准备SOFT数据
	soft_df <- chip_data$soft_platform[, c(input$chip_soft_id_col, input$chip_soft_gene_col)]
	colnames(soft_df) <- c("ID", "GeneSymbol")

	# 合并数据 - 简化版预览
	merged_df <- merge(series_df, soft_df, by.x = "ProbeID", by.y = "ID", all.x = TRUE)

	# 重新排序列：ProbeID \| GeneSymbol \| 样本列
	sample_cols <- colnames(merged_df)[!colnames(merged_df) %in% c("ProbeID", "GeneSymbol", "ID")]
	merged_df <- merged_df[, c("ProbeID", "GeneSymbol", sample_cols)]

	# 保存预览结果
	chip_data$merged_preview <- head(merged_df, 5)

	showNotification("✅ 合并预览已生成", type = "message")
	})

	# 显示合并预览表格
	output$chip_merge_preview_table <- renderDT({
	req(chip_data$merged_preview)

	datatable(
	chip_data$merged_preview,
	options = list(
	dom = 't',
	paging = FALSE,
	scrollX = TRUE
	),
	rownames = FALSE,
	filter = 'none'
	) %>%
	formatStyle(columns = c("ProbeID", "Gene"), fontSize = '90%')
	})

	# 步骤5: 应用合并配置
	observeEvent(input$chip_apply_merge, {
	req(chip_data$series_matrix)
	req(chip_data$soft_platform)
	req(input$chip_soft_id_col)
	req(input$chip_soft_gene_col)

	showNotification("🔄 正在应用配置并生成最终矩阵...", type = "message")

	# 转换Series Matrix行名为列
	series_df <- as.data.frame(chip_data$series_matrix)
	if (input$chip_convert_rownames %\|\|% TRUE) {
	series_df <- data.frame(ProbeID = rownames(series_df), series_df, row.names = NULL)
	}

	cat(sprintf("🔍 Series探针ID示例: %s\n", paste(head(series_df$ProbeID, 3), collapse = ", ")))

	# 🔍 智能检测SOFT文件中的探针ID列
	user_id_col <- input$chip_soft_id_col
	user_id_sample <- head(chip_data$soft_platform[[user_id_col]][!is.na(chip_data$soft_platform[[user_id_col]])], 10)

	cat(sprintf("🔍 用户选择的ID列 '%s' 示例: %s\n", user_id_col, paste(head(user_id_sample, 3), collapse = ", ")))

	# 检查用户选择的ID列是否真的包含探针ID
	# 支持多种探针ID格式：
	# 1. Affymetrix: 12345_at, 1234567_x_at, 1234567_at
	# 2. Agilent: A_23_P100001, CN_123456
	# 3. Illumina: ILMN_123456
	is_probe_format <- any(grepl("._._at$\|.*_at$\|at$", user_id_sample)) \|\| # Affymetrix
	any(grepl("^[A-Z]+_\\d+_", user_id_sample)) \|\| # Agilent/Illumina前缀
	any(grepl("^CN_", user_id_sample)) # 其他常见格式

	# 准备SOFT数据
	if (is_probe_format) {
	cat("✅ 用户选择的ID列包含探针格式\n")
	soft_df <- chip_data$soft_platform[, c(user_id_col, input$chip_soft_gene_col)]
	colnames(soft_df) <- c("ID", "Gene_Raw")
	} else {
	cat("⚠️ 用户选择的ID列不包含探针格式，尝试使用SOFT行名\n")

	# 尝试使用SOFT文件的行名作为探针ID
	if (!is.null(rownames(chip_data$soft_platform))) {
	# 检查行名是否匹配探针ID
	rowname_sample <- head(rownames(chip_data$soft_platform), 10)
	cat(sprintf("🔍 SOFT行名示例: %s\n", paste(rowname_sample, collapse = ", ")))

	# 创建临时数据框使用行名
	soft_df <- data.frame(
	ID = rownames(chip_data$soft_platform),
	Gene_Raw = chip_data$soft_platform[[input$chip_soft_gene_col]],
	stringsAsFactors = FALSE
	)
	} else {
	cat("❌ SOFT文件没有行名，无法自动检测\n")
	showNotification("⚠️ 请选择包含探针ID的列（如ID、SPOT_ID等）", type = "warning", duration = 10)
	return()
	}
	}

	# 🆕 智能判断是否需要正则提取
	# 检查基因列的内容，如果已经是纯符号格式，不需要提取
	gene_sample <- head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw) & soft_df$Gene_Raw != ""], 10)

	cat(sprintf("🔍 用户选择的基因列 '%s' 示例: %s\n",
	input$chip_soft_gene_col, paste(head(gene_sample, 3), collapse = ", ")))

	# 判断是否是纯数字ID
	is_numeric_id <- all(grepl("^[0-9]+$", gene_sample))

	if (is_numeric_id) {
	cat("⚠️ 用户选择的基因列包含数字ID而非基因符号！\n")
	cat("💡 建议：请选择包含基因符号的列（如GENE_SYMBOL）\n")

	# 尝试自动查找真正的基因符号列
	soft_cols <- colnames(chip_data$soft_platform)
	cat(sprintf("🔍 SOFT文件所有列名: %s\n", paste(soft_cols, collapse = ", ")))

	possible_symbol_cols <- c("GENE_SYMBOL", "SYMBOL", "GENE_NAME", "NAME", "DESCRIPTION")

	for (col in possible_symbol_cols) {
	if (col %in% soft_cols && col != input$chip_soft_gene_col) {
	test_data <- head(chip_data$soft_platform[[col]][!is.na(chip_data$soft_platform[[col]])], 10)
	cat(sprintf("🔍 检查列 '%s': 示例=%s\n", col, paste(head(test_data, 3), collapse = ", ")))

	if (!all(grepl("^[0-9]+$", test_data))) {
	cat(sprintf("✅ 自动检测到基因符号列: %s\n", col))
	cat(sprintf(" 示例: %s\n", paste(head(test_data, 3), collapse = ", ")))

	# 🔧 修复：创建新的soft_df，直接包含正确的列
	soft_df <- chip_data$soft_platform[, c(user_id_col, col)]
	colnames(soft_df) <- c("ID", "Gene_Raw")
	cat("✅ 已重新创建soft_df，使用正确的基因符号列\n")
	cat(sprintf("✅ 验证：soft_df的Gene_Raw列示例: %s\n", paste(head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw)], 3), collapse = ", ")))
	break
	}
	}
	}
	}

	# 重新检查Gene_Raw
	gene_sample <- head(soft_df$Gene_Raw[!is.na(soft_df$Gene_Raw) & soft_df$Gene_Raw != ""], 10)

	# 判断是否已经是纯基因符号（不包含额外文本）
	# 典型基因符号：TP53, EGFR, BRCA1, MYC (大写字母+数字，无空格，无逗号)
	is_pure_symbol <- all(grepl("^[A-Z][A-Z0-9]{1,15}$", gene_sample))

	if (is_pure_symbol) {
	cat("✅ 基因列已是纯符号格式，直接使用，无需正则提取\n")
	cat(sprintf(" 示例: %s\n", paste(head(gene_sample, 3), collapse = ", ")))
	# 直接使用原列作为基因符号
	soft_df$GeneSymbol <- soft_df$Gene_Raw
	cat(sprintf("✅ GeneSymbol列已创建，示例: %s\n", paste(head(soft_df$GeneSymbol[!is.na(soft_df$GeneSymbol)], 3), collapse = ", ")))
	} else {
	cat("📋 基因列包含额外文本，应用正则提取\n")
	cat(sprintf(" 原始示例: %s\n", paste(head(gene_sample, 2), collapse = ", ")))

	# 应用正则提取
	regex_pattern <- input$chip_gene_regex %\|\|% "[A-Z][A-Z0-9]+"

	soft_df$GeneSymbol <- sapply(soft_df$Gene_Raw, function(x) {
	matches <- regmatches(x, gregexpr(regex_pattern, as.character(x), perl = TRUE))
	if (length(matches[[1]]) > 0) {
	matches[[1]][1] # 取第一个匹配
	} else {
	NA
	}
	})

	extracted_sample <- head(soft_df$GeneSymbol[!is.na(soft_df$GeneSymbol)], 5)
	cat(sprintf(" 提取示例: %s\n", paste(extracted_sample, collapse = ", ")))
	}

	# 🔧 尝试获取Entrez Gene ID（支持用户指定或自动检测）
	# 🔧 通用函数：清理EntrezID（移除非数字字符）
	clean_entrez_id <- function(entrez_str) {
	if (is.na(entrez_str) \|\| is.null(entrez_str) \|\| entrez_str == "") {
	return(NA)
	}
	# 转换为字符
	entrez_str <- as.character(entrez_str)
	# 移除所有非数字字符（保留数字0-9）
	cleaned <- gsub("[^0-9]", "", entrez_str)
	# 如果清理后为空，返回NA
	if (cleaned == "" \|\| is.na(cleaned)) {
	return(NA)
	}
	return(cleaned)
	}

	# 方法1: 用户手动指定EntrezID列
	if (!is.null(input$chip_soft_entrez_col) && input$chip_soft_entrez_col != "" && input$chip_soft_entrez_col != "自动检测") {
	user_entrez_col <- input$chip_soft_entrez_col
	if (user_entrez_col %in% colnames(chip_data$soft_platform)) {
	raw_entrez_ids <- as.character(chip_data$soft_platform[[user_entrez_col]][match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])])

	# 🔧 清理EntrezID（移除非数字字符）
	entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id)

	soft_df$EntrezID <- entrez_gene_ids

	# 统计清理情况
	na_count <- sum(is.na(entrez_gene_ids))
	valid_count <- sum(!is.na(entrez_gene_ids))
	entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3)

	cat(sprintf("✅ 用户指定的EntrezID列 '%s' 已添加\n", user_entrez_col))
	cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count))
	cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", ")))

	if (na_count > 0) {
	cat(sprintf(" ⚠️ %d个ID被清理为NA（包含非数字字符）\n", na_count))
	}
	}
	}
	# 方法2: 自动检测EntrezID列（如果用户选择"自动检测"）
	else if (!is.null(input$chip_soft_entrez_col) && input$chip_soft_entrez_col == "自动检测") {
	# 常见的EntrezID列名（按优先级排序）
	possible_entrez_cols <- c("ENTREZ_GENE_ID", "ENTREZID", "EntrezID", "GeneID",
	"GENE_ID", "ENTREZ_GENE", "GENE", "Entrez Gene ID",
	"Entrez", "ENTREZ")

	found_entrez_col <- NULL
	for (col in possible_entrez_cols) {
	if (col %in% colnames(chip_data$soft_platform)) {
	# 检查该列是否包含数字ID（清理后）
	test_values <- head(chip_data$soft_platform[[col]], 100)
	test_values <- test_values[!is.na(test_values) & test_values != ""]

	# 🔧 清理测试值（移除非数字字符）
	cleaned_test <- sapply(test_values, clean_entrez_id)
	cleaned_test <- cleaned_test[!is.na(cleaned_test)]

	# 检查是否大部分可以清理为纯数字（EntrezID特征）
	if (length(cleaned_test) > 0) {
	is_numeric_id <- sum(grepl("^[0-9]+$", cleaned_test)) / length(cleaned_test) > 0.8

	if (is_numeric_id) {
	found_entrez_col <- col
	cat(sprintf("✅ 自动检测到EntrezID列: '%s'\n", col))
	break
	}
	}
	}
	}

	if (!is.null(found_entrez_col)) {
	raw_entrez_ids <- as.character(chip_data$soft_platform[[found_entrez_col]][match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])])

	# 🔧 清理EntrezID（移除非数字字符）
	entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id)

	soft_df$EntrezID <- entrez_gene_ids

	# 统计清理情况
	na_count <- sum(is.na(entrez_gene_ids))
	valid_count <- sum(!is.na(entrez_gene_ids))
	entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3)

	cat(sprintf("✅ 自动检测的EntrezID列已添加\n", found_entrez_col))
	cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count))
	cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", ")))

	if (na_count > 0) {
	cat(sprintf(" ⚠️ %d个ID被清理为NA（包含非数字字符）\n", na_count))
	}
	} else {
	cat("⚠️ 自动检测未找到EntrezID列\n")
	}
	}
	# 方法3: 旧逻辑（仅检查GENE列）- 也添加清理
	else if ("GENE" %in% colnames(chip_data$soft_platform)) {
	raw_entrez_ids <- as.character(chip_data$soft_platform$GENE[match(soft_df$ID, chip_data$soft_platform[[input$chip_soft_id_col]])])

	# 🔧 清理EntrezID
	entrez_gene_ids <- sapply(raw_entrez_ids, clean_entrez_id)

	soft_df$EntrezID <- entrez_gene_ids

	# 统计清理情况
	na_count <- sum(is.na(entrez_gene_ids))
	valid_count <- sum(!is.na(entrez_gene_ids))
	entrez_sample <- head(entrez_gene_ids[!is.na(entrez_gene_ids)], 3)

	cat(sprintf("✅ Entrez Gene ID列（GENE）已添加\n"))
	cat(sprintf(" 有效ID数: %d, NA数: %d\n", valid_count, na_count))
	cat(sprintf(" 示例: %s\n", paste(entrez_sample, collapse = ", ")))

	if (na_count > 0) {
	cat(sprintf(" ⚠️ %d个ID被清理为NA（包含非数字字符）\n", na_count))
	}
	}

	# 🔧 不再创建Gene列！直接使用GeneSymbol用于去重，EntrezID用于差异分析
	cat("✅ 使用GeneSymbol列进行探针去重\n")
	cat("✅ 使用EntrezID列作为差异分析结果的ID\n")

	# 合并数据 - 保留所有列
	merged_df <- merge(series_df, soft_df, by.x = "ProbeID", by.y = "ID", all.x = TRUE)

	# 🔧 智能识别样本列（必须是数值型，排除ID列和基因列）
	# 规则：样本列 = 数值型列 + 不在排除列表中
	exclude_cols <- c("ProbeID", "GeneSymbol", "EntrezID", "Gene_Raw", "ID")

	# 识别数值型列（样本列）
	sample_cols <- character(0)
	for (col in colnames(merged_df)) {
	if (!(col %in% exclude_cols)) {
	# 检查是否为数值型
	if (is.numeric(merged_df[[col]])) {
	sample_cols <- c(sample_cols, col)
	}
	}
	}

	cat(sprintf("🔍 识别到 %d 个样本列: %s\n", length(sample_cols),
	paste(head(sample_cols, 5), collapse = ", ")))

	# 重新排序列：ProbeID \| GeneSymbol \| EntrezID \| 样本列
	if ("EntrezID" %in% colnames(merged_df)) {
	# 重新排列列顺序
	merged_df <- merged_df[, c("ProbeID", "GeneSymbol", "EntrezID", sample_cols)]

	# 🔧 强制确保EntrezID是字符型
	merged_df$EntrezID <- as.character(merged_df$EntrezID)
	cat("✅ EntrezID列已强制转换为字符型\n")
	} else {
	# 如果没有EntrezID，只保留GeneSymbol
	merged_df <- merged_df[, c("ProbeID", "GeneSymbol", sample_cols)]
	cat("⚠️ 未找到EntrezID列\n")
	}

	# 🔍 诊断：显示合并后的列结构
	cat(sprintf("🔍 合并后矩阵结构: %d 行 × %d 列\n", nrow(merged_df), ncol(merged_df)))
	cat(sprintf("🔍 列名: %s\n", paste(colnames(merged_df), collapse = ", ")))
	cat(sprintf("🔍 前3列: %s\n", paste(head(colnames(merged_df), 3), collapse = ", ")))
	cat(sprintf("🔍 后3列: %s\n", paste(tail(colnames(merged_df), 3), collapse = ", ")))

	# 显示各列的类型
	cat("🔍 列类型:\n")
	for (i in 1:min(5, ncol(merged_df))) {
	cat(sprintf(" %s: %s\n", colnames(merged_df)[i], class(merged_df[[i]])))
	}

	# 确认EntrezID列的位置和类型
	if ("EntrezID" %in% colnames(merged_df)) {
	entrez_col_idx <- which(colnames(merged_df) == "EntrezID")
	cat(sprintf("🔍 EntrezID列位置: 第%d列，类型: %s\n", entrez_col_idx, class(merged_df$EntrezID)))
	cat(sprintf("🔍 EntrezID示例: %s\n", paste(head(merged_df$EntrezID[!is.na(merged_df$EntrezID)], 3), collapse = ", ")))
	}

	# 保存最终合并的矩阵
	chip_data$merged_matrix <- merged_df

	# 🆕 检查数据列内容
	gene_symbol_sample <- head(merged_df$GeneSymbol[!is.na(merged_df$GeneSymbol)], 5)
	cat(sprintf("✅ GeneSymbol列包含基因符号（示例: %s）\n",
	paste(gene_symbol_sample, 3), collapse = ", "))

	if ("EntrezID" %in% colnames(merged_df)) {
	entrez_sample <- head(merged_df$EntrezID[!is.na(merged_df$EntrezID)], 5)
	cat(sprintf("✅ EntrezID列包含Entrez Gene ID（示例: %s）\n",
	paste(entrez_sample, collapse = ", ")))
	}

	# 统计信息
	n_total <- nrow(merged_df)
	n_annotated <- sum(!is.na(merged_df$GeneSymbol))
	annotation_rate <- n_annotated / n_total * 100

	# 🔍 诊断信息：检查GeneSymbol列匹配情况
	na_count <- sum(is.na(merged_df$GeneSymbol) \| merged_df$GeneSymbol == "")
	cat(sprintf("🔍 诊断: %d个探针的GeneSymbol为NA或空 (%.1f%%)\n", na_count, na_count/n_total*100))

	# 检查ID匹配情况
	series_probes <- series_df$ProbeID
	soft_ids <- soft_df$ID
	matched_probes <- sum(series_probes %in% soft_ids)
	cat(sprintf("🔍 ID匹配: %d / %d 个Series探针在SOFT文件中找到 (%.1f%%)\n",
	matched_probes, length(series_probes), matched_probes/length(series_probes)*100))

	# 如果匹配率太低，显示警告
	if (matched_probes / length(series_probes) < 0.5) {
	cat("⚠️ 警告：ID匹配率低于50%，请检查ID列选择是否正确！\n")
	cat(sprintf(" Series探针示例: %s\n", paste(head(series_probes, 3), collapse = ", ")))
	cat(sprintf(" SOFT ID示例: %s\n", paste(head(soft_ids, 3), collapse = ", ")))
	}

	cat(sprintf("✅ 合并完成: %d 个探针, %d 个已注释 (%.1f%%)\n",
	n_total, n_annotated, annotation_rate))

	showNotification(
	sprintf("✅ 合并完成！%d / %d 个探针已注释 (%.1f%%)",
	n_annotated, n_total, annotation_rate),
	type = "message",
	duration = 10
	)
	})

	# 显示最终合并矩阵的预览（在应用合并后）
	output$chip_final_matrix_ui <- renderUI({
	req(chip_data$merged_matrix)

	tagList(
	h5("📊 最终表达矩阵（前5行）", style = "color: #28a745;"),
	DTOutput("chip_final_matrix_table")
	)
	})

	output$chip_final_matrix_table <- renderDT({
	req(chip_data$merged_matrix)

	# 显示前5行和前10列（避免表格过宽）
	preview_df <- head(chip_data$merged_matrix, 5)
	if (ncol(preview_df) > 12) {
	preview_df <- preview_df[, 1:12] # ProbeID + Gene + 前10个样本
	}

	# 检查是否有ProbeID和Gene列
	has_probe_id <- "ProbeID" %in% colnames(preview_df)
	has_gene <- "Gene" %in% colnames(preview_df)

	# 创建datatable
	dt <- datatable(
	preview_df,
	options = list(
	dom = 't',
	paging = FALSE,
	scrollX = TRUE,
	columnDefs = list(list(
	className = 'dt-center',
	targets = "_all"
	))
	),
	rownames = FALSE,
	filter = 'none'
	)

	# 只对存在的列应用样式
	if (has_probe_id && has_gene) {
	dt <- dt %>%
	formatStyle(columns = c("ProbeID", "Gene"),
	backgroundColor = '#e8f4f8',
	fontWeight = 'bold')
	}

	dt
	})

	# ============================================
	# 🆕 数据预处理与探针去重 - Server逻辑
	# ============================================

	# 去重前统计显示
	output$chip_before_dedupe_stats <- renderUI({
	req(chip_data$merged_matrix)

	# 统计探针数量
	n_probes <- nrow(chip_data$merged_matrix)

	# 识别数值列（样本数据）
	numeric_cols <- sapply(chip_data$merged_matrix, function(x) is.numeric(x))
	n_samples <- sum(numeric_cols)

	# 统计探针-基因映射情况
	n_with_gene <- sum(!is.na(chip_data$merged_matrix$GeneSymbol))

	# 计算一个基因对应多个探针的情况
	gene_counts <- table(chip_data$merged_matrix$GeneSymbol)
	gene_counts <- gene_counts[names(gene_counts) != ""] # 移除空基因名
	n_multi_probes <- sum(gene_counts > 1)

	div(
	style = "font-size: 12px;",
	tags$ul(style = "padding-left: 15px; margin: 5px 0;",
	tags$li(sprintf("总探针数: %d", n_probes)),
	tags$li(sprintf("样本数: %d", n_samples)),
	tags$li(sprintf("有基因注释的探针: %d (%.1f%%)", n_with_gene, n_with_gene/n_probes*100)),
	tags$li(sprintf("一因多探针的基因数: %d", n_multi_probes))
	)
	)
	})

	# 步骤1: 数据预处理
	observeEvent(input$chip_preprocess_data, {
	req(chip_data$merged_matrix)

	showNotification("🔄 正在进行数据预处理...", type = "message")

	tryCatch({
	# 🔧 修复：智能提取表达数据（只保留数值列）
	merged_df <- chip_data$merged_matrix

	# 识别数值列
	numeric_cols <- sapply(merged_df, function(x) is.numeric(x))

	# 排除ProbeID和Gene列（它们在前两列）
	# 但要确保只使用数值列进行计算
	expr_cols <- which(numeric_cols)

	# 提取表达数据
	expr_matrix <- as.matrix(merged_df[, expr_cols, drop = FALSE])

	# 🔧 修复：使用ProbeID作为行名，而不是Gene（这样才能在去重时正确匹配）
	if ("ProbeID" %in% colnames(merged_df)) {
	rownames(expr_matrix) <- merged_df$ProbeID
	cat("✅ 表达矩阵行名使用ProbeID\n")
	} else if ("Gene" %in% colnames(merged_df)) {
	rownames(expr_matrix) <- merged_df$Gene
	cat("⚠️ 警告：使用Gene作为行名（ProbeID列不存在）\n")
	} else {
	rownames(expr_matrix) <- rownames(merged_df)
	}

	cat(sprintf("✅ 提取表达数据: %d 探针 × %d 样本\n",
	nrow(expr_matrix), ncol(expr_matrix)))

	# 保存原始数据用于对比
	chip_data$expr_before_preprocess <- expr_matrix

	# 1. 自动log2转换判断
	if (input$chip_auto_log2 %\|\|% TRUE) {
	ex <- expr_matrix
	qx <- as.numeric(quantile(ex, c(0., 0.25, 0.5, 0.75, 0.99, 1.0), na.rm = TRUE))
	LogC <- (qx[5] > 100) \|\|
	(qx[6] - qx[1] > 50 && qx[2] > 0) \|\|
	(qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)

	if (LogC) {
	ex[which(ex <= 0)] <- NaN
	expr_matrix <- log2(ex)
	chip_data$log2_performed <- TRUE
	cat("✅ log2转换已完成\n")
	} else {
	chip_data$log2_performed <- FALSE
	cat("ℹ️ 不需要log2转换\n")
	}
	}

	# 2. limma标准化
	if (input$chip_normalize_data %\|\|% TRUE) {
	library(limma)
	expr_matrix <- normalizeBetweenArrays(expr_matrix)
	chip_data$normalize_performed <- TRUE
	cat("✅ limma标准化已完成\n")
	}

	# 保存处理后的数据
	chip_data$expr_preprocessed <- expr_matrix

	# 生成结果报告
	chip_data$preprocess_report <- list(
	log2_performed = chip_data$log2_performed,
	normalize_performed = chip_data$normalize_performed,
	n_probes = nrow(expr_matrix),
	n_samples = ncol(expr_matrix),
	data_range = range(expr_matrix, na.rm = TRUE)
	)

	# 🆕 生成箱线图对比（矫正前后）
	tryCatch({
	library(ggplot2)
	library(reshape2)

	# 矫正前的数据 - 计算每个样本的统计值用于箱线图
	expr_before <- chip_data$expr_before_preprocess
	# 转置矩阵：行为样本，列为探针
	df_before <- as.data.frame(t(expr_before))
	df_before$Sample <- rownames(df_before)
	# 将数据从宽格式转换为长格式（使用reshape2的melt）
	df_before_long <- melt(df_before, id.vars = "Sample", variable.name = "Probe", value.name = "Expression")
	df_before_long$Stage <- "Before"

	# 矫正后的数据
	df_after <- as.data.frame(t(expr_matrix))
	df_after$Sample <- rownames(df_after)
	df_after_long <- melt(df_after, id.vars = "Sample", variable.name = "Probe", value.name = "Expression")
	df_after_long$Stage <- "After"

	# 合并数据
	df_combined <- rbind(df_before_long, df_after_long)

	# 绘制箱线图
	p <- ggplot(df_combined, aes(x = Sample, y = Expression, fill = Stage)) +
	geom_boxplot() +
	theme_bw() +
	theme(
	axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 8),
	legend.position = "top",
	plot.title = element_text(hjust = 0.5)
	) +
	labs(
	title = "数据预处理前后对比",
	x = "样本",
	y = "表达值",
	fill = "阶段"
	) +
	scale_fill_manual(values = c("Before" = "#E69F00", "After" = "#009E73"))

	chip_data$preprocess_boxplot <- p
	cat("✅ 箱线图已生成\n")
	}, error = function(e) {
	cat(sprintf("⚠️ 箱线图生成失败: %s\n", e$message))
	})

	showNotification("✅ 数据预处理完成！", type = "message", duration = 5)

	}, error = function(e) {
	showNotification(sprintf("❌ 预处理失败: %s", e$message), type = "error")
	})
	})

	# 预处理结果显示
	output$chip_preprocess_result_ui <- renderUI({
	req(chip_data$preprocess_report)
	req(chip_data$expr_preprocessed)

	report <- chip_data$preprocess_report
	expr_matrix <- chip_data$expr_preprocessed

	# 计算额外统计信息
	expr_mean <- mean(expr_matrix, na.rm = TRUE)
	expr_median <- median(expr_matrix, na.rm = TRUE)
	expr_sd <- sd(expr_matrix, na.rm = TRUE)

	tagList(
	h5("✅ 预处理完成", style = "color: #28a745; font-weight: bold;"),
	br(),

	# 基本统计信息
	wellPanel(
	style = "background: #f8f9fa; border: 1px solid #dee2e6;",
	h6("📊 基本统计", style = "color: #495057; margin-top: 0;"),
	tags$table(
	class = "table table-sm table-striped",
	style = "margin-bottom: 0;",
	tags$thead(
	tags$tr(
	tags$th("项目", style = "width: 50%;"),
	tags$th("值")
	)
	),
	tags$tbody(
	tags$tr(
	tags$td("log2转换"),
	tags$td({
	if (report$log2_performed)
	tags$span("✅ 是", class = "badge badge-success")
	else
	tags$span("❌ 否", class = "badge badge-secondary")
	})
	),
	tags$tr(
	tags$td("limma标准化 (quantile)"),
	tags$td({
	if (report$normalize_performed)
	tags$span("✅ 是", class = "badge badge-success")
	else
	tags$span("❌ 否", class = "badge badge-secondary")
	})
	),
	tags$tr(
	tags$td(tags$strong("探针数")),
	tags$td(sprintf("%d", report$n_probes))
	),
	tags$tr(
	tags$td(tags$strong("样本数")),
	tags$td(sprintf("%d", report$n_samples))
	),
	tags$tr(
	tags$td("数据范围"),
	tags$td(sprintf("%.3f ~ %.3f", report$data_range[1], report$data_range[2]))
	),
	tags$tr(
	tags$td("平均值"),
	tags$td(sprintf("%.3f", expr_mean))
	),
	tags$tr(
	tags$td("中位数"),
	tags$td(sprintf("%.3f", expr_median))
	),
	tags$tr(
	tags$td("标准差"),
	tags$td(sprintf("%.3f", expr_sd))
	)
	)
	)
	),
	br(),

	# 箱线图对比
	wellPanel(
	style = "background: white; border: 1px solid #dee2e6;",
	h6("📊 箱线图对比（矫正前后）", style = "color: #ff9800; margin-top: 0;"),
	plotOutput("chip_preprocess_boxplot", height = "500px")
	)
	)
	})

	# 🆕 渲染箱线图
	output$chip_preprocess_boxplot <- renderPlot({
	req(chip_data$preprocess_boxplot)
	chip_data$preprocess_boxplot
	})

	# ============================================
	# 🆕 批次矫正 - Server逻辑
	# ============================================

	# 自动检测批次
	observeEvent(input$chip_detect_batch, {
	req(chip_data$expr_preprocessed)
	req(input$chip_batch_pattern)

	pattern <- input$chip_batch_pattern
	sample_names <- colnames(chip_data$expr_preprocessed)

	# 从样本名中提取批次信息
	batches <- sapply(sample_names, function(x) {
	match <- regmatches(x, regexpr(paste0(pattern, "\\w+"), x, ignore.case = TRUE))
	if (length(match) > 0) {
	return(match)
	} else {
	return("Unknown")
	}
	})

	# 保存批次信息
	chip_data$batch_info <- batches
	chip_data$batch_method <- input$chip_batch_method %\|\|% "limma"

	# 统计批次分布
	batch_table <- table(batches)

	cat(sprintf("✅ 检测到 %d 个批次:\n", length(batch_table)))
	for (i in seq_along(batch_table)) {
	cat(sprintf(" - %s: %d 个样本\n", names(batch_table)[i], batch_table[i]))
	}

	showNotification(
	sprintf("✅ 检测到 %d 个批次", length(batch_table)),
	type = "message"
	)
	})

	# 解析手动指定的批次
	observeEvent(input$chip_batch_manual, {
	req(input$chip_batch_manual)

	tryCatch({
	lines <- strsplit(input$chip_batch_manual, "\n")[[1]]
	lines <- lines[lines != "" & !grepl("^\\s*$", lines)]

	batch_mapping <- list()
	for (line in lines) {
	parts <- strsplit(line, "\t")[[1]]
	if (length(parts) >= 2) {
	batch_mapping[[trimws(parts[1])]] <- trimws(parts[2])
	}
	}

	chip_data$batch_manual_mapping <- batch_mapping

	sample_names <- colnames(chip_data$expr_preprocessed)
	batches <- sapply(sample_names, function(x) {
	if (x %in% names(batch_mapping)) {
	return(batch_mapping[[x]])
	} else {
	return("Unknown")
	}
	})

	chip_data$batch_info <- batches

	batch_table <- table(batches)

	cat(sprintf("✅ 手动指定批次: %d 个批次\n", length(batch_table)))

	showNotification(
	sprintf("✅ 手动指定批次: %d 个批次", length(batch_table)),
	type = "message"
	)
	}, error = function(e) {
	showNotification(sprintf("❌ 解析批次信息失败: %s", e$message), type = "error")
	})
	})

	# 显示批次信息
	output$chip_batch_info_ui <- renderUI({
	req(chip_data$batch_info)

	batches <- chip_data$batch_info
	batch_table <- table(batches)

	tagList(
	tags$table(
	class = "table table-striped",
	tags$thead(
	tags$tr(
	tags$th("批次"),
	tags$th("样本数")
	)
	),
	tags$tbody(
	lapply(seq_along(batch_table), function(i) {
	tags$tr(
	tags$td(names(batch_table)[i]),
	tags$td(batch_table[i])
	)
	})
	)
	)
	)
	})

	# 执行批次矫正
	observeEvent(input$chip_apply_batch_correct, {
	req(chip_data$expr_preprocessed)
	req(chip_data$batch_info)

	showNotification("🔧 正在进行批次矫正...", type = "message")

	tryCatch({
	expr_matrix <- chip_data$expr_preprocessed
	batches <- chip_data$batch_info
	method <- input$chip_batch_method %\|\|% "limma"

	cat(sprintf("📊 批次矫正方法: %s\n", method))

	if (method == "limma") {
	# 使用 limma::removeBatchEffect
	library(limma)

	batch_factor <- factor(batches)
	design <- model.matrix(~1, data = data.frame(batch = batch_factor))

	expr_corrected <- removeBatchEffect(expr_matrix, batch = batch_factor)

	cat("✅ limma::removeBatchEffect 批次矫正完成\n")

	} else if (method == "combat") {
	# 使用 sva::ComBat
	library(sva)

	batch_factor <- factor(batches)

	expr_corrected <- ComBat(
	dat = expr_matrix,
	batch = batch_factor,
	mod = NULL,
	par.prior = TRUE,
	prior.plots = FALSE
	)

	cat("✅ sva::ComBat 批次矫正完成\n")
	}

	# 保存矫正后的数据
	chip_data$expr_batch_corrected <- expr_corrected
	chip_data$batch_correct_method <- method

	# 计算矫正前后的差异
	mean_diff <- mean(abs(expr_matrix - expr_corrected), na.rm = TRUE)

	# 生成报告
	chip_data$batch_correct_report <- list(
	method = method,
	n_batches = length(unique(batches)),
	batch_distribution = table(batches),
	mean_change = mean_diff,
	n_probes = nrow(expr_corrected),
	n_samples = ncol(expr_corrected)
	)

	showNotification(
	sprintf("✅ 批次矫正完成！方法: %s", method),
	type = "message",
	duration = 10
	)

	}, error = function(e) {
	showNotification(
	sprintf("❌ 批次矫正失败: %s", e$message),
	type = "error",
	duration = 10
	)
	cat(sprintf("❌ 批次矫正错误: %s\n", e$message))
	})
	})

	# 显示批次矫正结果
	output$chip_batch_correct_result_ui <- renderUI({
	req(chip_data$batch_correct_report)

	report <- chip_data$batch_correct_report

	method_name <- if (report$method == "limma") {
	"limma::removeBatchEffect"
	} else {
	"sva::ComBat"
	}

	tagList(
	h6("✅ 批次矫正完成", style = "color: #E91E63;"),
	tags$table(
	class = "table table-striped",
	tags$thead(
	tags$tr(
	tags$th("项目"),
	tags$th("值")
	)
	),
	tags$tbody(
	tags$tr(
	tags$td("矫正方法"),
	tags$td(method_name)
	),
	tags$tr(
	tags$td("批次数"),
	tags$td(report$n_batches)
	),
	tags$tr(
	tags$td("探针/基因数"),
	tags$td(report$n_probes)
	),
	tags$tr(
	tags$td("样本数"),
	tags$td(report$n_samples)
	),
	tags$tr(
	tags$td("平均变化幅度"),
	tags$td(sprintf("%.4f", report$mean_change))
	)
	)
	),
	br(),
	helpText("💡 提示：批次矫正后的数据已保存，将用于后续的探针去重和差异分析。")
	)
	})

	# 步骤2: 探针去重
	observeEvent(input$chip_dedupe_probes, {
	req(chip_data$expr_preprocessed)

	showNotification("✂️ 正在进行探针去重...", type = "message")

	tryCatch({
	library(dplyr)
	library(tibble)

	# ✅ 优先使用批次矫正后的数据
	if (!is.null(chip_data$expr_batch_corrected)) {
	expr_matrix <- chip_data$expr_batch_corrected
	cat("✅ 使用批次矫正后的数据进行探针去重\n")
	} else {
	expr_matrix <- chip_data$expr_preprocessed
	cat("✅ 使用预处理后的数据进行探针去重\n")
	}

	# 转换为数据框并添加探针ID和基因信息
	expr_df <- as.data.frame(expr_matrix)
	expr_df <- expr_df %>%
	rownames_to_column("ProbeID")

	# 🔧 检查merged_matrix中的可用列
	available_cols <- colnames(chip_data$merged_matrix)
	cat(sprintf("📋 merged_matrix可用列: %s\n", paste(available_cols, collapse = ", ")))

	# 动态选择要合并的列（只选择存在的列）
	merge_cols <- c("ProbeID")
	if ("GeneSymbol" %in% available_cols) {
	merge_cols <- c(merge_cols, "GeneSymbol")
	}
	if ("EntrezID" %in% available_cols) {
	merge_cols <- c(merge_cols, "EntrezID")
	}

	cat(sprintf("📋 将合并列: %s\n", paste(merge_cols, collapse = ", ")))

	# 执行合并
	expr_df <- expr_df %>%
	inner_join(chip_data$merged_matrix[, merge_cols, drop = FALSE], by = "ProbeID")

	# 🔧 清理合并后的EntrezID列（移除非数字字符）
	if ("EntrezID" %in% colnames(expr_df)) {
	clean_entrez_id <- function(entrez_str) {
	if (is.na(entrez_str) \|\| is.null(entrez_str) \|\| entrez_str == "") {
	return(NA)
	}
	entrez_str <- as.character(entrez_str)
	# 移除所有非数字字符（///, //, -, 等）
	cleaned <- gsub("[^0-9]", "", entrez_str)
	if (cleaned == "" \|\| is.na(cleaned)) {
	return(NA)
	}
	return(cleaned)
	}

	# 统计清理前的情况
	na_before <- sum(is.na(expr_df$EntrezID))

	# 应用清理
	expr_df$EntrezID <- sapply(expr_df$EntrezID, clean_entrez_id)

	# 统计清理后的情况
	na_after <- sum(is.na(expr_df$EntrezID))

	cat(sprintf("🔧 EntrezID清理: NA前=%d, NA后=%d, 新增NA=%d\n",
	na_before, na_after, na_after - na_before))

	if (na_after > na_before) {
	cat(sprintf("⚠️ %d个EntrezID包含非数字字符，已清理为NA\n", na_after - na_before))
	}
	}

	# 🔍 诊断：检查合并后的GeneSymbol列
	cat(sprintf("🔍 去重前: %d 行，GeneSymbol NA=%d, 空字符串=%d\n",
	nrow(expr_df),
	sum(is.na(expr_df$GeneSymbol)),
	sum(expr_df$GeneSymbol == "")))

	# 移除没有基因符号的探针
	expr_df <- expr_df[!is.na(expr_df$GeneSymbol) & expr_df$GeneSymbol != "", ]

	cat(sprintf("🔍 移除NA后: %d 行剩余\n", nrow(expr_df)))

	# 如果移除后没有数据，警告并跳过去重
	if (nrow(expr_df) == 0) {
	cat("⚠️ 警告：移除NA基因后没有数据！请检查合并步骤的GeneSymbol列\n")
	showNotification("❌ 去重失败：所有基因都是NA，请检查ID列和基因列是否匹配", type = "error", duration = 10)
	return()
	}

	# 探针去重：保留表达量最高的探针
	# 使用GeneSymbol去重，但保留EntrezID列供差异分析使用
	# 最终expr_deduped包含：行名=GeneSymbol, 列=EntrezID + 样本数据
	if ("EntrezID" %in% colnames(expr_df)) {
	expr_df <- expr_df %>%
	select(-ProbeID) %>% # 去掉探针ID列
	select(GeneSymbol, EntrezID, everything()) %>% # 基因符号和ID放前面
	mutate(rowMean = rowMeans(.[, -(1:2)])) %>% # 计算平均表达量（排除前两列）
	arrange(desc(rowMean)) %>% # 按表达量降序排列
	distinct(GeneSymbol, .keep_all = TRUE) %>% # 按基因符号去重
	select(-rowMean) %>% # 删除辅助列（保留EntrezID列！）
	column_to_rownames("GeneSymbol") # GeneSymbol作为行名，EntrezID保留为列

	cat("✅ 去重使用GeneSymbol，行名=GeneSymbol，EntrezID保留为列供差异分析使用\n")
	} else {
	# 没有EntrezID，使用GeneSymbol
	expr_df <- expr_df %>%
	select(-ProbeID) %>% # 去掉探针ID列
	select(GeneSymbol, everything()) %>% # 基因符号放第一
	mutate(rowMean = rowMeans(.[, -1])) %>% # 计算平均表达量
	arrange(desc(rowMean)) %>% # 按表达量降序排列
	distinct(GeneSymbol, .keep_all = TRUE) %>% # 去重
	select(-rowMean) %>% # 删除辅助列
	column_to_rownames("GeneSymbol") # 基因符号变回行名

	cat("✅ 去重使用GeneSymbol，最终结果使用GeneSymbol作为行名\n")
	}

	# 保存去重后的数据
	chip_data$expr_deduped <- expr_df

	# 统计信息
	n_before <- nrow(chip_data$expr_preprocessed)
	n_after <- nrow(expr_df)
	reduction_rate <- (n_before - n_after) / n_before * 100

	chip_data$dedupe_report <- list(
	n_probes_before = n_before,
	n_genes_after = n_after,
	n_removed = n_before - n_after,
	reduction_rate = reduction_rate,
	n_samples = ncol(expr_df)
	)

	cat(sprintf("✅ 探针去重完成: %d 探针 → %d 基因 (%.1f%% 减少)\n",
	n_before, n_after, reduction_rate))

	showNotification(
	sprintf("✅ 去重完成！%d 探针 → %d 基因", n_before, n_after),
	type = "message",
	duration = 5
	)

	}, error = function(e) {
	showNotification(sprintf("❌ 去重失败: %s", e$message), type = "error")
	})
	})

	# 去重后统计显示
	output$chip_after_dedupe_stats <- renderUI({
	req(chip_data$dedupe_report)

	report <- chip_data$dedupe_report

	div(
	style = "font-size: 12px; color: #28a745;",
	tags$ul(style = "padding-left: 15px; margin: 5px 0;",
	tags$li(sprintf("基因数: %d", report$n_genes_after)),
	tags$li(sprintf("样本数: %d", report$n_samples)),
	tags$li(sprintf("减少探针数: %d (%.1f%%)", report$n_removed, report$reduction_rate))
	)
	)
	})

	# 去重结果显示
	output$chip_dedupe_result_ui <- renderUI({
	req(chip_data$dedupe_report)

	report <- chip_data$dedupe_report

	tagList(
	tags$table(
	class = "table table-striped",
	tags$thead(
	tags$tr(
	tags$th("项目"),
	tags$th("值")
	)
	),
	tags$tbody(
	tags$tr(
	tags$td("去重前探针数"),
	tags$td(report$n_probes_before)
	),
	tags$tr(
	tags$td("去重后基因数"),
	tags$td(report$n_genes_after)
	),
	tags$tr(
	tags$td("减少的探针数"),
	tags$td(report$n_removed)
	),
	tags$tr(
	tags$td("减少比例"),
	tags$td(sprintf("%.1f%%", report$reduction_rate))
	),
	tags$tr(
	tags$td("样本数"),
	tags$td(report$n_samples)
	)
	)
	),

	br(),

	h6("📊 去重后表达矩阵预览（前5行 × 前10列）：", style = "color: #666;"),

	# 显示去重后的矩阵预览
	DTOutput("chip_deduped_matrix_preview")
	)
	})

	# 去重后矩阵预览表格
	output$chip_deduped_matrix_preview <- renderDT({
	req(chip_data$expr_deduped)

	preview_df <- head(chip_data$expr_deduped, 5)
	if (ncol(preview_df) > 10) {
	preview_df <- preview_df[, 1:10]
	}

	datatable(
	as.data.frame(preview_df),
	options = list(
	dom = 't',
	paging = FALSE,
	scrollX = TRUE
	),
	rownames = TRUE,
	filter = 'none'
	) %>%
	formatStyle(columns = 1:ncol(preview_df), fontSize = '85%')
	})

	# 步骤3: 生成标准格式数据
	observeEvent(input$chip_generate_standard_data, {
	req(chip_data$expr_deduped)

	showNotification("🚀 正在生成标准格式数据...", type = "message")

	tryCatch({
	# 保存为标准格式（可直接用于现有分析模块）
	chip_data$standard_expression <- chip_data$expr_deduped

	# 保存样本名称
	chip_data$sample_names <- colnames(chip_data$expr_deduped)

	# 保存基因名称
	chip_data$gene_names <- rownames(chip_data$expr_deduped)

	# 标记数据已准备好
	chip_data$ready_for_analysis <- TRUE

	cat(sprintf("✅ 标准格式数据已生成: %d 基因 × %d 样本\n",
	nrow(chip_data$standard_expression),
	ncol(chip_data$standard_expression)))

	showNotification(
	sprintf("✅ 标准格式数据已生成！%d 基因 × %d 样本",
	nrow(chip_data$standard_expression),
	ncol(chip_data$standard_expression)),
	type = "message",
	duration = 10
	)

	}, error = function(e) {
	showNotification(sprintf("❌ 生成失败: %s", e$message), type = "error")
	})
	})

	# 标准数据摘要显示
	output$chip_standard_data_summary <- renderUI({
	req(chip_data$standard_expression)

	tagList(
	h6("📊 数据摘要", style = "color: #155724;"),
	tags$table(
	class = "table table-striped",
	tags$thead(
	tags$tr(
	tags$th("项目"),
	tags$th("值")
	)
	),
	tags$tbody(
	tags$tr(
	tags$td("基因数"),
	tags$td(nrow(chip_data$standard_expression))
	),
	tags$tr(
	tags$td("样本数"),
	tags$td(ncol(chip_data$standard_expression))
	),
	tags$tr(
	tags$td("样本名称"),
	tags$td(paste(head(chip_data$sample_names, 5), collapse = ", "))
	)
	)
	),
	br(),
	h6("✅ 现在可以进行以下分析：", style = "color: #155724;"),
	tags$ul(style = "padding-left: 20px;",
	tags$li("切换到“差异分析”模块进行limma分析"),
	tags$li("使用样本分组功能设置对照组和处理组"),
	tags$li("进行KEGG和GO富集分析"),
	tags$li("生成火山图和其他可视化")
	)
	)
	})

	# ============================================
	# 原有的分组逻辑继续
	# ============================================

	# 动态生成分组 UI
	output$chip_grouping_ui <- renderUI({
	req(chip_data$series_matrix)

	sample_names <- colnames(chip_data$series_matrix)

	# 如果自动检测到分组
	if (!is.null(chip_data$group_info) && !is.null(chip_data$group_info$pattern_name)) {
	tagList(
	div(
	class = "alert alert-info",
	h5("✅ 自动检测到分组模式"),
	p(sprintf("模式: %s", chip_data$group_info$pattern_name)),
	p(sprintf("对照组: %s",
	paste(chip_data$group_info$ctrl_samples, collapse = ", "))),
	p(sprintf("处理组: %s",
	paste(chip_data$group_info$trt_samples, collapse = ", "))),
	checkboxInput("chip_use_auto_groups",
	"使用自动检测的分组",
	value = TRUE)
	),

	conditionalPanel(
	condition = "!input.chip_use_auto_groups",
	h5("手动选择分组:"),
	helpText("💡 提示：可以点击输入框后，直接粘贴样本名称（用逗号或空格分隔）"),
	fluidRow(
	column(6,
	selectizeInput("chip_ctrl_samples",
	"对照组样本:",
	choices = sample_names,
	multiple = TRUE,
	options = list(create = TRUE))
	),
	column(6,
	selectizeInput("chip_trt_samples",
	"处理组样本:",
	choices = sample_names,
	multiple = TRUE,
	options = list(create = TRUE))
	)
	)
	)
	)
	} else {
	# 手动分组
	tagList(
	div(
	class = "alert alert-warning",
	h5("⚠️ 未能自动检测分组模式"),
	p("请使用上方的快速粘贴功能，或手动指定对照组和处理组样本。")
	),
	helpText("💡 提示：可以点击输入框后，直接粘贴样本名称（用逗号或空格分隔）"),
	fluidRow(
	column(6,
	selectizeInput("chip_ctrl_samples",
	"对照组样本:",
	choices = sample_names,
	multiple = TRUE,
	options = list(create = TRUE))
	),
	column(6,
	selectizeInput("chip_trt_samples",
	"处理组样本:",
	choices = sample_names,
	multiple = TRUE,
	options = list(create = TRUE))
	)
	)
	)
	}
	})

	# 显示当前分组状态
	output$chip_current_groups_ui <- renderUI({
	req(chip_data$series_matrix)

	# 检查是否有手动设置的分组
	has_manual <- !is.null(chip_data$manual_ctrl_samples) &&
	!is.null(chip_data$manual_trt_samples)

	# 检查是否有自动检测的分组
	has_auto <- !is.null(chip_data$group_info) &&
	!is.null(chip_data$group_info$pattern_name)

	if (has_manual) {
	div(
	class = "alert alert-success",
	h5("✅ 当前分组（手动设置）"),
	p(sprintf("对照组 (%d个): %s",
	length(chip_data$manual_ctrl_samples),
	paste(chip_data$manual_ctrl_samples, collapse = ", "))),
	p(sprintf("处理组 (%d个): %s",
	length(chip_data$manual_trt_samples),
	paste(chip_data$manual_trt_samples, collapse = ", ")))
	)
	} else if (has_auto) {
	div(
	class = "alert alert-info",
	h5("🤖 当前分组（自动检测）"),
	p(sprintf("模式: %s", chip_data$group_info$pattern_name)),
	p(sprintf("对照组 (%d个): %s",
	length(chip_data$group_info$ctrl_samples),
	paste(chip_data$group_info$ctrl_samples, collapse = ", "))),
	p(sprintf("处理组 (%d个): %s",
	length(chip_data$group_info$trt_samples),
	paste(chip_data$group_info$trt_samples, collapse = ", ")))
	)
	} else {
	div(
	class = "alert alert-warning",
	h5("⚠️ 尚未设置分组"),
	p("请使用上方的快速粘贴功能设置分组，或使用手动选择。")
	)
	}
	})

	# 手动分组UI（折叠状态）
	output$chip_manual_grouping_ui <- renderUI({
	req(chip_data$series_matrix)

	sample_names <- colnames(chip_data$series_matrix)

	tagList(
	h5("📝 手动选择样本（备选方案）", style = "color: #6E6E73;"),
	helpText("如果快速粘贴不方便，可以在这里手动选择样本："),
	fluidRow(
	column(6,
	selectInput("chip_ctrl_samples_manual",
	"对照组样本:",
	choices = sample_names,
	multiple = TRUE)
	),
	column(6,
	selectInput("chip_trt_samples_manual",
	"处理组样本:",
	choices = sample_names,
	multiple = TRUE)
	)
	),
	actionButton("chip_apply_manual_groups",
	"✅ 应用手动选择的分组",
	class = "btn-success",
	style = "width: 100%; margin-top: 10px;")
	)
	})

	# 应用手动选择的分组
	observeEvent(input$chip_apply_manual_groups, {
	req(input$chip_ctrl_samples_manual)
	req(input$chip_trt_samples_manual)

	if (length(input$chip_ctrl_samples_manual) == 0 \|\|
	length(input$chip_trt_samples_manual) == 0) {
	showNotification("请至少为每组选择一个样本！", type = "warning")
	return(NULL)
	}

	chip_data$manual_ctrl_samples <- input$chip_ctrl_samples_manual
	chip_data$manual_trt_samples <- input$chip_trt_samples_manual

	showNotification(
	sprintf("✅ 已应用手动分组: %d 对照 + %d 处理",
	length(input$chip_ctrl_samples_manual),
	length(input$chip_trt_samples_manual)),
	type = "message"
	)
	})

	# 运行差异分析
	observeEvent(input$run_chip_analysis, {
	req(chip_data$series_matrix)

	# 获取分组信息（优先级：手动粘贴 > 自动检测 > 下拉选择）
	if (!is.null(chip_data$manual_ctrl_samples) && !is.null(chip_data$manual_trt_samples)) {
	# 使用手动粘贴的分组
	ctrl_samples <- chip_data$manual_ctrl_samples
	trt_samples <- chip_data$manual_trt_samples
	cat("✅ 使用手动粘贴的分组\n")
	} else if (!is.null(input$chip_use_auto_groups) && input$chip_use_auto_groups &&
	!is.null(chip_data$group_info) && !is.null(chip_data$group_info$pattern_name)) {
	# 使用自动检测的分组
	ctrl_samples <- chip_data$group_info$ctrl_samples
	trt_samples <- chip_data$group_info$trt_samples
	cat("✅ 使用自动检测的分组\n")
	} else {
	# 使用下拉选择的分组
	ctrl_samples <- input$chip_ctrl_samples
	trt_samples <- input$chip_trt_samples
	cat("✅ 使用下拉选择的分组\n")
	}

	# 验证分组
	if (is.null(ctrl_samples) \|\| length(ctrl_samples) == 0 \|\|
	is.null(trt_samples) \|\| length(trt_samples) == 0) {
	showNotification("请先设置对照组和处理组样本！", type = "error")
	return(NULL)
	}

	# 显示进度
	showNotification("正在运行差异分析...", type = "message")

	# ✅ 优先使用经过完整预处理流程的数据
	if (!is.null(chip_data$standard_expression) && chip_data$ready_for_analysis) {
	# 使用标准格式数据（已探针注释、预处理、去重）
	expr_matrix <- chip_data$standard_expression
	cat("✅ 使用标准格式数据（已探针注释和去重）\n")
	cat(sprintf(" 表达矩阵: %d 基因 × %d 样本\n",
	nrow(expr_matrix), ncol(expr_matrix)))

	} else if (!is.null(chip_data$expr_deduped)) {
	# 使用去重后的数据
	expr_matrix <- chip_data$expr_deduped
	cat("✅ 使用去重后的表达数据\n")

	} else if (!is.null(chip_data$merged_matrix)) {
	# 使用合并后的数据（探针已注释，但未去重）
	# 需要去掉ProbeID和Gene列
	merged_df <- chip_data$merged_matrix
	# 识别数值列（样本数据）
	numeric_cols <- sapply(merged_df, function(x) is.numeric(x))
	expr_matrix <- as.matrix(merged_df[, numeric_cols, drop = FALSE])
	# 使用ProbeID作为行名（因为可能还需要探针级别的信息）
	if ("ProbeID" %in% colnames(merged_df)) {
	rownames(expr_matrix) <- merged_df$ProbeID
	} else {
	rownames(expr_matrix) <- merged_df$Gene
	}
	cat("✅ 使用合并后的数据（探针已注释）\n")

	} else {
	# 最后的备选方案：使用原始Series Matrix
	probe_mapping <- chip_data$probe_mapping

	if (is.null(probe_mapping)) {
	# 如果没有加载注释文件，使用探针ID作为基因符号
	expr_matrix <- chip_data$series_matrix
	cat("⚠️ 未加载注释文件，使用探针ID作为基因符号\n")
	} else {
	# 使用注释映射（旧的自动检测方法）
	expr_matrix <- aggregate_probe_expression(
	chip_data$series_matrix,
	probe_mapping
	)

	if (is.null(expr_matrix)) {
	showNotification("探针注释失败！", type = "error")
	return(NULL)
	}
	}
	}

	# 运行 limma 分析
	limma_res <- run_limma_analysis(
	expr_matrix = expr_matrix,
	ctrl_samples = ctrl_samples,
	trt_samples = trt_samples,
	logfc_threshold = input$chip_logfc_threshold,
	pvalue_threshold = input$chip_pvalue_threshold,
	pval_type = input$chip_pval_type # 🔧 传入用户选择的P值类型
	)

	if (is.null(limma_res)) {
	showNotification("差异分析失败！", type = "error")
	return(NULL)
	}

	# 转换为标准格式
	formatted_results <- format_chip_results_for_pipeline(
	limma_res,
	expr_matrix,
	ctrl_samples,
	trt_samples
	)

	# 保存结果到 reactiveValues
	chip_data$limma_results <- limma_res
	chip_data$formatted_results <- formatted_results

	showNotification(
	sprintf("✅ 分析完成: %d 个显著差异基因",
	limma_res$n_significant),
	type = "message"
	)
	})

	# 显示结果
	output$chip_results_ui <- renderUI({
	req(chip_data$limma_results)

	limma_res <- chip_data$limma_results

	tagList(
	# 统计摘要
	fluidRow(
	column(3,
	wellPanel(
	style = "background: #f8f9fa; border: 2px solid #dee2e6; text-align: center; padding: 20px;",
	h3(limma_res$n_total, style = "color: #495057; margin: 10px 0;"),
	h6("总基因数", style = "color: #6c757d; margin: 0;")
	)
	),
	column(3,
	wellPanel(
	style = "background: #e7f3ff; border: 2px solid #007bff; text-align: center; padding: 20px;",
	h3(limma_res$n_significant, style = "color: #007bff; margin: 10px 0;"),
	icon("star", style = "color: #007bff; font-size: 24px;"),
	h6("显著差异", style = "color: #007bff; margin: 5px 0 0 0;")
	)
	),
	column(3,
	wellPanel(
	style = "background: #d4edda; border: 2px solid #28a745; text-align: center; padding: 20px;",
	h3(limma_res$n_up, style = "color: #28a745; margin: 10px 0;"),
	icon("arrow-up", style = "color: #28a745; font-size: 24px;"),
	h6("上调", style = "color: #28a745; margin: 5px 0 0 0;")
	)
	),
	column(3,
	wellPanel(
	style = "background: #f8d7da; border: 2px solid #dc3545; text-align: center; padding: 20px;",
	h3(limma_res$n_down, style = "color: #dc3545; margin: 10px 0;"),
	icon("arrow-down", style = "color: #dc3545; font-size: 24px;"),
	h6("下调", style = "color: #dc3545; margin: 5px 0 0 0;")
	)
	)
	),

	hr(),

	# 结果表格
	h4("差异分析结果"),
	DTOutput("chip_results_table"),

	br(),

	# 下载按钮
	downloadButton("download_chip_results", "📥 下载结果", class = "btn-success")
	)
	})

	# 结果表格
	output$chip_results_table <- renderDT({
	req(chip_data$limma_results)

	results <- chip_data$limma_results$results

	# 🔧 添加显著性标记 - 根据用户选择的P值类型
	pval_col <- if (input$chip_pval_type == "adj.P.Val") "adj.P.Val" else "P.Value"

	results$Significant <- ifelse(
	results[[pval_col]] < input$chip_pvalue_threshold &
	abs(results$logFC) >= input$chip_logfc_threshold,
	"Yes", "No"
	)

	# 🔧 修复：重新排列列顺序，确保ID和SYMBOL列在前
	# 期望顺序：ID, SYMBOL, logFC, AveExpr, t, P.Value, adj.P.Val, B, Significant
	results_ordered <- results[, c("ID", "SYMBOL", "logFC", "AveExpr", "t",
	"P.Value", "adj.P.Val", "B", "Significant")]

	# 🔍 诊断：检查ID列的内容
	id_sample <- head(results_ordered$ID, 5)
	cat(sprintf("📊 差异分析结果ID列示例: %s\n", paste(id_sample, collapse = ", ")))
	cat(sprintf("📊 ID列类型: %s\n", class(results_ordered$ID)[1]))

	# 检查ID列是否为基因符号（包含字母）还是EntrezID（纯数字）
	is_entrez_id <- all(grepl("^[0-9]+$", results_ordered$ID[!is.na(results_ordered$ID)]))
	if (!is_entrez_id) {
	cat("⚠️ 警告: ID列包含基因符号而非Entrez Gene ID！\n")
	cat("💡 这可能是因为SOFT文件中缺少EntrezID列\n")
	}

	datatable(
	results_ordered,
	options = list(
	pageLength = 25,
	scrollX = TRUE,
	order = list(list(6, 'asc')) # 按P.Value排序（第6列）
	),
	filter = 'top',
	rownames = FALSE
	) %>%
	formatRound(columns = c('logFC', 'AveExpr', 't', 'P.Value', 'adj.P.Val'), digits = 4) %>%
	formatStyle(
	columns = c('ID', 'SYMBOL'),
	backgroundColor = '#e8f4f8',
	fontWeight = 'bold'
	) %>%
	formatStyle(
	'Significant',
	color = styleEqual(c('Yes', 'No'), c('green', 'grey')),
	fontWeight = 'bold'
	)
	})

	# 下载结果
	output$download_chip_results <- downloadHandler(
	filename = function() {
	sprintf("chip_analysis_results_%s.csv",
	Sys.Date())
	},
	content = function(file) {
	req(chip_data$limma_results)

	results <- chip_data$limma_results$results
	write.csv(results, file, row.names = FALSE)
	}
	)
	}

	# =====================================================
	# 模块结束
	# =====================================================

	# =====================================================
	# 7. 辅助函数：解析粘贴的样本列表
	# =====================================================

	#' 解析粘贴的样本列表（每行一个样本名）
	#'
	#' @param pasted_text 粘贴的文本内容
	#' @param expr_matrix 表达矩阵（用于验证样本名）
	#' @return vector 样本名向量
	parse_sample_list <- function(pasted_text, expr_matrix) {
	cat("🔍 开始解析样本列表...\n")

	# 分割成行
	lines <- strsplit(pasted_text, "\n")[[1]]
	lines <- trimws(lines) # 移除首尾空白
	lines <- lines[lines != ""] # 移除空行

	if (length(lines) == 0) {
	cat("⚠️ 粘贴内容为空\n")
	return(NULL)
	}

	cat(sprintf("📊 读取到 %d 行\n", length(lines)))

	# 获取表达矩阵的样本名
	matrix_samples <- colnames(expr_matrix)

	# 匹配样本名
	valid_samples <- intersect(lines, matrix_samples)

	if (length(valid_samples) == 0) {
	cat("⚠️ 未找到匹配的样本\n")
	cat(sprintf(" 粘贴的样本: %s\n", paste(head(lines, 3), collapse = ", ")))
	cat(sprintf(" 可用样本: %s\n", paste(head(matrix_samples, 3), collapse = ", ")))
	return(NULL)
	}

	cat(sprintf("✅ 匹配成功: %d / %d 个样本\n",
	length(valid_samples), length(lines)))

	# 检查是否有未匹配的样本
	unmatched <- setdiff(lines, matrix_samples)
	if (length(unmatched) > 0) {
	cat(sprintf("⚠️ %d 个样本未匹配: %s\n",
	length(unmatched),
	paste(head(unmatched, 3), collapse = ", ")))
	}

	return(valid_samples)
	}

	#' 解析用户粘贴的分组信息（旧版本，保留备用）
	#'
	#' @param pasted_text 粘贴的文本内容
	#' @param group_col_name 分组列名（可选）
	#' @param expr_matrix 表达矩阵（用于验证样本名）
	#' @return list 包含 ctrl_samples 和 trt_samples
	parse_pasted_groups <- function(pasted_text, group_col_name, expr_matrix) {
	cat("🔍 开始解析粘贴的分组信息...\n")

	# 分割成行
	lines <- strsplit(pasted_text, "\n")[[1]]
	lines <- lines[lines != ""] # 移除空行

	if (length(lines) == 0) {
	return(list(success = FALSE, error = "粘贴内容为空"))
	}

	# 方法1: 如果指定了分组列名，按列名解析
	if (!is.null(group_col_name) && group_col_name != "") {
	cat(sprintf("📋 使用分组列名: %s\n", group_col_name))

	# 尝试读取为表格
	tryCatch({
	# 读取文本
	text_conn <- textConnection(lines)
	df <- read.table(text_conn, header = TRUE, sep = "\t",
	stringsAsFactors = FALSE, check.names = FALSE,
	quote = "\"", comment.char = "")
	close(text_conn)

	# 检查是否存在分组列
	if (!group_col_name %in% colnames(df)) {
	return(list(success = FALSE,
	error = sprintf("未找到分组列 '%s'，可用列: %s",
	group_col_name,
	paste(colnames(df), collapse = ", "))))
	}

	# 提取分组信息
	groups <- df[[group_col_name]]

	# 获取唯一分组
	unique_groups <- unique(groups)

	if (length(unique_groups) < 2) {
	return(list(success = FALSE, error = "分组列中只有一个组，需要至少2个组"))
	}

	# 如果超过2个组，取前2个
	if (length(unique_groups) > 2) {
	cat(sprintf("⚠️ 检测到 %d 个组，将使用前两个: %s\n",
	length(unique_groups),
	paste(unique_groups[1:2], collapse = ", ")))
	unique_groups <- unique_groups[1:2]
	}

	# 根据分组列提取样本
	ctrl_name <- unique_groups[1]
	trt_name <- unique_groups[2]

	ctrl_samples <- colnames(df)[groups == ctrl_name]
	trt_samples <- colnames(df)[groups == trt_name]

	# 移除分组列本身
	ctrl_samples <- ctrl_samples[ctrl_samples != group_col_name]
	trt_samples <- trt_samples[trt_samples != group_col_name]

	cat(sprintf("✅ 解析成功: %d 对照 (%s) + %d 处理 (%s)\n",
	length(ctrl_samples), ctrl_name,
	length(trt_samples), trt_name))

	return(list(
	success = TRUE,
	ctrl_samples = ctrl_samples,
	trt_samples = trt_samples
	))

	}, error = function(e) {
	return(list(success = FALSE, error = paste("解析表格失败:", e$message)))
	})
	}

	# 方法2: 没有指定列名，自动检测
	cat("🤖 自动检测分组模式...\n")

	# 尝试检测第一行是否为列名
	first_line <- lines[1]

	tryCatch({
	# 读取为表格
	text_conn <- textConnection(lines)
	df <- read.table(text_conn, header = TRUE, sep = "\t",
	stringsAsFactors = FALSE, check.names = FALSE,
	quote = "\"", comment.char = "")
	close(text_conn)

	# 获取所有样本名（列名，排除第一列ID列）
	all_samples <- colnames(df)[-1] # 排除ID列

	# 获取表达矩阵的样本名
	matrix_samples <- colnames(expr_matrix)

	# 找到交集
	common_samples <- intersect(all_samples, matrix_samples)

	if (length(common_samples) < 2) {
	return(list(success = FALSE,
	error = sprintf("在粘贴内容中只找到 %d 个有效样本，需要至少2个",
	length(common_samples))))
	}

	cat(sprintf("📊 找到 %d 个有效样本\n", length(common_samples)))

	# 尝试从列名中自动分组
	# 使用现有的自动检测函数
	sample_names <- common_samples

	# 如果有多余的列，尝试从第二列开始检测分组模式
	if (ncol(df) > 2) {
	# 检查第二列是否有分组信息
	potential_groups <- df[[2]]

	if (length(unique(potential_groups)) == 2) {
	unique_g <- unique(potential_groups)
	ctrl_samples <- sample_names[potential_groups == unique_g[1]]
	trt_samples <- sample_names[potential_groups == unique_g[2]]

	cat(sprintf("✅ 自动检测分组: %d 对照 + %d 处理\n",
	length(ctrl_samples), length(trt_samples)))

	return(list(
	success = TRUE,
	ctrl_samples = ctrl_samples,
	trt_samples = trt_samples
	))
	}
	}

	# 如果无法自动检测，返回所有样本让用户手动选择
	cat("⚠️ 无法自动检测分组，返回所有样本供手动选择\n")

	# 默认：前一半作为对照，后一半作为处理
	n_samples <- length(sample_names)
	mid_point <- ceiling(n_samples / 2)

	ctrl_samples <- sample_names[1:mid_point]
	trt_samples <- sample_names[(mid_point+1):n_samples]

	cat(sprintf("⚠️ 默认分组: 前 %d 个为对照，后 %d 个为处理\n",
	length(ctrl_samples), length(trt_samples)))

	return(list(
	success = TRUE,
	ctrl_samples = ctrl_samples,
	trt_samples = trt_samples,
	warning = "无法自动检测分组模式，已按位置默认分组，请手动调整"
	))

	}, error = function(e) {
	return(list(success = FALSE, error = paste("解析失败:", e$message)))
	})
	}