Yuanclaw / workflow /scripts /06_tf_activity.R

Export YuanSeq to Hugging Face without binary assets

7e6a9d1 about 1 month ago

3.58 kB

	#!/usr/bin/env Rscript
	# =====================================================
	# YuanSeq Snakemake - 转录因子活性分析 (decoupleR)
	# =====================================================

	suppressPackageStartupMessages({
	library(dplyr)
	library(tidyr)
	library(tibble)
	library(decoupleR)
	library(ggplot2)
	})

	args <- commandArgs(trailingOnly = TRUE)
	parse_arg <- function(name, default = NULL) {
	idx <- grep(paste0("^--", name, "="), args)
	if (length(idx) == 0) {
	idx <- match(paste0("--", name), args)
	if (!is.na(idx) && idx < length(args)) return(args[idx + 1])
	return(default)
	}
	sub(paste0("^--", name, "="), "", args[idx][1])
	}

	deg_file <- parse_arg("deg")
	output_csv <- parse_arg("output_csv")
	output_pdf <- parse_arg("output_pdf")
	species <- parse_arg("species", "Mm")
	method <- parse_arg("method", "ulm")

	if (is.null(deg_file) \|\| is.null(output_csv) \|\| is.null(output_pdf)) {
	stop("Usage: Rscript 06_tf_activity.R --deg=DEG.csv --output_csv=OUT.csv --output_pdf=OUT.pdf [--species=Mm] [--method=ulm]")
	}

	deg <- read.csv(deg_file, stringsAsFactors = FALSE)

	# 需要 SYMBOL 和 t_stat
	if (!"t_stat" %in% colnames(deg)) {
	deg$t_stat <- qnorm(1 - deg$pvalue / 2) * sign(deg$log2FoldChange)
	}

	stats_df <- deg %>%
	filter(!is.na(SYMBOL), !is.na(t_stat), is.finite(t_stat), t_stat != 0) %>%
	group_by(SYMBOL) %>%
	filter(abs(log2FoldChange) == max(abs(log2FoldChange))) %>%
	ungroup() %>%
	distinct(SYMBOL, .keep_all = TRUE) %>%
	select(SYMBOL, t_stat)

	if (nrow(stats_df) < 5) {
	stop("有效基因数不足 5 个，无法进行 TF 活性分析")
	}

	organism <- if (species == "Mm") "mouse" else "human"
	collectri_dir <- parse_arg("collectri_dir", ".")
	net_file <- file.path(collectri_dir, paste0("collectri_", organism, ".rds"))

	if (file.exists(net_file)) {
	net <- readRDS(net_file)
	} else {
	message("下载 CollecTRI 网络...")
	net <- decoupleR::get_collectri(organism = organism, split_complexes = FALSE)
	dir.create(collectri_dir, recursive = TRUE, showWarnings = FALSE)
	saveRDS(net, net_file)
	}

	mat_input <- stats_df %>% column_to_rownames("SYMBOL") %>% as.matrix()

	run_tf <- switch(method,
	ulm = decoupleR::run_ulm(mat_input, net, .source = "source", .target = "target", .mor = "mor"),
	mlm = decoupleR::run_mlm(mat_input, net, .source = "source", .target = "target", .mor = "mor"),
	wsum = decoupleR::run_wsum(mat_input, net, .source = "source", .target = "target", .mor = "mor"),
	viper = decoupleR::run_viper(mat_input, net, .source = "source", .target = "target", .mor = "mor"),
	decoupleR::run_ulm(mat_input, net, .source = "source", .target = "target", .mor = "mor")
	)

	# decoupleR 返回 source, target, statistic, score, p_value 等列
	df_out <- run_tf %>%
	select(source, score, p_value) %>%
	distinct(source, .keep_all = TRUE) %>%
	arrange(p_value)

	dir.create(dirname(output_csv), recursive = TRUE, showWarnings = FALSE)
	write.csv(df_out, output_csv, row.names = FALSE)
	message("TF 活性结果已保存: ", output_csv)

	# 条形图
	df_plot <- head(df_out[order(df_out$p_value), ], 20)
	df_plot$source <- factor(df_plot$source, levels = rev(df_plot$source))

	p <- ggplot(df_plot, aes(x = score, y = source, fill = -log10(p_value + 1e-10))) +
	geom_col() +
	scale_fill_gradient(low = "#3498db", high = "#e74c3c") +
	labs(x = "Activity Score", y = "Transcription Factor", fill = "-log10(P)") +
	theme_bw(base_size = 10) +
	theme(axis.text.y = element_text(size = 9))
	ggsave(output_pdf, p, width = 8, height = max(5, nrow(df_plot) * 0.25))
	message("TF 活性图已保存: ", output_pdf)