Yuanclaw / workflow /scripts /06_tf_activity.R
huashu's picture
Export YuanSeq to Hugging Face without binary assets
7e6a9d1
#!/usr/bin/env Rscript
# =====================================================
# YuanSeq Snakemake - 转录因子活性分析 (decoupleR)
# =====================================================
suppressPackageStartupMessages({
library(dplyr)
library(tidyr)
library(tibble)
library(decoupleR)
library(ggplot2)
})
# Trailing command-line arguments (everything after the script name).
args <- commandArgs(trailingOnly = TRUE)

#' Look up a command-line option in the global `args` vector.
#'
#' Both `--name=value` and `--name value` spellings are accepted; the
#' `--name=value` form takes precedence, and the first occurrence wins.
#'
#' @param name Option name without the leading dashes.
#' @param default Value returned when the option is absent, or when the bare
#'   `--name` form is the last argument and therefore has no value after it.
#' @return The option value as a character string, or `default`.
parse_arg <- function(name, default = NULL) {
  prefix <- paste0("--", name, "=")
  # Fixed-string prefix match: `name` is never interpreted as a regular
  # expression, so an option such as "a.b" cannot accidentally match "--axb=".
  hits <- which(startsWith(args, prefix))
  if (length(hits) > 0) {
    return(substring(args[hits[1]], nchar(prefix) + 1))
  }
  # Fall back to the space-separated form: the value is the next argument.
  idx <- match(paste0("--", name), args)
  if (!is.na(idx) && idx < length(args)) {
    return(args[idx + 1])
  }
  default
}
# Optional settings, with their defaults.
species <- parse_arg("species", "Mm")
method <- parse_arg("method", "ulm")

# Required paths; each is NULL when the option was not supplied.
deg_file <- parse_arg("deg")
output_csv <- parse_arg("output_csv")
output_pdf <- parse_arg("output_pdf")

# Abort with the usage line unless all three required paths are present.
required_missing <- vapply(
  list(deg_file, output_csv, output_pdf),
  is.null,
  logical(1)
)
if (any(required_missing)) {
  stop("Usage: Rscript 06_tf_activity.R --deg=DEG.csv --output_csv=OUT.csv --output_pdf=OUT.pdf [--species=Mm] [--method=ulm]")
}
# Load the differential-expression table.
deg <- read.csv(deg_file, stringsAsFactors = FALSE)

# The analysis needs SYMBOL and t_stat. When t_stat is absent, derive a signed
# z-score from the two-sided p-value and the sign of log2FoldChange.
if (!"t_stat" %in% colnames(deg)) {
  # Query the upper tail directly instead of computing `1 - p / 2`: that
  # subtraction rounds to exactly 1 for p below ~1e-16, which yields Inf and
  # would make the finiteness filter below discard the most significant genes.
  # p-values stored as exactly 0 are clamped to the smallest positive
  # normalized double for the same reason. NA p-values stay NA.
  p_two_sided <- pmax(deg$pvalue, .Machine$double.xmin)
  deg$t_stat <- qnorm(p_two_sided / 2, lower.tail = FALSE) *
    sign(deg$log2FoldChange)
}

# One usable statistic per gene symbol: drop missing, non-finite and zero
# statistics, then for duplicated symbols keep the row with the largest
# absolute log2FoldChange (first row on ties).
stats_df <- deg %>%
  filter(!is.na(SYMBOL), !is.na(t_stat), is.finite(t_stat), t_stat != 0) %>%
  group_by(SYMBOL) %>%
  filter(abs(log2FoldChange) == max(abs(log2FoldChange))) %>%
  ungroup() %>%
  distinct(SYMBOL, .keep_all = TRUE) %>%
  select(SYMBOL, t_stat)

# Stop when fewer than 5 genes remain.
if (nrow(stats_df) < 5) {
  stop("有效基因数不足 5 个,无法进行 TF 活性分析")
}
# Species code -> organism name: "Mm" selects mouse, anything else human.
organism <- switch(species, Mm = "mouse", "human")

# The network is cached as collectri_<organism>.rds inside --collectri_dir
# (default: current directory).
collectri_dir <- parse_arg("collectri_dir", ".")
net_file <- file.path(collectri_dir, paste0("collectri_", organism, ".rds"))

if (!file.exists(net_file)) {
  # No cached copy yet: fetch the network via decoupleR and store it for
  # later runs.
  message("下载 CollecTRI 网络...")
  net <- decoupleR::get_collectri(organism = organism, split_complexes = FALSE)
  dir.create(collectri_dir, recursive = TRUE, showWarnings = FALSE)
  saveRDS(net, net_file)
} else {
  net <- readRDS(net_file)
}
# Matrix of t statistics with gene symbols as row names.
t_matrix <- as.matrix(column_to_rownames(stats_df, "SYMBOL"))

# Choose the decoupleR runner for the requested method; an unrecognised
# method falls back to ULM.
tf_runner <- switch(method,
  ulm = decoupleR::run_ulm,
  mlm = decoupleR::run_mlm,
  wsum = decoupleR::run_wsum,
  viper = decoupleR::run_viper,
  decoupleR::run_ulm
)
tf_scores <- tf_runner(
  t_matrix, net,
  .source = "source", .target = "target", .mor = "mor"
)

# Keep source, score and p_value, retain the first row per source, and sort
# by ascending p-value.
tf_columns <- select(tf_scores, source, score, p_value)
tf_unique <- distinct(tf_columns, source, .keep_all = TRUE)
df_out <- arrange(tf_unique, p_value)

# Write the table, creating the output directory if needed.
dir.create(dirname(output_csv), recursive = TRUE, showWarnings = FALSE)
write.csv(df_out, output_csv, row.names = FALSE)
message("TF 活性结果已保存: ", output_csv)
# Bar chart of the 20 transcription factors with the smallest p-values.
df_plot <- head(df_out[order(df_out$p_value), ], 20)
# Reverse the factor levels so the smallest p-value is drawn at the top.
df_plot$source <- factor(df_plot$source, levels = rev(df_plot$source))

# The 1e-10 offset keeps -log10() finite when a p-value is exactly 0.
p <- ggplot(
  df_plot,
  aes(x = score, y = source, fill = -log10(p_value + 1e-10))
) +
  geom_col() +
  scale_fill_gradient(low = "#3498db", high = "#e74c3c") +
  labs(x = "Activity Score", y = "Transcription Factor", fill = "-log10(P)") +
  theme_bw(base_size = 10) +
  theme(axis.text.y = element_text(size = 9))

# Create the PDF's directory as is done for the CSV; otherwise saving fails
# when the PDF is written to a directory that does not exist yet.
dir.create(dirname(output_pdf), recursive = TRUE, showWarnings = FALSE)
# Height grows with the number of bars, with a 5-inch minimum.
ggsave(output_pdf, p, width = 8, height = max(5, nrow(df_plot) * 0.25))
message("TF 活性图已保存: ", output_pdf)