import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter
from rpy2.robjects.packages import importr

pandas2ri.activate()

# Import the required R packages
stats = importr("stats")
ape = importr("ape")
igraph = importr("igraph", robject_translations={".env": "_env_"})
openxlsx = importr("openxlsx")


def get_r_matrix(df):
    """Convert ``df`` to its R representation.

    The conversion runs inside a local converter that combines rpy2's
    default converter with the pandas converter.

    Args:
        df: Object to convert (presumably a pandas DataFrame, given the
            pandas converter in use -- confirm against callers).

    Returns:
        The rpy2 object produced by ``py2rpy``.
    """
    with localconverter(ro.default_converter + pandas2ri.converter):
        r_tm_matrix = ro.conversion.py2rpy(df)
    return r_tm_matrix


# Evaluating the string defines `convert_to_newick` in the R session; the
# value handed back by ro.r() is that R function.
export_matrix_to_newick_r = ro.r(
    """
    # Cluster the rows of `tm_matrix` hierarchically and export the tree in
    # Newick format.
    #
    # tm_matrix:   matrix-like object whose rows are the items to cluster.
    # output_file: path the Newick tree is written to.
    #
    # Returns the Newick representation of the tree as a string.
    convert_to_newick <- function(tm_matrix, output_file) {
        # Load ape, installing it from CRAN first when it is unavailable
        if (!require(ape, quietly = TRUE)) {
            install.packages("ape", repos = "https://cran.r-project.org")
            library(ape)
        }

        # Pairwise distances between the rows
        dist_matrix <- dist(tm_matrix)

        # Hierarchical clustering
        hclust_tree <- hclust(dist_matrix, method = "ward.D2")

        # Convert to a phylo object
        phylo_tree <- as.phylo(hclust_tree)

        # Write the tree to disk, then produce the same tree as a string
        write.tree(phylo_tree, file = output_file)
        newick_str <- write.tree(phylo_tree)
        newick_str
    }
    """
)

# Evaluating the string defines `create_similarity_network_r` in the R
# session; the value handed back by ro.r() is that R function.
export_similarity_network_r = ro.r(
    """
    # Build a similarity network from `tm_matrix`, detect communities in it,
    # and export the edge list to an Excel workbook.
    #
    # threshold:  entries of `tm_matrix` that are >= threshold become edges.
    # tm_matrix:  square matrix-like object with row names; the row names
    #             are used as the node names.
    # excel_path: path of the .xlsx file to write (overwritten if present).
    #
    # Returns list(cluster_data = <data.frame with columns `protein` and
    # `cluster_name`>, graph = <undirected igraph graph>).
    create_similarity_network_r <- function(threshold, tm_matrix, excel_path) {
        # Load the required packages, installing them from CRAN when missing
        if (!require(igraph, quietly = TRUE)) {
            install.packages("igraph", repos = "https://cran.r-project.org")
            library(igraph)
        }
        if (!require(openxlsx, quietly = TRUE)) {
            install.packages("openxlsx", repos = "https://cran.r-project.org")
            library(openxlsx)
        }

        # (row, col) index pairs at or above the threshold, without
        # self-loops. drop = FALSE keeps the result a two-column matrix even
        # when a single pair survives the filter.
        overthresholdedges <- which(tm_matrix >= threshold, arr.ind = TRUE)
        overthresholdedges <- overthresholdedges[
            overthresholdedges[, 1] != overthresholdedges[, 2], ,
            drop = FALSE
        ]

        # Start from an empty graph and add one vertex per row name
        graph <- graph.empty()
        nodes <- rownames(tm_matrix)
        graph <- add_vertices(graph, nv = length(nodes), name = nodes)

        # Add every edge in one call. Transposing first flattens the matrix
        # pair by pair (from1, to1, from2, to2, ...), which is the layout
        # add_edges() expects. Nothing is added when no pair qualifies.
        if (nrow(overthresholdedges) > 0) {
            graph <- add_edges(graph, as.vector(t(overthresholdedges)))
        }

        # Convert to an undirected graph, collapsing parallel edges
        graph <- as.undirected(graph, mode = "collapse")

        # Community detection
        clusters <- fastgreedy.community(graph)

        # Communities ordered from largest to smallest
        cluster_sizes <- sizes(clusters)
        sorted_clusters <- clusters[order(cluster_sizes, decreasing = TRUE)]

        # Community id of every node
        cluster_members <- membership(clusters)

        # Nodes that sit alone in a community of size 1
        singleton_nodes <- names(
            cluster_members[cluster_members %in% which(sizes(clusters) == 1)]
        )

        # Workbook for the Cytoscape export
        cytoscape_export <- createWorkbook()

        # "Edges" sheet with its two header cells
        addWorksheet(cytoscape_export, sheetName = "Edges")
        writeData(cytoscape_export, sheet = "Edges", x = "Source",
                  startCol = 1, startRow = 1)
        writeData(cytoscape_export, sheet = "Edges", x = "Target",
                  startCol = 2, startRow = 1)

        # Edge list of the undirected graph
        edges <- get.edgelist(graph)

        # Edge endpoints go below the header row
        if (nrow(edges) > 0) {
            writeData(cytoscape_export, sheet = "Edges",
                      x = V(graph)[edges[, 1]]$name,
                      startCol = 1, startRow = 2)
            writeData(cytoscape_export, sheet = "Edges",
                      x = V(graph)[edges[, 2]]$name,
                      startCol = 2, startRow = 2)
        }

        # Last row currently used on the sheet (header + edges)
        last_edge_row <- nrow(edges) + 1

        # Singleton nodes are appended below the edges, in the first column
        if (length(singleton_nodes) > 0) {
            writeData(cytoscape_export, sheet = "Edges", x = singleton_nodes,
                      startCol = 1, startRow = last_edge_row + 1)
        }

        # Save the workbook
        saveWorkbook(cytoscape_export, excel_path, overwrite = TRUE)

        # Build the node/cluster table in one pass instead of growing a
        # data frame row by row. Clusters are numbered by their position in
        # the size-sorted order; singleton nodes are labelled "singleton".
        cluster_ids <- names(sorted_clusters)
        members_by_cluster <- lapply(
            cluster_ids,
            function(cluster_name) sorted_clusters[[cluster_name]]
        )
        proteins <- as.character(unlist(members_by_cluster, use.names = FALSE))
        cluster_index <- rep(
            seq_along(members_by_cluster),
            lengths(members_by_cluster)
        )
        cluster_labels <- as.character(cluster_index)
        cluster_labels[proteins %in% singleton_nodes] <- "singleton"

        export_clusters <- data.frame(
            protein = proteins,
            cluster_name = cluster_labels,
            stringsAsFactors = FALSE
        )

        # Clustering result together with the graph
        list(cluster_data = export_clusters, graph = graph)
    }
    """
)