Spaces:

Xianfish9
/

DeepKMulti

Sleeping

App Files Files Community

Xianfish9 committed on Nov 30, 2025

Commit

ad61623

verified ·

1 Parent(s): 2020c05

Update app.py

Browse files

Files changed (1) hide show

app.py +135 -85

app.py CHANGED Viewed

@@ -1,207 +1,257 @@
 import numpy as np
 import os
 import re
 # --- 依赖导入 ---
 from model import CAFN
 from Feature_extraction_algorithms.PSTAAP import PSTAAP_feature, load_precomputed_fr_matrix
 from Feature_extraction_algorithms.Physicochemical import PC_feature
-try:
-    FR_MATRIX_PATH = 'Fr_train.mat'
-    if not os.path.exists(FR_MATRIX_PATH):
-        raise FileNotFoundError(f"PSTAAP初始化失败：找不到矩阵文件 {FR_MATRIX_PATH}")
-    load_precomputed_fr_matrix(FR_MATRIX_PATH)
 except Exception as e:
     print(f"PSTAAP 初始化过程中发生严重错误: {e}")
-    model = None
 # --- 3. 特征提取函数 (与之前相同) ---
     data = np.hstack((data, feature))
     return data.astype(np.float32), data2.astype(np.float32)
-# --- 4. 核心预测函数 (重构为处理单个49-mer片段) ---
 def predict_single_49mer(sequence_49mer):
     """
     对单个、长度为49的序列片段进行预测。
-    这是底层的预测引擎。
     """
     if model is None:
-        # 这个错误不应该在UI层面抛出，而是在后台日志中记录
         print("错误：模型核心未加载。")
         return None
     sequence_list = [sequence_49mer]
-    x1_np, x2_np = extract_features_from_seq(sequence_list)
     tensor_x1 = torch.tensor(x1_np).to(device)
     tensor_x2 = torch.tensor(x2_np).to(device)
         outputs = model(tensor_x1, tensor_x2)
     probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
-#Lysine-Acetylation（K-Ac）
     labels = ["Lysine-Acetyllysine (K-Ac)", "Lysine-Crotonyllysine (K-Cr)", "Lysine-Methyllysine (K-Me)", "Lysine-Succinyllysine (K-Succ)"]
     result = {label: float(prob) for label, prob in zip(labels, probabilities)}
     return result
-# --- 5. 新增：FASTA格式解析与主处理流程 ---
 def parse_fasta(fasta_string):
-    """从FASTA格式文本中提取序列。"""
-    # 移除FASTA头（以'>'开头的行）
     sequence_lines = [line for line in fasta_string.splitlines() if not line.startswith('>')]
-    # 连接所有行并移除任何空白字符
     return "".join(sequence_lines).replace(" ", "").replace("\n", "").upper()
 def process_fasta_and_predict(fasta_input):
-    """
-    接收FASTA输入，找到所有K位点，进行切片和预测，
-    并返回用于Gradio HighlightedText组件的数据和一个包含预测结果的状态字典。
-    """
     if not fasta_input or not isinstance(fasta_input, str):
         raise gr.Error("Please enter a valid FASTA format sequence.")
     sequence = parse_fasta(fasta_input)
     if len(sequence) < 49:
-        raise gr.Error(f"The sequence is too short! It needs to be at least 49 amino acids. The current length is {len(sequence)}。")
-    # 存储每个可预测K位点（索引）及其预测结果
     predictions_map = {}
-    # 寻找所有 'K' 的索引
     k_indices = [m.start() for m in re.finditer('K', sequence)]
     for k_index in k_indices:
-        # 尝试以K为中心截取片段 (K前24个, K, K后24个)
         start, end = k_index - 24, k_index + 25
-        # 边界检查，如果长度不足49则跳过
         if start >= 0 and end <= len(sequence):
             fragment = sequence[start:end]
             prediction_result = predict_single_49mer(fragment)
             if prediction_result:
-                # 使用K的原始索引作为键
                 predictions_map[k_index] = prediction_result
     if not predictions_map:
-        # 如果没有一个K位点可以被成功预测
-        return [(sequence, None)], {}, "No valid K sites were found in the sequence for prediction (i.e., there were not enough amino acids before and after K)."
-    # --- 构建Gradio HighlightedText的输入格式 ---
     highlight_data = []
     last_pos = 0
-    # 按索引排序，确保我们按顺序处理序列
     sorted_predictable_indices = sorted(predictions_map.keys())
     for k_index in sorted_predictable_indices:
-        # 添加K之前未高亮的部分
         highlight_data.append((sequence[last_pos:k_index], None))
-        # 添加需要高亮的K，并用其索引作为标签
         highlight_data.append(("K", str(k_index)))
         last_pos = k_index + 1
-    # 添加最后一个K之后剩余的部分
     highlight_data.append((sequence[last_pos:], None))
     initial_info = "Processing complete! Click on the highlighted 'K' site in the sequence below to see its prediction."
     return highlight_data, predictions_map, initial_info
-# --- 6. 新增：Gradio事件处理函数 ---
 def show_results_for_site(evt: gr.SelectData, state_data):
     """
-    当用户点击高亮的K时，此函数被触发。
-    它从state_data中查找并返回该位点的预测结果。
     """
     if evt.value:
-        # evt.value 是 ('K', '索引字符串')
-        k_index_str = evt.value[1]
-        k_index = int(k_index_str)
-        # 从状态字典中获取结果
         result_dict = state_data.get(k_index)
         if result_dict:
-            site_info = f"Prediction results for the segment centered at 'K' at position {k_index + 1}:"
-            return result_dict, site_info
-    # 如果没有选择或出现错误
     return None, "Please click on the highlighted 'K' site in the sequence above to view the results."
-# --- 7. 创建并启动 Gradio 界面 (使用 gr.Blocks) ---
 fasta_example = """>sp|P05141|ADT2_HUMAN ADP/ATP translocase 2 OS=Homo sapiens OX=9606 GN=SLC25A5 PE=1 SV=7
 MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVR
 IPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASG
     gr.Markdown(
         """
         # DeepKMulti Model: Multi-label Classifier for Lysine Modifications
-        **Supports FASTA format input, allowing interactive viewing of the modification possibilities of each lysine site in the protein sequence.**
         """
     )
     with gr.Row():
             fasta_input = gr.Textbox(
                 lines=10,
                 label="Input FASTA format protein sequence",
-                placeholder="Please paste your FASTA formatted sequence here (we provide an example sequence below)..."
             )
             submit_btn = gr.Button("Submit Prediction", variant="primary")
         with gr.Column(scale=3):
             gr.Markdown("### Prediction Results")
             info_text = gr.Textbox(label="State", interactive=False, value="Waiting for input...")
-            # 用于存储所有位点的预测结果，对用户不可见
-            predictions_state = gr.State({})
-            results_output = gr.Label(num_top_classes=4, label="After clicking on the colored 'K' site, the results will be displayed here")
     gr.Markdown("---")
     gr.Markdown("### Visualized Sequence")
-    # 使用 a[class='predictable-k'] 来应用CSS
     highlighted_output = gr.HighlightedText(
         label="Sequence Analysis",
-        color_map={"predictable-k": "red"}, # 旧版Gradio的用法
-        # 在新版Gradio中，CSS通过gr.Blocks的css参数全局定义更可靠
     )
     gr.Examples(
         outputs=[results_output, info_text]
     )
-# 启动应用
-demo.launch(debug=True)

+#test
+import gradio as gr
+import torch
 import numpy as np
 import os
 import re
+import pandas as pd  # --- 新增：引入 pandas 用于处理表格数据 ---
 # --- 依赖导入 ---
+# 请确保 model.py, Feature_extraction_algorithms 文件夹在同一目录下
 from model import CAFN
 from Feature_extraction_algorithms.PSTAAP import PSTAAP_feature, load_precomputed_fr_matrix
 from Feature_extraction_algorithms.Physicochemical import PC_feature
+# --- 1. 模型加载 (与之前相同) ---
+MODEL_PATH = "Adam_lr7e-05_weightdecay0.0001_epochs3480.pth"
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+def load_model(model_path):
+    model = CAFN().to(device)
+    if os.path.exists(model_path):
+        model.load_state_dict(torch.load(model_path, map_location=device))
+        model.eval()
+        print("模型加载成功！")
+        return model
+    else:
+        print(f"错误：在路径 {model_path} 未找到模型文件")
+        return None
+model = load_model(MODEL_PATH)
+# --- 2. PSTAAP 特征提取器初始化 (与之前相同) ---
+try:
+    FR_MATRIX_PATH = 'Fr_train.mat'
+    if not os.path.exists(FR_MATRIX_PATH):
+        # 如果是本地运行且文件确实存在，请忽略此模拟错误；
+        # 这里为了防止代码报错，如果文件不存在可以仅打印警告
+        print(f"警告：找不到矩阵文件 {FR_MATRIX_PATH}，如果是测试环境请忽略。")
+    else:
+        load_precomputed_fr_matrix(FR_MATRIX_PATH)
 except Exception as e:
     print(f"PSTAAP 初始化过程中发生严重错误: {e}")
+    # model = None # 暂时注释掉，以免本地测试时因为缺文件直接无法运行
 # --- 3. 特征提取函数 (与之前相同) ---
+def extract_features_from_seq(sequence_list):
+    data2 = PC_feature(sequence_list)
+    N = len(sequence_list)
+    empty_list_array = [[] for _ in range(N)]
+    data = np.array(empty_list_array, dtype=object)
+    feature = PSTAAP_feature(sequence_list)
     data = np.hstack((data, feature))
     return data.astype(np.float32), data2.astype(np.float32)
+# --- 4. 核心预测函数 (保持不变，依旧返回浮点数) ---
 def predict_single_49mer(sequence_49mer):
     """
     对单个、长度为49的序列片段进行预测。
     """
     if model is None:
         print("错误：模型核心未加载。")
         return None
     sequence_list = [sequence_49mer]
+    # 注意：如果缺少依赖文件，这里可能会报错，请确保环境完整
+    try:
+        x1_np, x2_np = extract_features_from_seq(sequence_list)
+    except Exception as e:
+        print(f"特征提取失败: {e}")
+        return None
     tensor_x1 = torch.tensor(x1_np).to(device)
     tensor_x2 = torch.tensor(x2_np).to(device)
+    with torch.no_grad():
         outputs = model(tensor_x1, tensor_x2)
     probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
     labels = ["Lysine-Acetyllysine (K-Ac)", "Lysine-Crotonyllysine (K-Cr)", "Lysine-Methyllysine (K-Me)", "Lysine-Succinyllysine (K-Succ)"]
+    # 这里保持返回原始 float 数据，方便后续处理
     result = {label: float(prob) for label, prob in zip(labels, probabilities)}
     return result
+# --- 5. FASTA格式解析与主处理流程 (与之前相同) ---
 def parse_fasta(fasta_string):
     sequence_lines = [line for line in fasta_string.splitlines() if not line.startswith('>')]
     return "".join(sequence_lines).replace(" ", "").replace("\n", "").upper()
 def process_fasta_and_predict(fasta_input):
     if not fasta_input or not isinstance(fasta_input, str):
         raise gr.Error("Please enter a valid FASTA format sequence.")
     sequence = parse_fasta(fasta_input)
     if len(sequence) < 49:
+        raise gr.Error(f"The sequence is too short! It needs to be at least 49 amino acids. The current length is {len(sequence)}.")
     predictions_map = {}
     k_indices = [m.start() for m in re.finditer('K', sequence)]
     for k_index in k_indices:
         start, end = k_index - 24, k_index + 25
         if start >= 0 and end <= len(sequence):
             fragment = sequence[start:end]
             prediction_result = predict_single_49mer(fragment)
             if prediction_result:
                 predictions_map[k_index] = prediction_result
     if not predictions_map:
+        return [(sequence, None)], {}, "No valid K sites were found in the sequence for prediction."
     highlight_data = []
     last_pos = 0
     sorted_predictable_indices = sorted(predictions_map.keys())
     for k_index in sorted_predictable_indices:
         highlight_data.append((sequence[last_pos:k_index], None))
         highlight_data.append(("K", str(k_index)))
         last_pos = k_index + 1
     highlight_data.append((sequence[last_pos:], None))
     initial_info = "Processing complete! Click on the highlighted 'K' site in the sequence below to see its prediction."
     return highlight_data, predictions_map, initial_info
+# --- 6. 修改重点：Gradio事件处理函数 ---
 def show_results_for_site(evt: gr.SelectData, state_data):
     """
+    当用户点击高亮的K时触发。
+    此处我们将结果格式化为DataFrame，并精确控制百分比格式。
     """
     if evt.value:
+        k_index_str = evt.value[1]
+        try:
+            k_index = int(k_index_str)
+        except ValueError:
+            return None, "Invalid selection."
         result_dict = state_data.get(k_index)
         if result_dict:
+            site_info = f"Prediction results for 'K' at position {k_index + 1}:"
+            # --- 修改开始：构建详细的表格数据 ---
+            table_data = []
+            for label, score in result_dict.items():
+                # 使用 f-string 的 :.2% 语法，将 0.9299 转换为 92.99%
+                percentage_str = f"{score:.2%}"
+                table_data.append([label, percentage_str])
+            # 创建 Pandas DataFrame
+            df_result = pd.DataFrame(table_data, columns=["Modification Type", "Probability"])
+            # --- 修改结束 ---
+            return df_result, site_info
     return None, "Please click on the highlighted 'K' site in the sequence above to view the results."
+# --- 7. 创建并启动 Gradio 界面 ---
 fasta_example = """>sp|P05141|ADT2_HUMAN ADP/ATP translocase 2 OS=Homo sapiens OX=9606 GN=SLC25A5 PE=1 SV=7
 MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVR
 IPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASG
+GAAGATSLCFVYPLDFARTRLAADVGKAGAEREFRGLGDCLVKIYKSDGIKGLYQGFNVS
+VQGIIIYRAAYFGIYDTAKGMLPDPKNTHIVISWMIAQTVTAVAGLTSYPFDTVRRRMMM
+QSGRKGTDIMYTGTLDCWRKIARDEGGKAFFKGAWSNVLRGMGGAFVLVLYDEIKKYT"""
+with gr.Blocks(css=".predictable-k {color: red; font-weight: bold;}") as demo:
     gr.Markdown(
         """
         # DeepKMulti Model: Multi-label Classifier for Lysine Modifications
+        **Supports FASTA format input, allowing interactive viewing of the modification possibilities of each lysine site.**
         """
     )
     with gr.Row():
+        with gr.Column(scale=2):
             fasta_input = gr.Textbox(
                 lines=10,
                 label="Input FASTA format protein sequence",
+                placeholder="Paste your FASTA sequence here..."
             )
             submit_btn = gr.Button("Submit Prediction", variant="primary")
         with gr.Column(scale=3):
             gr.Markdown("### Prediction Results")
             info_text = gr.Textbox(label="State", interactive=False, value="Waiting for input...")
+            predictions_state = gr.State({})
+            # --- 修改重点：将 gr.Label 替换为 gr.DataFrame ---
+            results_output = gr.DataFrame(
+                headers=["Modification Type", "Probability"],
+                datatype=["str", "str"],
+                label="Detailed Probabilities",
+                interactive=False
+            )
+            # ------------------------------------------------
     gr.Markdown("---")
     gr.Markdown("### Visualized Sequence")
     highlighted_output = gr.HighlightedText(
         label="Sequence Analysis",
+        color_map={"predictable-k": "red"},
     )
     gr.Examples(
+        examples=[[fasta_example]],
+        inputs=fasta_input,
+        label="Example sequence"
+    )
+    # --- 设定事件逻辑 ---
+    submit_btn.click(
+        fn=process_fasta_and_predict,
+        inputs=fasta_input,
+        outputs=[highlighted_output, predictions_state, info_text]
+    )
+    highlighted_output.select(
+        fn=show_results_for_site,
+        inputs=[predictions_state],
         outputs=[results_output, info_text]
     )
+demo.launch()