Spaces:

Xianfish9
/

DeepKMulti

Sleeping

App Files Files Community

Xianfish9 committed on Nov 30, 2025

Commit

6a13dea

verified ·

1 Parent(s): 9c08eb6

Update app.py

Browse files

Files changed (1) hide show

app.py +127 -187

app.py CHANGED Viewed

@@ -1,246 +1,186 @@
 import numpy as np
 import os
 import re
-import pandas as pd
-import torch
-import gradio as gr
 # --- 依赖导入 ---
-# 请确保目录结构正确
-try:
-    from model import CAFN
-    from Feature_extraction_algorithms.PSTAAP import PSTAAP_feature, load_precomputed_fr_matrix
-    from Feature_extraction_algorithms.Physicochemical import PC_feature
-except ImportError as e:
-    print(f"警告：依赖导入失败，请检查文件路径。错误: {e}")
-    # 设置占位符防止直接崩溃
-    CAFN = None
-    PSTAAP_feature = None
-    PC_feature = None
-    load_precomputed_fr_matrix = lambda x: None
-# --- 1. 初始化设置 ---
-device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-FR_MATRIX_PATH = 'Fr_train.mat'
-MODEL_WEIGHTS_PATH = 'DeepKMulti.pth' # 请确保此文件存在
-# 初始化 PSTAAP
 try:
     if not os.path.exists(FR_MATRIX_PATH):
-        print(f"警告：找不到矩阵文件 {FR_MATRIX_PATH}，如果是测试环境请忽略。")
-    else:
-        load_precomputed_fr_matrix(FR_MATRIX_PATH)
 except Exception as e:
-    print(f"PSTAAP 初始化错误: {e}")
-# --- 2. 加载模型 ---
-model = None
-if CAFN is not None:
-    try:
-        # 这里需要根据实际参数实例化模型
-        model = CAFN().to(device)
-        if os.path.exists(MODEL_WEIGHTS_PATH):
-            model.load_state_dict(torch.load(MODEL_WEIGHTS_PATH, map_location=device))
-            model.eval()
-            print("模型加载成功！")
-        else:
-            print(f"警告: 权重文件 {MODEL_WEIGHTS_PATH} 不存在")
-    except Exception as e:
-        print(f"模型加载失败: {e}")
-# --- 3. 特征提取函数 ---
-def extract_features_from_seq(sequence_list):
     """
-    包装特征提取逻辑
     """
-    if PSTAAP_feature is None:
-        raise RuntimeError("特征提取模块未加载")
-    # 模拟特征提取，请根据你实际的 Feature_extraction_algorithms 逻辑调整
-    x1_features = PSTAAP_feature(sequence_list)
-    x2_features = PC_feature(sequence_list)
-    # 转换为 Numpy 数组
-    x1_np = np.array(x1_features, dtype=np.float32)
-    x2_np = np.array(x2_features, dtype=np.float32)
-    return x1_np, x2_np
-# --- 4. 核心预测函数 ---
-def predict_single_49mer(sequence_49mer):
-    if model is None:
-        print("错误：模型未加载")
-        return None
-    try:
-        sequence_list = [sequence_49mer]
-        x1_np, x2_np = extract_features_from_seq(sequence_list)
-        tensor_x1 = torch.tensor(x1_np).to(device)
-        tensor_x2 = torch.tensor(x2_np).to(device)
-        with torch.no_grad():
-            outputs = model(tensor_x1, tensor_x2)
-            probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
-        # 处理 batch_size=1 的维度问题
-        if probabilities.ndim == 0:
-            probabilities = np.array([probabilities])
-        labels = ["Lysine-Acetyllysine (K-Ac)", "Lysine-Crotonyllysine (K-Cr)", "Lysine-Methyllysine (K-Me)", "Lysine-Succinyllysine (K-Succ)"]
-        # 保持原始 float，格式化留给前端展示函数
-        result = {label: float(prob) for label, prob in zip(labels, probabilities)}
-        return result
-    except Exception as e:
-        print(f"预测出错: {e}")
-        return None
-# --- 5. FASTA 解析与处理 ---
 def parse_fasta(fasta_string):
     """Return the residues of a FASTA-formatted string as one uppercase sequence.

     Header lines (those whose first non-blank character is '>') are dropped.
     Every remaining line has all of its whitespace removed (spaces, tabs,
     stray carriage returns, ...) and the pieces are concatenated in order.
     If the input holds several records, their residues are joined into a
     single sequence, since only the header lines are discarded.

     Args:
         fasta_string: Raw text pasted by the user, with or without headers.

     Returns:
         The concatenated, upper-cased sequence; "" when no residues remain.
     """
     residue_chunks = []
     for raw_line in fasta_string.splitlines():
         # Tolerate indentation before '>' so an indented header is not
         # mistaken for sequence data.
         if raw_line.lstrip().startswith('>'):
             continue
         # str.split() with no argument splits on any whitespace run, which
         # strips tabs and other blanks that a plain " " replace would miss.
         residue_chunks.extend(raw_line.split())
     return "".join(residue_chunks).upper()
 def process_fasta_and_predict(fasta_input):
     if not fasta_input or not isinstance(fasta_input, str):
         raise gr.Error("Please enter a valid FASTA format sequence.")
     sequence = parse_fasta(fasta_input)
     if len(sequence) < 49:
-        raise gr.Error(f"Sequence too short (Length: {len(sequence)}). Minimum 49 AA required.")
     predictions_map = {}
     k_indices = [m.start() for m in re.finditer('K', sequence)]
     for k_index in k_indices:
         start, end = k_index - 24, k_index + 25
         if start >= 0 and end <= len(sequence):
             fragment = sequence[start:end]
-            res = predict_single_49mer(fragment)
-            if res:
-                predictions_map[k_index] = res
     if not predictions_map:
-        return [(sequence, None)], {}, "No valid K sites found."
-    # 构建高亮数据
     highlight_data = []
     last_pos = 0
-    sorted_indices = sorted(predictions_map.keys())
-    for k_index in sorted_indices:
-        if k_index > last_pos:
-            highlight_data.append((sequence[last_pos:k_index], None))
         highlight_data.append(("K", str(k_index)))
         last_pos = k_index + 1
-    if last_pos < len(sequence):
-        highlight_data.append((sequence[last_pos:], None))
-    return highlight_data, predictions_map, "Processing complete! Click on a red 'K' to see details."
-# --- 6. 结果展示函数 (这里控制小数位) ---
 def show_results_for_site(evt: gr.SelectData, state_data):
-    # 处理选中事件
-    selected_val = evt.value
-    k_index_str = None
-    # 兼容不同 Gradio 版本的返回值
-    if isinstance(selected_val, (list, tuple)) and len(selected_val) == 2:
-        if selected_val[0] == "K":
-            k_index_str = selected_val[1]
-    elif isinstance(selected_val, str):
-        # 某些情况可能直接返回 label 字符串，需视具体版本而定
-        # 这里主要依赖上方的高亮组件传回 index 字符串
-        pass
-    if k_index_str and state_data:
-        try:
-            k_index = int(k_index_str)
-            result_dict = state_data.get(k_index)
-            if result_dict:
-                site_info = f"Prediction results for 'K' at position {k_index + 1}:"
-                table_data = []
-                for label, score in result_dict.items():
-                    # -----------------------------------------------------
-                    # 【核心修改】控制小数位数
-                    # 方式1：百分比 (推荐) -> "95.12%"
-                    val_str = f"{score:.0%}"
-                    # 方式2：保留4位小数 -> "0.9512"
-                    # val_str = f"{score:.4f}"
-                    # -----------------------------------------------------
-                    table_data.append([label, val_str])
-                df_result = pd.DataFrame(table_data, columns=["Modification Type", "Probability"])
-                return df_result, site_info
-        except ValueError:
-            pass
-    return None, "Please click on a highlighted 'K' site."
-# --- 7. Gradio 界面 ---
-fasta_example_str = """>sp|P05141|ADT2_HUMAN Example
-MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVR
-IPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASG
-"""
-css = ".predictable-k { color: white; background-color: #d32f2f; font-weight: bold; }"
-with gr.Blocks(css=css, title="DeepKMulti") as demo:
-    gr.Markdown("# DeepKMulti Prediction Tool")
     with gr.Row():
-        with gr.Column(scale=2):
             fasta_input = gr.Textbox(
-                lines=8,
-                label="Input FASTA",
-                value=fasta_example_str,
-                placeholder="Paste sequence here..."
             )
-            submit_btn = gr.Button("Submit", variant="primary")
         with gr.Column(scale=3):
-            gr.Markdown("### Results")
-            info_text = gr.Textbox(label="Status", value="Waiting...", interactive=False)
-            # 隐藏的状态组件，用于存储数据
-            predictions_state = gr.State({})
-            # 使用 DataFrame 展示表格
-            results_output = gr.DataFrame(
-                headers=["Modification Type", "Probability"],
-                datatype=["str", "str"], # 设置为 str 以保持百分比格式不被自动转回 float
-                label="Site Probabilities",
-                interactive=False
-            )
-    gr.Markdown("### Sequence Map")
     highlighted_output = gr.HighlightedText(
-        label="Click 'K' to view",
-        combine_adjacent=False,
-        show_legend=False
-    )
-    # 事件绑定
-    submit_btn.click(
-        process_fasta_and_predict,
-        inputs=[fasta_input],
-        outputs=[highlighted_output, predictions_state, info_text]
     )
-    highlighted_output.select(
-        show_results_for_site,
-        inputs=[predictions_state],
         outputs=[results_output, info_text]
     )
-if __name__ == "__main__":
-    demo.launch()

+以下是我编写的app.py代码：
 import numpy as np
 import os
 import re
 # --- 依赖导入 ---
+from model import CAFN
+from Feature_extraction_algorithms.PSTAAP import PSTAAP_feature, load_precomputed_fr_matrix
+from Feature_extraction_algorithms.Physicochemical import PC_feature
 try:
+    FR_MATRIX_PATH = 'Fr_train.mat'
     if not os.path.exists(FR_MATRIX_PATH):
+        raise FileNotFoundError(f"PSTAAP初始化失败：找不到矩阵文件 {FR_MATRIX_PATH}")
+    load_precomputed_fr_matrix(FR_MATRIX_PATH)
 except Exception as e:
+    print(f"PSTAAP 初始化过程中发生严重错误: {e}")
+    model = None
+# --- 3. 特征提取函数 (与之前相同) ---
+    data = np.hstack((data, feature))
+    return data.astype(np.float32), data2.astype(np.float32)
+# --- 4. 核心预测函数 (重构为处理单个49-mer片段) ---
+def predict_single_49mer(sequence_49mer):
     """
+    对单个、长度为49的序列片段进行预测。
+    这是底层的预测引擎。
     """
+    if model is None:
+        # 这个错误不应该在UI层面抛出，而是在后台日志中记录
+        print("错误：模型核心未加载。")
+        return None
+    sequence_list = [sequence_49mer]
+    x1_np, x2_np = extract_features_from_seq(sequence_list)
+    tensor_x1 = torch.tensor(x1_np).to(device)
+    tensor_x2 = torch.tensor(x2_np).to(device)
+    outputs = model(tensor_x1, tensor_x2)
+    probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
+#Lysine-Acetylation（K-Ac）
+    labels = ["Lysine-Acetyllysine (K-Ac)", "Lysine-Crotonyllysine (K-Cr)", "Lysine-Methyllysine (K-Me)", "Lysine-Succinyllysine (K-Succ)"]
+    result = {label: float(prob) for label, prob in zip(labels, probabilities)}
+    return result
+# --- 5. 新增：FASTA格式解析与主处理流程 ---
 def parse_fasta(fasta_string):
+    """从FASTA格式文本中提取序列。"""
+    # 移除FASTA头（以'>'开头的行）
     sequence_lines = [line for line in fasta_string.splitlines() if not line.startswith('>')]
+    # 连接所有行并移除任何空白字符
     return "".join(sequence_lines).replace(" ", "").replace("\n", "").upper()
 def process_fasta_and_predict(fasta_input):
+    """
+    接收FASTA输入，找到所有K位点，进行切片和预测，
+    并返回用于Gradio HighlightedText组件的数据和一个包含预测结果的状态字典。
+    """
     if not fasta_input or not isinstance(fasta_input, str):
         raise gr.Error("Please enter a valid FASTA format sequence.")
     sequence = parse_fasta(fasta_input)
     if len(sequence) < 49:
+        raise gr.Error(f"The sequence is too short! It needs to be at least 49 amino acids. The current length is {len(sequence)}。")
+    # 存储每个可预测K位点（索引）及其预测结果
     predictions_map = {}
+    # 寻找所有 'K' 的索引
     k_indices = [m.start() for m in re.finditer('K', sequence)]
     for k_index in k_indices:
+        # 尝试以K为中心截取片段 (K前24个, K, K后24个)
         start, end = k_index - 24, k_index + 25
+        # 边界检查，如果长度不足49则跳过
         if start >= 0 and end <= len(sequence):
             fragment = sequence[start:end]
+            prediction_result = predict_single_49mer(fragment)
+            if prediction_result:
+                # 使用K的原始索引作为键
+                predictions_map[k_index] = prediction_result
     if not predictions_map:
+        # 如果没有一个K位点可以被成功预测
+        return [(sequence, None)], {}, "No valid K sites were found in the sequence for prediction (i.e., there were not enough amino acids before and after K)."
+    # --- 构建Gradio HighlightedText的输入格式 ---
     highlight_data = []
     last_pos = 0
+    # 按索引排序，确保我们按顺序处理序列
+    sorted_predictable_indices = sorted(predictions_map.keys())
+    for k_index in sorted_predictable_indices:
+        # 添加K之前未高亮的部分
+        highlight_data.append((sequence[last_pos:k_index], None))
+        # 添加需要高亮的K，并用其索引作为标签
         highlight_data.append(("K", str(k_index)))
         last_pos = k_index + 1
+    # 添加最后一个K之后剩余的部分
+    highlight_data.append((sequence[last_pos:], None))
+    initial_info = "Processing complete! Click on the highlighted 'K' site in the sequence below to see its prediction."
+    return highlight_data, predictions_map, initial_info
+# --- 6. 新增：Gradio事件处理函数 ---
 def show_results_for_site(evt: gr.SelectData, state_data):
+    """
+    当用户点击高亮的K时，此函数被触发。
+    它从state_data中查找并返回该位点的预测结果。
+    """
+    if evt.value:
+        # evt.value 是 ('K', '索引字符串')
+        k_index_str = evt.value[1]
+        k_index = int(k_index_str)
+        # 从状态字典中获取结果
+        result_dict = state_data.get(k_index)
+        if result_dict:
+            site_info = f"Prediction results for the segment centered at 'K' at position {k_index + 1}:"
+            return result_dict, site_info
+    # 如果没有选择或出现错误
+    return None, "Please click on the highlighted 'K' site in the sequence above to view the results."
+# --- 7. 创建并启动 Gradio 界面 (使用 gr.Blocks) ---
+fasta_example = """>sp|P05141|ADT2_HUMAN ADP/ATP translocase 2 OS=Homo sapiens OX=9606 GN=SLC25A5 PE=1 SV=7
+MTDAAVSFAKDFLAGGVAAAISKTAVAPIERVKLLLQVQHASKQITADKQYKGIIDCVVR
+IPKEQGVLSFWRGNLANVIRYFPTQALNFAFKDKYKQIFLGGVDKRTQFWLYFAGNLASG
+    gr.Markdown(
+        """
+        # DeepKMulti Model: Multi-label Classifier for Lysine Modifications
+        **Supports FASTA format input, allowing interactive viewing of the modification possibilities of each lysine site in the protein sequence.**
+        """
+    )
     with gr.Row():
             fasta_input = gr.Textbox(
+                lines=10,
+                label="Input FASTA format protein sequence",
+                placeholder="Please paste your FASTA formatted sequence here (we provide an example sequence below)..."
             )
+            submit_btn = gr.Button("Submit Prediction", variant="primary")
         with gr.Column(scale=3):
+            gr.Markdown("### Prediction Results")
+            info_text = gr.Textbox(label="State", interactive=False, value="Waiting for input...")
+            # 用于存储所有位点的预测结果，对用户不可见
+            predictions_state = gr.State({})
+            results_output = gr.Label(num_top_classes=4, label="After clicking on the colored 'K' site, the results will be displayed here")
+    gr.Markdown("---")
+    gr.Markdown("### Visualized Sequence")
+    # 使用 a[class='predictable-k'] 来应用CSS
     highlighted_output = gr.HighlightedText(
+        label="Sequence Analysis",
+        color_map={"predictable-k": "red"}, # 旧版Gradio的用法
+        # 在新版Gradio中，CSS通过gr.Blocks的css参数全局定义更可靠
     )
+    gr.Examples(
         outputs=[results_output, info_text]
     )
+# 启动应用
+demo.launch(debug=True)