Spaces:

Xianfish9
/

DeepKMulti

Sleeping

App Files Files Community

Xianfish9 committed on Oct 19

Commit

ec1b79a

verified ·

1 Parent(s): 1eeed0f

Create app.py

Browse files

Files changed (1) hide show

app.py +114 -0

app.py ADDED Viewed

	@@ -0,0 +1,114 @@

+#Adam_lr7e-05_weightdecay0.0001_epochs3480.pth
+import gradio as gr
+import torch
+import numpy as np
+import os
+import re
+# --- Dependency imports ---
+# Import the required modules from the project code base.
+# This requires the expected file layout (e.g. /Feature_extraction_algorithms/PSTAAP.py)
+from model import CAFN
+from Feature_extraction_algorithms.PSTAAP import PSTAAP_feature
+from Feature_extraction_algorithms.Physicochemical import PC_feature
+# --- 1. 模型加载 ---
+# 确保 'your_model_name.pth' 和你上传的文件名完全一致
+MODEL_PATH = "Adam_lr7e-05_weightdecay0.0001_epochs3480.pth"  # <--- 在这里修改成你的 .pth 文件名
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+def load_model(model_path):
+    model = CAFN().to(device)
+    if os.path.exists(model_path):
+        model.load_state_dict(torch.load(model_path, map_location=device))
+        model.eval()  # 设置为评估模式
+        print("模型加载成功！")
+        return model
+    else:
+        print(f"错误：在路径 {model_path} 未找到模型文件")
+        return None
+# Load the checkpoint once at import time; `model` is None when the file is missing.
+model = load_model(MODEL_PATH)
+# --- 2. 特征提取函数 ---
+# 这个函数直接改编自你的 dataProcess.py
+def extract_features_from_seq(sequence_list, test_PSTAAP=True):
+    """
+    接收一个包含序列的列表，返回模型所需的两个特征张量 x1 和 x2。
+    """
+    # 提取 PC_feature (对应 x2)
+    data2 = PC_feature(sequence_list)
+    # 提取 PSTAAP_feature (对应 x1)
+    N = len(sequence_list)
+    empty_list_array = [[] for _ in range(N)]
+    data = np.array(empty_list_array, dtype=object)
+    feature = PSTAAP_feature(sequence_list, test_PSTAAP)
+    data = np.hstack((data, feature))
+    # 返回 NumPy 数组
+    return data.astype(np.float32), data2.astype(np.float32)
+# --- 3. 核心预测函数 ---
+# Gradio 界面会调用这个函数
+def predict(sequence_input):
+    if model is None:
+        return {"错误": "模型未能加载，请检查后台日志"}
+    # 输入验证
+    if not sequence_input or not isinstance(sequence_input, str):
+        return {"错误": "请输入有效的生物序列"}
+    # 将输入的字符串处理成符合规范的格式
+    # .strip() 去除首尾空格, .upper() 转换为大写 (如果需要)
+    cleaned_sequence = sequence_input.strip().upper()
+    # 将单个序列放入列表中，因为特征提取函数期望一个列表
+    sequence_list = [cleaned_sequence]
+    # a. 调用特征提取
+    try:
+        x1_np, x2_np = extract_features_from_seq(sequence_list, test_PSTAAP=True)
+    except Exception as e:
+        # 如果特征提取失败，向用户显示错误
+        return {f"特征提取失败": str(e)}
+    # b. 将 NumPy 数组转换为 PyTorch 张量
+    # 特征提取函数应该已经为单个序列返回了正确的形状 (1, ...)，所以不需要 .unsqueeze()
+    tensor_x1 = torch.tensor(x1_np).to(device)
+    tensor_x2 = torch.tensor(x2_np).to(device)
+    # c. 进行预测
+    with torch.no_grad():
+        outputs = model(tensor_x1, tensor_x2)
+    # d. 处理输出
+    # 你的模型输出是4个类别。我们用 sigmoid 来获取每个类别的概率
+    probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
+    # e. 格式化成字典，方便在界面上显示
+    # 根据你的 make_ylabel 函数，这四个类别分别对应 a, c, m, s
+    labels = ["类别 A (a)", "类别 C (c)", "类别 M (m)", "类别 S (s)"]
+    result = {label: float(prob) for label, prob in zip(labels, probabilities)}
+    return result
+# --- 4. 创建并启动 Gradio 界面 ---
+demo = gr.Interface(
+    fn=predict,
+    inputs=gr.Textbox(
+        lines=7,
+        label="输入生物序列 (Input Sequence)",
+        placeholder="请在这里粘贴你的序列..."
+    ),
+    outputs=gr.Label(num_top_classes=4, label="预测概率 (Prediction Probabilities)"),
+    title="CAFN 模型部署：多标签序列分类器",
+    description="输入一个生物序列，模型将预测它属于四个类别 (A, C, M, S) 中每一个的概率。",
+    examples=[
+        ["PLEPIPIVAAAAA"],
+        ["GMWSGGGGISGSLIIVIRAELGVPSGMMILGYLN"],
+    ]
+)
+# 启动应用
+demo.launch()