Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -59,55 +59,56 @@ def extract_features_from_seq(sequence_list):
|
|
| 59 |
# --- 4. 核心预测函数 ---
|
| 60 |
def predict(sequence_input):
|
| 61 |
if model is None:
|
| 62 |
-
# 如果模型加载失败,可以提前抛出错误
|
| 63 |
raise gr.Error("模型未能加载或初始化失败,请检查后台日志。")
|
| 64 |
|
| 65 |
if not sequence_input or not isinstance(sequence_input, str):
|
| 66 |
-
# 对于无效输入,也直接抛出错误
|
| 67 |
raise gr.Error("请输入有效的生物序列。")
|
| 68 |
|
| 69 |
cleaned_sequence = sequence_input.strip().upper()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
sequence_list = [cleaned_sequence]
|
| 71 |
|
| 72 |
-
#
|
| 73 |
-
# 让任何可能发生的错误自然地被Gradio捕获
|
| 74 |
x1_np, x2_np = extract_features_from_seq(sequence_list)
|
| 75 |
|
| 76 |
-
# 将 NumPy 数组转换为 PyTorch 张量
|
| 77 |
tensor_x1 = torch.tensor(x1_np).to(device)
|
| 78 |
tensor_x2 = torch.tensor(x2_np).to(device)
|
| 79 |
|
| 80 |
-
# 模型预测
|
| 81 |
with torch.no_grad():
|
| 82 |
outputs = model(tensor_x1, tensor_x2)
|
| 83 |
|
| 84 |
-
# 计算概率
|
| 85 |
probabilities = torch.sigmoid(outputs).squeeze().cpu().numpy()
|
| 86 |
|
| 87 |
-
# 准备输出结果
|
| 88 |
labels = ["类别 A (a)", "类别 C (c)", "类别 M (m)", "类别 S (s)"]
|
| 89 |
-
# 确保即使只有一个序列,结果也能正确处理
|
| 90 |
-
if probabilities.ndim == 0: # 如果只有一个输出
|
| 91 |
-
probabilities = [probabilities]
|
| 92 |
-
|
| 93 |
result = {label: float(prob) for label, prob in zip(labels, probabilities)}
|
| 94 |
|
| 95 |
return result
|
| 96 |
|
| 97 |
# --- 5. 创建并启动 Gradio 界面 ---
|
|
|
|
|
|
|
| 98 |
demo = gr.Interface(
|
| 99 |
fn=predict,
|
| 100 |
inputs=gr.Textbox(
|
| 101 |
lines=7,
|
| 102 |
label="输入生物序列 (Input Sequence)",
|
| 103 |
-
|
|
|
|
| 104 |
),
|
| 105 |
outputs=gr.Label(num_top_classes=4, label="预测概率 (Prediction Probabilities)"),
|
| 106 |
title="CAFN 模型部署:多标签序列分类器",
|
| 107 |
-
|
|
|
|
|
|
|
| 108 |
examples=[
|
| 109 |
-
[
|
| 110 |
-
|
|
|
|
| 111 |
]
|
| 112 |
)
|
| 113 |
|
|
|
|
| 59 |
# --- 4. 核心预测函数 ---
|
| 60 |
def predict(sequence_input):
    """Return per-class probabilities for a single input sequence.

    The raw text is normalised (every whitespace character removed, then
    upper-cased), checked against the fixed length the model accepts,
    turned into features by ``extract_features_from_seq`` and passed through
    the module-level ``model`` on ``device``.

    Args:
        sequence_input: Sequence text as typed or pasted by the user. Line
            breaks and spaces inside the text are ignored.

    Returns:
        A dict mapping each entry of ``labels`` to the sigmoid of the
        corresponding model output, as a Python float. If the model emits
        fewer values than there are labels, only the leading labels appear.

    Raises:
        gr.Error: If the model is not loaded, the input is empty or not a
            string, or the normalised sequence is not exactly 49 characters.
    """
    if model is None:
        raise gr.Error("模型未能加载或初始化失败,请检查后台日志。")

    if not sequence_input or not isinstance(sequence_input, str):
        raise gr.Error("请输入有效的生物序列。")

    # The input box is multi-line, so pasted sequences often carry line
    # wraps or spaces. Drop all whitespace, not only the leading/trailing
    # part, so those characters do not count towards the length check.
    cleaned_sequence = "".join(sequence_input.split()).upper()
    if not cleaned_sequence:
        # Whitespace-only input is empty input, not a "length 0" sequence.
        raise gr.Error("请输入有效的生物序列。")

    # Length the model requires; other lengths are rejected before feature
    # extraction is attempted.
    EXPECTED_LENGTH = 49
    if len(cleaned_sequence) != EXPECTED_LENGTH:
        raise gr.Error(f"输入序列长度错误!模型要求序列长度必须为 {EXPECTED_LENGTH} 个字符,但您输入的长度为 {len(cleaned_sequence)}。")

    # Feature extraction takes a list of sequences; a batch of one is passed.
    # NOTE(review): x1_np / x2_np are presumably NumPy arrays - confirm
    # against extract_features_from_seq.
    x1_np, x2_np = extract_features_from_seq([cleaned_sequence])

    tensor_x1 = torch.tensor(x1_np).to(device)
    tensor_x2 = torch.tensor(x2_np).to(device)

    # Inference only: no gradients needed.
    with torch.no_grad():
        outputs = model(tensor_x1, tensor_x2)

    # Flatten to 1-D instead of squeeze(): squeeze() turns a single-element
    # output into a 0-d array, which cannot be iterated by zip() below.
    probabilities = torch.sigmoid(outputs).reshape(-1).cpu().numpy()

    labels = ["类别 A (a)", "类别 C (c)", "类别 M (m)", "类别 S (s)"]
    return {label: float(prob) for label, prob in zip(labels, probabilities)}
|
| 91 |
|
| 92 |
# --- 5. 创建并启动 Gradio 界面 ---
|
| 93 |
+
# Example input offered in the UI. The literal is a 49-character sequence;
# the slice keeps the example at 49 characters even if the literal is later
# replaced by a longer one.
valid_example_sequence = "CGKSFIWSSTLFKHKRIHTGEKPYKCEECGKAFNHSQILLHIRHKRMHT"[:49]

demo = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(
        label="输入生物序列 (Input Sequence)",
        lines=7,
        # The placeholder repeats the length requirement.
        placeholder="请在这里粘贴长度为 49 的序列...",
    ),
    outputs=gr.Label(
        label="预测概率 (Prediction Probabilities)",
        num_top_classes=4,
    ),
    title="CAFN 模型部署:多标签序列分类器",
    # The description states the length requirement explicitly.
    description=(
        "输入一个生物序列,模型将预测它属于四个类别 (A, C, M, S) 中每一个的概率。"
        "\n\n**重要提示:本模型要求输入的序列长度必须为 49 个字符。**"
    ),
    # One example per inner list; any example added here must also be
    # exactly 49 characters long.
    examples=[[valid_example_sequence]],
)
|
| 114 |
|