Upload app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import torch
|
| 3 |
+
import numpy as np
|
| 4 |
+
import pandas as pd
|
| 5 |
+
from torch import nn
|
| 6 |
+
from transformers import AutoTokenizer, AutoModel
|
| 7 |
+
from peft import get_peft_model, LoraConfig, TaskType
|
| 8 |
+
import os
|
| 9 |
+
|
| 10 |
+
# ================= Configuration (kept unchanged) =================
MODEL_DIR = "."  # directory containing the fine-tuned weight file (v14_weights.bin)
BASE_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"  # ESM-2 protein language model backbone (Hugging Face Hub id)
# Multi-label output heads: seven cosmetic-efficacy classes plus an explicit negative class.
LABELS = ['anti_acne', 'anti_aging', 'anti_inflammatory', 'anti_oxidant', 'repair', 'whitening', 'delivery', 'negative']
|
| 14 |
+
|
| 15 |
+
# ================= Core components (kept unchanged) =================
# Kyte-Doolittle hydropathy index per amino acid (single-letter code).
AA_PROPS = {'A': 1.8, 'R': -4.5, 'N': -3.5, 'D': -3.5, 'C': 2.5, 'Q': -3.5, 'E': -3.5, 'G': -0.4, 'H': -3.2, 'I': 4.5, 'L': 3.8, 'K': -3.9, 'M': 1.9, 'F': 2.8, 'P': -1.6, 'S': -0.8, 'T': -0.7, 'W': -0.9, 'Y': -1.3, 'V': 4.2}
# Approximate side-chain charge at physiological pH (His counted as a partial +0.1).
AA_CHARGE = {'R': 1, 'K': 1, 'H': 0.1, 'D': -1, 'E': -1}

def compute_biophysics(seq):
    """Compute a 5-dimensional biophysical feature vector for a peptide.

    Features (in order): mean Kyte-Doolittle hydropathy, net side-chain
    charge, estimated molecular weight in kDa (~110 Da per residue),
    N-terminal residue hydropathy, C-terminal residue charge.

    Args:
        seq: peptide sequence as a string of single-letter amino acid codes.
             Unknown characters contribute 0 to every feature.

    Returns:
        np.ndarray of shape (5,), dtype float32. An all-zero vector is
        returned for an empty sequence.
    """
    length = len(seq)
    if length == 0:
        # Bug fix: previously returned a plain Python list ([0]*5) here,
        # while the non-empty path returns a float32 ndarray. Keep the
        # return type consistent for downstream tensor conversion/formatting.
        return np.zeros(5, dtype=np.float32)
    # Mean hydropathy over the whole sequence.
    hydro = sum(AA_PROPS.get(aa, 0) for aa in seq) / length
    # Net charge (sum of per-residue contributions).
    charge = sum(AA_CHARGE.get(aa, 0) for aa in seq)
    # Rough molecular weight estimate, reported in kDa.
    weight = length * 110 / 1000.0
    n_term = AA_PROPS.get(seq[0], 0)    # hydropathy of the N-terminal residue
    c_term = AA_CHARGE.get(seq[-1], 0)  # charge of the C-terminal residue
    return np.array([hydro, charge, weight, n_term, c_term], dtype=np.float32)
|
| 28 |
+
|
| 29 |
+
class AdaptiveFusionModel(nn.Module):
    """Multi-label classifier fusing ESM-2 embeddings with biophysical features.

    Two heads produce logits independently:
      * ``esm_classifier``     — linear head over the first-token embedding of
        the ESM backbone output.
      * ``feature_classifier`` — small MLP over the hand-crafted feature vector.
    A single learned scalar gate, squashed through a sigmoid, mixes the heads:
    ``logits = alpha * logits_esm + (1 - alpha) * logits_feat``.
    """

    def __init__(self, base_model, num_labels, feature_dim=5):
        # base_model: ESM-2 encoder (possibly PEFT/LoRA-wrapped); must expose
        # ``config.hidden_size`` and return an object with ``.last_hidden_state``.
        # feature_dim: size of the extra biophysics vector (5 in this app).
        super().__init__()
        self.esm = base_model
        self.num_labels = num_labels
        hidden_size = base_model.config.hidden_size

        # Head over the sequence-level ESM embedding.
        self.esm_classifier = nn.Sequential(nn.Dropout(0.1), nn.Linear(hidden_size, num_labels))
        # MLP head over hand-crafted features. NOTE(review): BatchNorm1d raises
        # on batch size 1 in training mode — inference callers must use eval().
        self.feature_classifier = nn.Sequential(nn.Linear(feature_dim, 64), nn.BatchNorm1d(64), nn.ReLU(), nn.Dropout(0.1), nn.Linear(64, num_labels))
        # Gate initialised at 1.38 → sigmoid ≈ 0.80, i.e. ESM head dominates initially.
        self.gate_weight = nn.Parameter(torch.tensor([1.38]))

    def forward(self, input_ids, attention_mask=None, extra_features=None, **kwargs):
        """Return ``(logits, alpha)``; ``alpha`` is None when ``extra_features`` is omitted."""
        outputs = self.esm(input_ids=input_ids, attention_mask=attention_mask, **kwargs)
        # First-token embedding used as the whole-sequence representation.
        cls_embedding = outputs.last_hidden_state[:, 0, :]
        logits_esm = self.esm_classifier(cls_embedding)

        if extra_features is not None:
            logits_feat = self.feature_classifier(extra_features)
            # alpha in (0, 1): learned weight given to the ESM head.
            alpha = torch.sigmoid(self.gate_weight)
            logits = alpha * logits_esm + (1 - alpha) * logits_feat
        else:
            # No biophysics vector supplied: fall back to the ESM head alone.
            logits = logits_esm
            alpha = None
        return logits, alpha
|
| 53 |
+
|
| 54 |
+
# ================= Model loading (kept unchanged) =================
print("🚀 正在加载 BioOracle V14...")
device = torch.device('cpu')  # Spaces CPU runtime; everything stays on CPU

# Load the tokenizer.
print("📥 加载 Tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_NAME)

# Load the ESM-2 base model (downloaded from the Hub on first run).
print("🧠 加载 ESM-2 150M 模型(首次约 600MB,请等待)...")
base_model = AutoModel.from_pretrained(BASE_MODEL_NAME)

# Wrap the backbone with LoRA adapters so the fine-tuned weights line up.
print("🔧 应用 LoRA 配置...")
peft_config = LoraConfig(
    task_type=TaskType.FEATURE_EXTRACTION,
    r=32, lora_alpha=64, lora_dropout=0.1,
    target_modules=["query", "key", "value", "dense"]
)
base_model = get_peft_model(base_model, peft_config)

# Build the fusion architecture on top of the (LoRA-wrapped) backbone.
print("⚙️ 构建融合架构...")
model = AdaptiveFusionModel(base_model, num_labels=len(LABELS))

# Load the fine-tuned weights.
weights_path = os.path.join(MODEL_DIR, "v14_weights.bin")
if not os.path.exists(weights_path):
    raise FileNotFoundError(f"❌ 找不到权重文件: {weights_path}")

print("💾 加载 V14 权重(638MB)...")
# SECURITY NOTE: weights_only=False lets torch.load deserialize arbitrary
# pickled objects. Only load checkpoints from a trusted source; prefer
# weights_only=True if the file contains plain tensors.
state_dict = torch.load(weights_path, map_location=torch.device('cpu'), weights_only=False)

# 🔥 Smart key-name matching (fixes PEFT prefix mismatches between
# checkpoint and the freshly built model).
model_keys = set(model.state_dict().keys())
weight_keys = set(state_dict.keys())

# Case 1: checkpoint lacks the base_model prefix but the model has it (add prefix).
if any('base_model.model' in k for k in model_keys) and not any('base_model.model' in k for k in weight_keys):
    print("⚙️ 调整权重键名以匹配 PEFT 模型结构...")
    new_state_dict = {}
    for key, value in state_dict.items():
        if key.startswith('esm.'):
            # esm.xxx → esm.base_model.model.xxx
            new_key = key.replace('esm.', 'esm.base_model.model.', 1)
            new_state_dict[new_key] = value
        else:
            new_state_dict[key] = value
    state_dict = new_state_dict

# Case 2: checkpoint has the base_model prefix but the model does not (strip prefix).
elif not any('base_model.model' in k for k in model_keys) and any('base_model.model' in k for k in weight_keys):
    print("⚙️ 移除 PEFT 前缀以匹配标准模型结构...")
    new_state_dict = {}
    for key, value in state_dict.items():
        new_key = key.replace('base_model.model.', '')
        new_state_dict[new_key] = value
    state_dict = new_state_dict

# Load weights (strict=False tolerates partial mismatches).
missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)

if missing_keys:
    # Fixed mojibake in this message: "参���" → "参数".
    print(f"⚠️ 缺失 {len(missing_keys)} 个键(可能是新增的参数,如 pooler 层)")
    print(f"   示例: {list(missing_keys)[:3]}")
if unexpected_keys:
    print(f"⚠️ 忽略 {len(unexpected_keys)} 个多余的键")
    print(f"   示例: {list(unexpected_keys)[:3]}")

model.to('cpu')
model.eval()  # required: feature head uses BatchNorm1d, which needs eval mode for batch size 1

print("✅ 模型加载完成!")

# Read out the learned gate so the UI can display the fusion balance.
gate_val = torch.sigmoid(model.gate_weight).item()
esm_weight = gate_val        # share of the ESM head
feat_weight = 1 - gate_val   # share of the biophysics head
|
| 132 |
+
|
| 133 |
+
# ================= 预测函数 (中文版 - 保持原有逻辑) =================
|
| 134 |
+
def predict_peptide(sequence):
    """Predict the bioactivity of a peptide sequence (Chinese output).

    Args:
        sequence: amino acid sequence in single-letter codes; leading/trailing
            whitespace is stripped and the input is upper-cased.

    Returns:
        Tuple of (conclusion_markdown, biophysics_markdown, results_dataframe).
        On invalid input, (error_message, None, None).
    """
    # --- Input validation ---
    seq = sequence.strip().upper()
    valid_aa = set("ACDEFGHIKLMNPQRSTVWY")

    if not seq:
        return "❌ 请输入序列", None, None

    if not set(seq).issubset(valid_aa):
        return "❌ 请输入有效的氨基酸序列(仅限20种标准氨基酸单字母缩写)", None, None

    # --- Data preparation ---
    # Bug fix: truncation=True clips sequences longer than the 128-token
    # window; previously such inputs produced tensors longer than max_length
    # because padding="max_length" alone does not truncate.
    inputs = tokenizer(seq, return_tensors="pt", padding="max_length", max_length=128, truncation=True).to(device)
    raw_feats = compute_biophysics(seq)
    feats_tensor = torch.tensor([raw_feats], dtype=torch.float).to(device)

    # --- Inference (model is in eval mode; no gradients needed) ---
    with torch.no_grad():
        logits, _ = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'], extra_features=feats_tensor)
        probs = torch.sigmoid(logits).cpu().numpy()[0]

    # --- Rank labels by confidence ---
    df_res = pd.DataFrame({"功效标签": LABELS, "置信度": probs})
    df_res = df_res.sort_values(by="置信度", ascending=False).reset_index(drop=True)

    top_label = df_res.iloc[0]['功效标签']
    top_score = df_res.iloc[0]['置信度']

    # --- Conclusion text, tiered by the top confidence ---
    if top_score > 0.8:
        conclusion = f"""
### ✅ 高潜力活性肽

**主要预测功效**: {top_label}
**置信度**: {top_score:.2%}

模型强烈建议将此序列纳入后续湿实验验证流程。
"""
    elif top_score > 0.3:
        conclusion = f"""
### ⚠️ 中等潜力 / 需进一步改造

**主要预测功效**: {top_label}
**置信度**: {top_score:.2%}

该序列可能具有一定活性,或是已知活性肽的突变体。建议结合结构生物学分析。
"""
    else:
        conclusion = f"""
### ❌ 疑似无效序列(负样本)

**最高置信度**: {top_score:.2%}

模型判断该序列主要表现为负样本特征,建议剔除。
"""

    # --- Biophysical feature summary (indices match compute_biophysics order) ---
    biophysics_text = f"""
**生物物理特征分析**:
- 平均疏水性: {raw_feats[0]:.2f}
- 净电荷: {raw_feats[1]:.2f}
- 估算分子量: {raw_feats[2]:.3f} kDa
- N端疏水性: {raw_feats[3]:.2f}
- C端电荷: {raw_feats[4]:.2f}
"""

    # --- Full results table with percent-formatted confidences ---
    df_formatted = df_res.copy()
    df_formatted['置信度'] = df_formatted['置信度'].apply(lambda x: f"{x:.4%}")

    return conclusion, biophysics_text, df_formatted
|
| 208 |
+
|
| 209 |
+
# ================= 预测函数 (英文版 - 新增) =================
|
| 210 |
+
def predict_peptide_en(sequence):
    """Predict peptide bioactivity (English output).

    Mirrors predict_peptide but produces English-language markdown and an
    English-labelled dataframe.

    Args:
        sequence: amino acid sequence in single-letter codes.

    Returns:
        Tuple of (conclusion_markdown, biophysics_markdown, results_dataframe).
        On invalid input, (error_message, None, None).
    """
    # --- Input validation ---
    seq = sequence.strip().upper()
    valid_aa = set("ACDEFGHIKLMNPQRSTVWY")

    if not seq:
        return "❌ Please enter a sequence", None, None

    if not set(seq).issubset(valid_aa):
        return "❌ Invalid sequence. Please use standard 1-letter amino acid codes.", None, None

    # --- Data preparation (same as the Chinese version) ---
    # Bug fix: truncation=True clips sequences longer than the 128-token
    # window; padding="max_length" alone does not truncate.
    inputs = tokenizer(seq, return_tensors="pt", padding="max_length", max_length=128, truncation=True).to(device)
    raw_feats = compute_biophysics(seq)
    feats_tensor = torch.tensor([raw_feats], dtype=torch.float).to(device)

    # --- Inference ---
    with torch.no_grad():
        logits, _ = model(input_ids=inputs['input_ids'], attention_mask=inputs['attention_mask'], extra_features=feats_tensor)
        probs = torch.sigmoid(logits).cpu().numpy()[0]

    # --- Rank labels by confidence ---
    df_res = pd.DataFrame({"Efficacy Label": LABELS, "Confidence": probs})
    df_res = df_res.sort_values(by="Confidence", ascending=False).reset_index(drop=True)

    top_label = df_res.iloc[0]['Efficacy Label']
    top_score = df_res.iloc[0]['Confidence']

    # --- Conclusion text, tiered by the top confidence ---
    if top_score > 0.8:
        conclusion = f"""
### ✅ High Potential Peptide

**Predicted Efficacy**: {top_label}
**Confidence**: {top_score:.2%}

Strongly recommended for wet-lab validation.
"""
    elif top_score > 0.3:
        conclusion = f"""
### ⚠️ Moderate Potential / Optimization Needed

**Predicted Efficacy**: {top_label}
**Confidence**: {top_score:.2%}

May have some activity or be a mutant of a known peptide. Structural analysis suggested.
"""
    else:
        conclusion = f"""
### ❌ Likely Negative / Inactive

**Max Confidence**: {top_score:.2%}

Predicted as a negative sample. Suggested to discard.
"""

    # --- Biophysical feature summary ---
    biophysics_text = f"""
**Biophysical Properties**:
- Avg Hydrophobicity: {raw_feats[0]:.2f}
- Net Charge: {raw_feats[1]:.2f}
- Est. Molecular Weight: {raw_feats[2]:.3f} kDa
- N-term Hydrophobicity: {raw_feats[3]:.2f}
- C-term Charge: {raw_feats[4]:.2f}
"""

    # --- Full results table with percent-formatted confidences ---
    df_formatted = df_res.copy()
    df_formatted['Confidence'] = df_formatted['Confidence'].apply(lambda x: f"{x:.4%}")

    return conclusion, biophysics_text, df_formatted
|
| 284 |
+
|
| 285 |
+
# ================= Gradio UI (front-end design upgrade) =================
# Custom CSS — medical/biotech visual styling for the app container,
# header banner, stat box, and primary buttons.
custom_css = """
.gradio-container {
    font-family: 'Helvetica Neue', Arial, sans-serif;
    background-color: #f9fbfd;
}
.header-area {
    text-align: center;
    margin-bottom: 20px;
    padding: 20px;
    background: linear-gradient(135deg, #eef2f3 0%, #8e9eab 100%);
    border-radius: 12px;
    box-shadow: 0 4px 6px rgba(0,0,0,0.1);
}
.header-area h1 {
    color: #2c3e50;
    font-size: 2.5em;
    margin-bottom: 5px;
}
.header-area h3 {
    color: #546e7a;
    font-weight: 300;
}
.stat-box {
    background: white;
    padding: 15px;
    border-radius: 8px;
    border-left: 5px solid #3498db;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}
.primary-btn {
    background-color: #2980b9 !important;
}
"""

# Use the Soft theme as a base, overriding the primary button colors
# to match the CSS palette above.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="slate",
).set(
    button_primary_background_fill="#2980b9",
    button_primary_background_fill_hover="#3498db",
)
|
| 329 |
+
|
| 330 |
+
# Build the interface
with gr.Blocks(css=custom_css, theme=theme, title="BioOracle V14") as demo:

    # Top header banner (bilingual title)
    with gr.Row():
        gr.HTML(
            """
            <div class="header-area">
                <h1>🧬 BioOracle V14</h1>
                <h3>Giant Biogene AI Screening System | 巨子智筛 AI 活性肽发现系统</h3>
                <p>Powered by ESM-2 150M & Biophysics Guided Learning</p>
            </div>
            """
        )

    # Collapsible model-status panel (shared by both language tabs);
    # esm_weight / feat_weight come from the loaded model's learned gate.
    with gr.Accordion("🧠 Model Internal Status / 模型大脑状态", open=False):
        with gr.Row():
            gr.Markdown(
                f"""
                <div class="stat-box">
                <b>自适应融合权重 (Adaptive Fusion Weights)</b>:<br>
                <ul>
                <li>ESM-2 Deep Semantics (AI Intuition): <b>{esm_weight:.1%}</b></li>
                <li>Biophysics Rules (Physical Laws): <b>{feat_weight:.1%}</b></li>
                </ul>
                <p style="color: grey; font-size: 0.9em;">
                The model automatically balances between deep learning features and physical rules.<br>
                模型自动学会了主要依赖 ESM-2 大模型的深度理解,同时使用物理化学规则作为辅助校验。
                </p>
                </div>
                """
            )

    # Language tabs
    with gr.Tabs():

        # ============ Tab 1: Chinese ============
        with gr.TabItem("🇨🇳 中文版 (Chinese)"):
            with gr.Row():
                with gr.Column(scale=2):
                    sequence_input_zh = gr.Textbox(
                        label="输入待筛选的肽序列",
                        placeholder="例如: GHK",
                        info="输入氨基酸序列(单字母缩写),模型将评估其潜在生物活性",
                        lines=2
                    )
                    predict_btn_zh = gr.Button("🚀 开始演算", variant="primary", size="lg")

                with gr.Column(scale=3):
                    conclusion_output_zh = gr.Markdown(label="活性评估结论")

            with gr.Row():
                biophysics_output_zh = gr.Markdown(label="生物物理特征")
                results_table_zh = gr.Dataframe(
                    label="完整预测数据表",
                    headers=["功效标签", "置信度"],
                    datatype=["str", "str"],
                    row_count=8
                )

            gr.Examples(
                examples=[["GHK"], ["KTTKS"], ["HGK"], ["AECKVQVR"]],
                inputs=sequence_input_zh,
                label="示例序列"
            )

            # Wire the Chinese tab's button to the Chinese-output handler
            predict_btn_zh.click(
                fn=predict_peptide,
                inputs=sequence_input_zh,
                outputs=[conclusion_output_zh, biophysics_output_zh, results_table_zh]
            )

        # ============ Tab 2: English ============
        with gr.TabItem("🇺🇸 English Version"):
            with gr.Row():
                with gr.Column(scale=2):
                    sequence_input_en = gr.Textbox(
                        label="Input Peptide Sequence",
                        placeholder="e.g., GHK",
                        info="Enter amino acid sequence (single letter codes) for bioactivity assessment",
                        lines=2
                    )
                    predict_btn_en = gr.Button("🚀 Analyze Sequence", variant="primary", size="lg")

                with gr.Column(scale=3):
                    conclusion_output_en = gr.Markdown(label="Assessment Conclusion")

            with gr.Row():
                biophysics_output_en = gr.Markdown(label="Biophysical Properties")
                results_table_en = gr.Dataframe(
                    label="Full Prediction Data",
                    headers=["Efficacy Label", "Confidence"],
                    datatype=["str", "str"],
                    row_count=8
                )

            gr.Examples(
                examples=[["GHK"], ["KTTKS"], ["HGK"], ["AECKVQVR"]],
                inputs=sequence_input_en,
                label="Example Sequences"
            )

            # Wire the English tab's button to the English-output handler
            predict_btn_en.click(
                fn=predict_peptide_en,
                inputs=sequence_input_en,
                outputs=[conclusion_output_en, biophysics_output_en, results_table_en]
            )

    # Footer: credits and disclaimer
    gr.Markdown(
        """
        ---
        <div style="text-align: center; color: #7f8c8d; font-size: 0.9em;">
        <b>BioOracle V14</b> | Design for Giant Biogene Internship Project<br>
        <i>Disclaimer: Predictions are for research reference only. Wet-lab validation is required.</i>
        </div>
        """
    )
|
| 451 |
+
|
| 452 |
+
# Launch the Gradio server when run as a script (not on import).
if __name__ == "__main__":
    demo.launch()
|