Spaces:

isyslab
/

LocPred-Prok

Running

App Files Files Community

wangleiofficial committed on Dec 9, 2025

Commit

a2cdbf7

verified ·

1 Parent(s): 886c88b

Update app.py

Browse files

Files changed (1) hide show

app.py +132 -9

app.py CHANGED Viewed

@@ -6,21 +6,144 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 # ==========================
-# 0-3 部分：保持你的底层逻辑完全不变
 # ==========================
-# ... (请保持之前的 Imports, Model Definition, Load Models, Predict Function 代码完全一致) ...
-# 为了节省篇幅，这里假设你已经保留了之前代码的第0到第3部分 (直到 def predict 为止)
-# 务必确保运行前包含之前的 Model 类定义和加载逻辑！
 # ==========================
-# 4. Academic Research Interface
 # ==========================
 # 学术风格 CSS
 academic_css = """
 body { font-family: 'Roboto', 'Helvetica Neue', Arial, sans-serif; }
 .header-container {
-    background: linear-gradient(to right, #1e3a8a, #3b82f6); /* 深蓝学术风 */
     color: white;
     padding: 2.5rem;
     border-radius: 10px;
@@ -106,6 +229,8 @@ with gr.Blocks(theme=theme, css=academic_css, title="LocPred-Prok Web Server") a
                 # 右侧输出
                 with gr.Column(scale=4):
                     gr.Markdown("### 📊 Prediction Results")
                     output_label = gr.Label(num_top_classes=NUM_CLASSES, label="Probabilities")
                     # 解释性文字
@@ -164,9 +289,6 @@ with gr.Blocks(theme=theme, css=academic_css, title="LocPred-Prok Web Server") a
             </table>
             """)
-            # 这里可以放架构图，如果你有图片链接的话
-            # gr.Image("https://your-image-url.com/architecture.png", label="Model Architecture")
         # === Tab 3: Citation (引用) ===
         with gr.TabItem("📝 Citation"):
             gr.Markdown("If you use LocPred-Prok in your research, please cite our paper:")
@@ -194,4 +316,5 @@ with gr.Blocks(theme=theme, css=academic_css, title="LocPred-Prok Web Server") a
     submit_btn.click(fn=predict, inputs=sequence_input, outputs=output_label)
     clear_btn.click(lambda: None, outputs=[output_label])
 app.launch()

 from transformers import AutoTokenizer, AutoModel
 # ==========================
+# 🚧 0. Prevent the Hugging Face cache from overflowing
 # ==========================
+# NOTE(review): these variables are assigned after the `transformers` import
+# above; if the library resolves its cache locations at import time they may
+# not take effect -- confirm, or move this section above that import.
+os.environ.update({
+    "HF_HOME": "/tmp/hf_cache",
+    "TRANSFORMERS_CACHE": "/tmp/hf_cache",
+    "HF_HUB_DISABLE_SYMLINKS_WARNING": "1",
+})
+# On every start, delete whatever an earlier run left in the cache
+# directories and recreate them empty (errors while deleting are ignored).
+for path in ("/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")):
+    shutil.rmtree(path, ignore_errors=True)
+    os.makedirs(path, exist_ok=True)
+# ==========================
+# 1. Model Definition (模型架构定义)
+# ==========================
+class AttentionPooling(nn.Module):
+    """Pool a sequence of vectors into one vector using learned weights.
+
+    A single linear layer scores every position; the scores are
+    softmax-normalised along dim 1 and used for a weighted sum of the inputs.
+    """
+
+    def __init__(self, d_model):
+        """Create the scoring layer, which maps ``d_model`` features to 1."""
+        super().__init__()
+        self.attention_net = nn.Linear(d_model, 1)
+
+    def forward(self, x, mask):
+        """Return the attention-weighted sum of ``x`` over dim 1.
+
+        Args:
+            x: 3-D tensor whose last dim is ``d_model``; dim 1 is pooled away
+                (presumably (batch, seq_len, d_model) -- confirm with caller).
+            mask: tensor matching the first two dims of ``x``; positions equal
+                to 0 are excluded from the pooling.
+
+        Returns:
+            Tensor with dim 1 of ``x`` reduced by the weighted sum.
+        """
+        scores = self.attention_net(x).squeeze(2)
+        # A logit of -inf becomes a weight of exactly zero after the softmax.
+        scores = scores.masked_fill(mask == 0, -float('inf'))
+        weights = F.softmax(scores, dim=1).unsqueeze(1)
+        return torch.bmm(weights, x).squeeze(1)
+class ProtDualBranchEnhancedClassifier(nn.Module):
+    """Two-branch classification head with gated fusion.
+
+    One branch projects the CLS embedding. The other refines the token
+    embeddings with a 1-D convolution, attention-pools them and projects the
+    pooled vector. Both projections are concatenated, scaled element-wise by
+    a sigmoid gate computed from the concatenation, and fed to an MLP head.
+
+    NOTE: the attribute names and the layout of every ``nn.Sequential``
+    determine the ``state_dict`` keys, so they must stay as they are for
+    previously saved checkpoints to keep loading.
+    """
+
+    def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
+        """Build both branches, the gate and the classifier head.
+
+        Args:
+            d_model: feature width of the incoming embeddings.
+            projection_dim: output width of each branch's projection.
+            num_classes: number of logits produced by the head.
+            dropout: dropout probability used inside the head.
+            kernel_size: kernel size of the token-refining convolution.
+        """
+        super().__init__()
+        # CLS branch.
+        self.cls_projector = nn.Linear(d_model, projection_dim)
+        # Token branch: convolution -> attention pooling -> projection.
+        self.token_refiner = nn.Sequential(
+            nn.Conv1d(d_model, d_model, kernel_size, padding='same'),
+            nn.ReLU(),
+        )
+        self.attention_pooling = AttentionPooling(d_model)
+        self.tok_projector = nn.Linear(d_model, projection_dim)
+        # The two projections are concatenated, hence twice the width.
+        fused_dim = 2 * projection_dim
+        hidden_dim = 2 * fused_dim
+        self.gate = nn.Sequential(
+            nn.Linear(fused_dim, fused_dim),
+            nn.Sigmoid(),
+        )
+        self.classifier_head = nn.Sequential(
+            nn.LayerNorm(fused_dim),
+            nn.Linear(fused_dim, hidden_dim),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(hidden_dim, num_classes),
+        )
+
+    def forward(self, cls_embedding, token_embeddings, mask):
+        """Return class logits for a batch.
+
+        Args:
+            cls_embedding: 2-D tensor whose last dim is ``d_model``.
+            token_embeddings: 3-D tensor whose last dim is ``d_model``.
+            mask: passed to the attention pooling; positions equal to 0 are
+                excluded there.
+
+        Returns:
+            The output of the classifier head (last dim ``num_classes``).
+        """
+        cls_branch = self.cls_projector(cls_embedding)
+        # Conv1d convolves over the last dim, so swap the last two dims on the
+        # way in and swap them back on the way out.
+        channels_first = token_embeddings.transpose(1, 2)
+        refined = self.token_refiner(channels_first).transpose(1, 2)
+        pooled = self.attention_pooling(refined, mask)
+        tok_branch = self.tok_projector(pooled)
+        fused = torch.cat((cls_branch, tok_branch), dim=1)
+        # Element-wise gate in (0, 1) computed from the fused vector itself.
+        gated = fused * self.gate(fused)
+        return self.classifier_head(gated)
+# ==========================
+# 2. Load Models and Files (加载模型与配置)
+# ==========================
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
+CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
+LABEL_MAP_PATH = "label_map.json"
+# --- Load the label map (NUM_CLASSES is derived from it) ---
+if not os.path.exists(LABEL_MAP_PATH):
+    raise FileNotFoundError(f"Error: Missing '{LABEL_MAP_PATH}'. Please upload it to your Space.")
+# Read the JSON with an explicit encoding instead of the platform default.
+with open(LABEL_MAP_PATH, 'r', encoding='utf-8') as f:
+    label_to_idx = json.load(f)
+# Inverse mapping; predict() uses it to name each output index.
+idx_to_label = {v: k for k, v in label_to_idx.items()}
+# ✅ Key variables
+NUM_CLASSES = len(idx_to_label)
+# Feature width handed to the classifier; presumably the hidden size of
+# PLM_MODEL_NAME -- confirm before switching to another checkpoint.
+D_MODEL = 640
+# --- Load the pretrained protein language model ---
+print("🔹 Loading Protein Language Model...")
+tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
+plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE)
+plm_model.eval()
+print("✅ PLM loaded successfully.")
+# --- Load the downstream classifier ---
+print("🔹 Loading downstream classifier...")
+classifier = ProtDualBranchEnhancedClassifier(
+    d_model=D_MODEL, projection_dim=32, num_classes=NUM_CLASSES,
+    dropout=0.3, kernel_size=3
+).to(DEVICE)
+if not os.path.exists(CLASSIFIER_PATH):
+    raise FileNotFoundError(f"Error: Could not find '{CLASSIFIER_PATH}'. Please upload your trained .pth file.")
+# weights_only=True limits unpickling to tensors and plain containers, so a
+# tampered checkpoint file cannot run arbitrary code while being loaded.
+classifier.load_state_dict(
+    torch.load(CLASSIFIER_PATH, map_location=DEVICE, weights_only=True)
+)
+classifier.eval()
+print("✅ Classifier loaded. Application is ready!")
 # ==========================
+# 3. Prediction Function (预测函数)
+# ==========================
+def _extract_sequence(raw_text):
+    """Return the upper-cased A-Z letters of ``raw_text`` without FASTA headers.
+
+    Every line whose first non-blank character is ``>`` is dropped, so header
+    text never leaks into the sequence -- even when the input starts with
+    blank lines or contains more than one record. The remaining lines are
+    joined and everything except A-Z is removed.
+    """
+    body_lines = (
+        line for line in raw_text.strip().splitlines()
+        if not line.lstrip().startswith('>')
+    )
+    return re.sub(r'[^A-Z]', '', "".join(body_lines).upper())
+
+
+def predict(sequence_input):
+    """Predict class probabilities for one protein sequence.
+
+    Args:
+        sequence_input: raw text from the UI, either a bare sequence or FASTA.
+            Header lines are ignored; if several records are pasted their
+            sequence lines are concatenated.
+
+    Returns:
+        dict mapping each label name to its probability as a Python float.
+
+    Raises:
+        gr.Error: if the input is empty/whitespace, or contains no A-Z
+            letters once headers and other characters are removed.
+    """
+    if not sequence_input or sequence_input.isspace():
+        raise gr.Error("Sequence cannot be empty.")
+    sequence = _extract_sequence(sequence_input)
+    if not sequence:
+        raise gr.Error("Invalid sequence format. Please enter amino acids (A-Z).")
+    with torch.no_grad():
+        inputs = tokenizer(sequence, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
+        hidden_states = plm_model(**inputs).last_hidden_state
+        # Position 0 serves as the sequence-level embedding; the first and
+        # last positions are dropped from the per-token branch (presumably the
+        # tokenizer's start/end special tokens -- confirm).
+        cls_embedding = hidden_states[:, 0, :]
+        token_embeddings = hidden_states[:, 1:-1, :]
+        token_mask = inputs['attention_mask'][:, 1:-1]
+        logits = classifier(cls_embedding, token_embeddings, token_mask)
+        probabilities = F.softmax(logits, dim=1)[0]
+    # tolist() yields plain Python floats, one per class index.
+    return {idx_to_label[i]: prob for i, prob in enumerate(probabilities.tolist())}
+# ==========================
+# 4. Academic Research Interface (UI 界面)
 # ==========================
 # 学术风格 CSS
 academic_css = """
 body { font-family: 'Roboto', 'Helvetica Neue', Arial, sans-serif; }
 .header-container {
+    background: linear-gradient(to right, #1e3a8a, #3b82f6);
     color: white;
     padding: 2.5rem;
     border-radius: 10px;
                 # 右侧输出
                 with gr.Column(scale=4):
                     gr.Markdown("### 📊 Prediction Results")
+                    # ✅ 这里使用了 NUM_CLASSES，现在它已经在前面定义过了
                     output_label = gr.Label(num_top_classes=NUM_CLASSES, label="Probabilities")
                     # 解释性文字
             </table>
             """)
         # === Tab 3: Citation (引用) ===
         with gr.TabItem("📝 Citation"):
             gr.Markdown("If you use LocPred-Prok in your research, please cite our paper:")
     submit_btn.click(fn=predict, inputs=sequence_input, outputs=output_label)
     clear_btn.click(lambda: None, outputs=[output_label])
+# 启动
 app.launch()