Spaces:

isyslab
/

LocPred-Prok

Running

App Files Files Community

wangleiofficial committed on Dec 10, 2025

Commit

f7d2100

verified ·

1 Parent(s): 9539727

Update app.py

Browse files

Files changed (1) hide show

app.py +293 -181

app.py CHANGED Viewed

@@ -1,35 +1,34 @@
-import os, shutil, json, re
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import gradio as gr
-import matplotlib.pyplot as plt
-import matplotlib.patches as patches
-import numpy as np
-from io import BytesIO
-from PIL import Image
 from transformers import AutoTokenizer, AutoModel
 # ==========================
-# 0. 环境初始化
 # ==========================
-plt.switch_backend('Agg')
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # ==========================
-# 1. 模型定义 (保持不变)
 # ==========================
 class AttentionPooling(nn.Module):
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
     def forward(self, x, mask):
         attn_logits = self.attention_net(x).squeeze(2)
         attn_logits.masked_fill_(mask == 0, -float('inf'))
@@ -37,20 +36,29 @@ class AttentionPooling(nn.Module):
         return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1)
 class ProtDualBranchEnhancedClassifier(nn.Module):
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
         self.token_refiner = nn.Sequential(
-            nn.Conv1d(d_model, d_model, kernel_size, padding='same'), nn.ReLU()
         )
         self.attention_pooling = AttentionPooling(d_model)
         self.tok_projector = nn.Linear(d_model, projection_dim)
         fused_dim = projection_dim * 2
-        self.gate = nn.Sequential(nn.Linear(fused_dim, fused_dim), nn.Sigmoid())
         self.classifier_head = nn.Sequential(
-            nn.LayerNorm(fused_dim), nn.Linear(fused_dim, fused_dim * 2),
-            nn.ReLU(), nn.Dropout(dropout), nn.Linear(fused_dim * 2, num_classes)
         )
     def forward(self, cls_embedding, token_embeddings, mask):
         z_cls = self.cls_projector(cls_embedding)
         tok_emb_permuted = token_embeddings.permute(0, 2, 1)
@@ -63,17 +71,20 @@ class ProtDualBranchEnhancedClassifier(nn.Module):
         return self.classifier_head(z_fused_gated)
 # ==========================
-# 2. 加载模型
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
-# 简单检查
-if not os.path.exists(LABEL_MAP_PATH): raise FileNotFoundError(f"Missing {LABEL_MAP_PATH}")
-if not os.path.exists(CLASSIFIER_PATH): raise FileNotFoundError(f"Missing {CLASSIFIER_PATH}")
 with open(LABEL_MAP_PATH, 'r') as f:
     label_to_idx = json.load(f)
     idx_to_label = {v: k for k, v in label_to_idx.items()}
@@ -81,32 +92,32 @@ with open(LABEL_MAP_PATH, 'r') as f:
 NUM_CLASSES = len(idx_to_label)
 D_MODEL = 640
-print("🔹 Loading models...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
-plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE).eval()
-classifier = ProtDualBranchEnhancedClassifier(D_MODEL, 32, NUM_CLASSES, 0.3, 3).to(DEVICE)
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
-print("✅ Ready.")
 # ==========================
-# 3. 彻底修复的绘图引擎 (Centric Coordinates)
 # ==========================
-def draw_uniprot_style_cell(target_class):
     target = target_class.lower() if target_class else ""
-    # === 配色定义 ===
-    c = {
-        'stroke': '#37474F',       # 默认深灰轮廓
-        'bg_peri': '#E1F5FE',      # 默认周质背景 (浅蓝)
-        'bg_cyto': '#FFF9C4',      # 默认胞质背景 (浅黄)
-        'highlight_stroke': '#D50000', # 高亮轮廓 (深红)
-        'highlight_fill': '#FFCDD2',   # 高亮填充 (淡红)
-        'dna': '#B0BEC5',
-        'ribo': '#90A4AE'
-    }
-    # === 状态判断 ===
     is_om = "outer membrane" in target
     is_peri = "periplasm" in target
     is_cw = "cell wall" in target
@@ -114,124 +125,120 @@ def draw_uniprot_style_cell(target_class):
     is_cyto = "cytoplasm" in target or "cytosol" in target
     is_secreted = "extracellular" in target or "secreted" in target
-    # === 画布初始化 ===
-    # 中心点 (Cx, Cy) = (5, 3)
-    fig, ax = plt.subplots(figsize=(8, 4.5), dpi=150)
-    ax.set_xlim(0, 10)
-    ax.set_ylim(0, 6)
-    ax.axis('off')
-    # === 核心辅助函数：绘制绝对居中的胶囊 ===
-    def draw_centered_capsule(width, height, fill_color, edge_color, lw, z, linestyle='-'):
-        # FancyBboxPatch 的 xy 是左下角坐标。
-        # 要居中，左下角 x = CenterX - Width/2
-        x = 5.0 - width / 2
-        y = 3.0 - height / 2
-        # rounding_size 设为高度的一半，这就变成了标准的胶囊/药丸形状
-        r = height / 2
-        patch = patches.FancyBboxPatch(
-            (x, y), width, height,
-            boxstyle=f"round,pad=0,rounding_size={r}",
-            fc=fill_color, ec=edge_color, lw=lw, linestyle=linestyle, zorder=z
-        )
-        ax.add_patch(patch)
-        return x, y, width, height # 返回坐标供后续标注使用
-    # === 1. 绘制 Layer 1: 外膜 (Outer Membrane) ===
-    # 如果是 Periplasm 高亮，那么底色变红；否则是默认浅蓝
-    # 如果是 OuterMembrane 高亮，那么边框变红变粗
-    peri_fill = c['highlight_fill'] if is_peri else c['bg_peri']
-    om_edge = c['highlight_stroke'] if is_om else c['stroke']
-    om_lw = 3.5 if is_om else 1.5
-    # 绘制最大的胶囊 (代表外膜轮廓 + 周质背景)
-    # 尺寸: 8.5 x 4.2
-    draw_centered_capsule(8.5, 4.2, peri_fill, om_edge, om_lw, z=1)
-    # === 2. 绘制 Layer 2: 细胞壁 (Cell Wall) ===
-    # 位于中间层
-    cw_edge = c['highlight_stroke'] if is_cw else '#78909C'
-    cw_lw = 2.5 if is_cw else 1.0
-    cw_ls = '-' if is_cw else '--' # 平时虚线，高亮实线
-    # 尺寸: 7.5 x 3.2
-    draw_centered_capsule(7.5, 3.2, "none", cw_edge, cw_lw, z=2, linestyle=cw_ls)
-    # === 3. 绘制 Layer 3: 内膜 (Inner Membrane) + 胞质 (Cytoplasm) ===
-    # 如果是 Cytoplasm 高亮，填充变红；否则默认浅黄
-    # 如果是 InnerMembrane 高亮，边框变红变粗
-    cyto_fill = c['highlight_fill'] if is_cyto else c['bg_cyto']
-    im_edge = c['highlight_stroke'] if is_im else c['stroke']
-    im_lw = 3.5 if is_im else 1.5
-    # 尺寸: 6.5 x 2.2
-    draw_centered_capsule(6.5, 2.2, cyto_fill, im_edge, im_lw, z=3)
-    # === 4. 内部细节 (DNA & Ribosomes) ===
-    # 仅装饰，画在最中心
-    # DNA 线条
-    t = np.linspace(0, 12, 200)
-    x_dna = 5 + 2.2 * np.cos(t) * np.sin(t*0.5)
-    y_dna = 3 + 0.6 * np.sin(t)
-    ax.plot(x_dna, y_dna, color=c['dna'], lw=1.5, zorder=4, alpha=0.6)
-    # 核糖体 (点)
-    rng = np.random.default_rng(42)
-    for _ in range(25):
-        # 在中心区域随机撒点
-        rx = rng.uniform(3.0, 7.0)
-        ry = rng.uniform(2.3, 3.7)
-        circle = patches.Circle((rx, ry), radius=0.05, fc=c['ribo'], zorder=4)
-        ax.add_patch(circle)
-    # === 5. 分泌蛋白 (Secreted) ===
-    if is_secreted:
-        ax.text(5, 5.5, "SECRETED / EXTRACELLULAR", ha='center', va='center',
-                color=c['highlight_stroke'], fontweight='bold')
-        # 画几个向上的箭头
-        ax.arrow(5, 5.2, 0, 0.4, head_width=0.2, fc=c['highlight_stroke'], ec=c['highlight_stroke'], width=0.05)
-    # === 6. 标注系统 (Labeling) ===
-    # 使用 annotate 自动画箭头指引
-    # 定义各层的指引坐标 (全部取右侧中点)
-    # CenterY = 3.
-    # OuterMembrane Edge X ≈ 5 + 8.5/2 = 9.25
-    # Periplasm X ≈ 5 + 8.0/2 = 9.0 (Inside OM)
-    # InnerMembrane Edge X ≈ 5 + 6.5/2 = 8.25
-    # Cytoplasm X ≈ 5
-    labels = [
-        ("Outer Membrane", (9.25, 3.0), (10, 4.5), is_om),
-        ("Periplasm",      (8.0, 3.8),  (9.5, 5.2), is_peri), # 指向胶囊上方空隙
-        ("Cell Wall",      (8.75, 3.0), (10, 3.5), is_cw),    # 指向中间虚线
-        ("Inner Membrane", (8.25, 3.0), (10, 2.5), is_im),
-        ("Cytoplasm",      (5.0, 3.0),  (5.0, 1.0), is_cyto)  # 指向中心，文字在下方
-    ]
-    for txt, xy_target, xy_text, active in labels:
-        color = c['highlight_stroke'] if active else '#546E7A'
-        weight = 'bold' if active else 'normal'
-        # 如果激活，画红色实线箭头；否则画灰色细箭头
-        arrow_props = dict(arrowstyle="->", color=color, lw=1.5 if active else 0.8)
-        ax.annotate(txt, xy=xy_target, xytext=xy_text,
-                    arrowprops=arrow_props,
-                    fontsize=10, fontweight=weight, color=color,
-                    ha='center', va='center')
-    # 底部标题
-    ax.text(5, 0.2, f"Prediction: {target_class}", ha='center', va='bottom',
-            fontsize=12, fontweight='bold', color='#263238')
-    buf = BytesIO()
-    plt.savefig(buf, format='png', bbox_inches='tight', transparent=True, dpi=150)
-    buf.seek(0)
-    img = Image.open(buf)
-    plt.close(fig)
-    return img
 # ==========================
 # 4. 预测逻辑
@@ -239,68 +246,173 @@ def draw_uniprot_style_cell(target_class):
 def predict(sequence_input):
     if not sequence_input or sequence_input.isspace():
         raise gr.Error("Please input a protein sequence.")
     seq = "".join(sequence_input.split('\n')[1:]) if sequence_input.startswith('>') else sequence_input
-    seq = re.sub(r'[^A-Z]', '', seq.upper())[:1024]
-    if not seq: raise gr.Error("Invalid Sequence.")
     with torch.no_grad():
         inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
         outputs = plm_model(**inputs)
-        logits = classifier(outputs.last_hidden_state[:, 0, :], outputs.last_hidden_state[:, 1:-1, :], inputs['attention_mask'][:, 1:-1])
         probs = F.softmax(logits, dim=1)[0]
     top_prob, top_idx = torch.max(probs, dim=0)
     top_label = idx_to_label[top_idx.item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
-    # 生成修正后的图
-    cell_diagram = draw_uniprot_style_cell(top_label)
-    return confidences, cell_diagram
 # ==========================
-# 5. UI 界面
 # ==========================
 paper_css = """
-@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;700&display=swap');
 body { font-family: 'Roboto', sans-serif !important; background-color: #ffffff; color: #1a1a1a; }
-.header-box { background: #ffffff; padding: 2rem 0; border-bottom: 1px solid #e5e7eb; margin-bottom: 2rem; }
-.header-title { font-size: 2.5rem; font-weight: 700; color: #000000; letter-spacing: -1px; }
-.badge { display: inline-block; padding: 4px 10px; font-size: 0.8rem; background: #f1f5f9; border: 1px solid #e2e8f0; border-radius: 4px; margin-right: 8px; }
-.content-box { background: #ffffff; border: 1px solid #e5e7eb; border-radius: 8px; padding: 1.5rem; }
 """
-theme = gr.themes.Base(primary_hue="blue", font=[gr.themes.GoogleFont("Roboto"), "ui-sans-serif", "system-ui"]).set(
-    body_background_fill="#ffffff", block_background_fill="#ffffff", block_border_width="1px"
 )
 with gr.Blocks(theme=theme, css=paper_css, title="LocPred-Prok") as app:
     with gr.Column(elem_classes="header-box"):
         gr.HTML("""
             <div class="header-title">LocPred-Prok</div>
-            <div style="font-size: 1.2rem; color: #52525b; margin: 10px 0;">Accurate prokaryotic subcellular localization using dual-branch protein language models</div>
-            <div><span class="badge">Article</span><span class="badge">ESM-2 Enhanced</span><span class="badge">Gram-negative</span></div>
         """)
     with gr.Tabs():
-        with gr.TabItem("Prediction"):
             with gr.Row():
                 with gr.Column(scale=4, elem_classes="content-box"):
-                    gr.Markdown("### Sequence Input")
-                    sequence_input = gr.Textbox(lines=10, show_label=False, placeholder=">Sequence...")
                     with gr.Row():
-                        gr.ClearButton(sequence_input, value="Clear")
-                        submit_btn = gr.Button("Analyze", variant="primary")
-                    gr.Examples([
-                        [">Outer Membrane Protein\nAPKNTWYTGAKLGWSQYHDTGFINNNGPTHENQLGAGAFGGYQVNPYVGFEMGYDWLGRMPYKGSVENGAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTKSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWTNNIGDAHTIGTRPDNGMLSLGVSYRFGQGEAAPVVAPAPAPAPEVQTKHFTLKSDVLFNFNKATLKPEGQAALDQLYSQLSNLDPKDGSVVVLGYTDRIGSDAYNQGLSERRAQSVVDYLISKGIPADKISARGMGESNPVTGNTCDNVKQRAALIDCLAPDRRVEIEVKGIKDVVTQPQA"],
-                        [">Cytoplasmic Protein\nARYLGPKLKLSRREGTDLFLKSGVRAIDTKCKIEQAPGQHGARKPRLSDYGVQLREKQKVRRIYGVLERQFRNYYKEAARLKGNTGENLLALLEGRLDNVVYRMGFG"]
-                    ], inputs=sequence_input, label="Examples")
                 with gr.Column(scale=6, elem_classes="content-box"):
-                    gr.Markdown("### Localization Visualization")
-                    output_image = gr.Image(label="Visualization", show_label=False, show_download_button=True, interactive=False, type="pil", height=400)
                     gr.Markdown("#### Confidence Scores")
                     output_label = gr.Label(num_top_classes=NUM_CLASSES, show_label=False)
-    submit_btn.click(fn=predict, inputs=sequence_input, outputs=[output_label, output_image])
 app.launch()

+import os
+import json
+import re
 # ==========================
 # 0. Environment and cache setup
 # ==========================
 # These variables must be exported BEFORE `transformers` (and therefore
 # `huggingface_hub`) is imported: those libraries resolve their cache
 # directories at import time, so assigning the variables afterwards leaves
 # the default cache location in effect.
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 # Clear stale caches (optional). Best effort: removal errors are ignored and
 # the directories are recreated empty, so the model is re-downloaded on start.
 import shutil
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # Third-party imports are deliberately placed after the cache configuration.
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 # ==========================
+# 1. Model architecture definitions
 # ==========================
 class AttentionPooling(nn.Module):
+    """Attention Pooling Layer"""
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
     def forward(self, x, mask):
         attn_logits = self.attention_net(x).squeeze(2)
         attn_logits.masked_fill_(mask == 0, -float('inf'))
         return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1)
 class ProtDualBranchEnhancedClassifier(nn.Module):
+    """Enhanced dual-branch model architecture"""
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
         self.token_refiner = nn.Sequential(
+            nn.Conv1d(d_model, d_model, kernel_size, padding='same'),
+            nn.ReLU()
         )
         self.attention_pooling = AttentionPooling(d_model)
         self.tok_projector = nn.Linear(d_model, projection_dim)
         fused_dim = projection_dim * 2
+        self.gate = nn.Sequential(
+            nn.Linear(fused_dim, fused_dim),
+            nn.Sigmoid()
+        )
         self.classifier_head = nn.Sequential(
+            nn.LayerNorm(fused_dim),
+            nn.Linear(fused_dim, fused_dim * 2),
+            nn.ReLU(),
+            nn.Dropout(dropout),
+            nn.Linear(fused_dim * 2, num_classes)
         )
     def forward(self, cls_embedding, token_embeddings, mask):
         z_cls = self.cls_projector(cls_embedding)
         tok_emb_permuted = token_embeddings.permute(0, 2, 1)
         return self.classifier_head(z_fused_gated)
 # ==========================
+# 2. Load models and resources
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
 # Fail fast with an actionable message when a required artifact is missing
 # (the label map is checked first, then the classifier checkpoint).
 for _required_path in (LABEL_MAP_PATH, CLASSIFIER_PATH):
     if not os.path.exists(_required_path):
         raise FileNotFoundError(f"Error: Missing '{_required_path}'. Please upload it to your Space.")
 # Load the label map (label name -> class index) and build the inverse
 # lookup (class index -> label name) used when decoding predictions.
 # The encoding is explicit so label names do not depend on the platform default.
 with open(LABEL_MAP_PATH, 'r', encoding="utf-8") as f:
     label_to_idx = json.load(f)
 idx_to_label = {v: k for k, v in label_to_idx.items()}
 NUM_CLASSES = len(idx_to_label)
 # Embedding width fed to the classifier.
 # NOTE(review): presumably the hidden size of PLM_MODEL_NAME - confirm if the backbone changes.
 D_MODEL = 640
 print(f"🔹 Loading ESM-2 Model ({PLM_MODEL_NAME})...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
 plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE)
 plm_model.eval()
 print("🔹 Loading Custom Classifier...")
 classifier = ProtDualBranchEnhancedClassifier(
     d_model=D_MODEL, projection_dim=32, num_classes=NUM_CLASSES,
     dropout=0.3, kernel_size=3
 ).to(DEVICE)
 # Only a state dict is needed, so restrict unpickling to tensors and plain
 # containers instead of executing arbitrary pickled objects from the file.
 # NOTE(review): `weights_only` needs a PyTorch release that supports it - confirm the Space's pinned version.
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE, weights_only=True))
 classifier.eval()
 print("✅ All Models Loaded Successfully.")
 # ==========================
+# 3. SVG vector drawing engine (perfectly aligned version)
 # ==========================
+def generate_bacterial_svg(target_class):
+    """
+    Generate a high-quality SVG vector diagram for bacterial localization.
+    Coordinates are hardcoded to ensure perfect alignment.
+    """
     target = target_class.lower() if target_class else ""
+    # --- 1. 状态判断 ---
     is_om = "outer membrane" in target
     is_peri = "periplasm" in target
     is_cw = "cell wall" in target
     is_cyto = "cytoplasm" in target or "cytosol" in target
     is_secreted = "extracellular" in target or "secreted" in target
+    # --- 2. 颜色配置 (学术蓝/黄风格) ---
+    colors = {
+        # 填充色：平时浅色，激活变粉红
+        "om_fill": "#FFCDD2" if is_peri else "#E1F5FE",
+        "im_fill": "#FFCDD2" if is_cyto else "#FFF9C4",
+        # 边框色：平时深灰，激活变鲜红
+        "om_stroke": "#D32F2F" if is_om else "#37474F",
+        "cw_stroke": "#D32F2F" if is_cw else "#90A4AE",
+        "im_stroke": "#D32F2F" if is_im else "#37474F",
+        # 线宽
+        "om_width": "4" if is_om else "2",
+        "cw_width": "3" if is_cw else "1.5",
+        "im_width": "4" if is_im else "2",
+        # 细胞壁虚线
+        "cw_dash": "0" if is_cw else "6,4",
+        # 标签颜色
+        "label_hl": "#D32F2F",
+        "label_norm": "#546E7A",
+        "arrow_hl": "#D32F2F",
+        "arrow_norm": "#90A4AE"
+    }
+    # 获取标签样式的辅助函数
+    def get_style(active):
+        if active:
+            return colors["label_hl"], "bold", colors["arrow_hl"], "2.5", "url(#arrowhead_hl)"
+        else:
+            return colors["label_norm"], "normal", colors["arrow_norm"], "1.0", "url(#arrowhead_norm)"
+    s_om = get_style(is_om)
+    s_peri = get_style(is_peri)
+    s_cw = get_style(is_cw)
+    s_im = get_style(is_im)
+    s_cyto = get_style(is_cyto)
+    # --- 3. 生成 SVG 字符串 ---
+    svg = f"""
+    <svg width="100%" height="100%" viewBox="0 0 800 450" xmlns="http://www.w3.org/2000/svg">
+        <defs>
+            <marker id="arrowhead_norm" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+                <polygon points="0 0, 10 3.5, 0 7" fill="{colors['arrow_norm']}" />
+            </marker>
+            <marker id="arrowhead_hl" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
+                <polygon points="0 0, 10 3.5, 0 7" fill="{colors['arrow_hl']}" />
+            </marker>
+        </defs>
+        <rect width="800" height="450" fill="white" />
+        <g transform="translate(50, 50)">
+            <rect x="0" y="0" width="500" height="300" rx="150" ry="150"
+                  fill="{colors['om_fill']}" stroke="{colors['om_stroke']}" stroke-width="{colors['om_width']}" />
+            <rect x="40" y="40" width="420" height="220" rx="110" ry="110"
+                  fill="none" stroke="{colors['cw_stroke']}" stroke-width="{colors['cw_width']}" stroke-dasharray="{colors['cw_dash']}" />
+            <rect x="80" y="80" width="340" height="140" rx="70" ry="70"
+                  fill="{colors['im_fill']}" stroke="{colors['im_stroke']}" stroke-width="{colors['im_width']}" />
+            <g opacity="0.6">
+                <path d="M 180 150 Q 220 100 250 150 T 320 150" fill="none" stroke="#B0BEC5" stroke-width="3" />
+                <path d="M 190 140 Q 230 190 250 140 T 310 160" fill="none" stroke="#B0BEC5" stroke-width="3" />
+                <circle cx="150" cy="120" r="3" fill="#90A4AE" />
+                <circle cx="350" cy="180" r="3" fill="#90A4AE" />
+                <circle cx="250" cy="100" r="3" fill="#90A4AE" />
+                <circle cx="200" cy="200" r="3" fill="#90A4AE" />
+            </g>
+        </g>
+        {f'''
+        <g transform="translate(300, 20)">
+            <text x="0" y="0" text-anchor="middle" fill="{colors['label_hl']}" font-weight="bold" font-family="Arial" font-size="14">SECRETED / EXTRACELLULAR</text>
+            <line x1="0" y1="5" x2="0" y2="25" stroke="{colors['arrow_hl']}" stroke-width="2" marker-end="url(#arrowhead_hl)" />
+        </g>
+        ''' if is_secreted else ""}
+        <g font-family="Arial, sans-serif">
+            <g transform="translate(580, 80)">
+                <text x="0" y="5" fill="{s_om[0]}" font-weight="{s_om[1]}" font-size="14">Outer Membrane</text>
+                <line x1="-10" y1="0" x2="-80" y2="0" stroke="{s_om[2]}" stroke-width="{s_om[3]}" marker-end="{s_om[4]}" />
+            </g>
+            <g transform="translate(580, 140)">
+                <text x="0" y="5" fill="{s_peri[0]}" font-weight="{s_peri[1]}" font-size="14">Periplasm</text>
+                <line x1="-10" y1="0" x2="-100" y2="0" stroke="{s_peri[2]}" stroke-width="{s_peri[3]}" marker-end="{s_peri[4]}" />
+            </g>
+            <g transform="translate(580, 200)">
+                <text x="0" y="5" fill="{s_cw[0]}" font-weight="{s_cw[1]}" font-size="14">Cell Wall</text>
+                <line x1="-10" y1="0" x2="-120" y2="0" stroke="{s_cw[2]}" stroke-width="{s_cw[3]}" marker-end="{s_cw[4]}" />
+            </g>
+            <g transform="translate(580, 260)">
+                <text x="0" y="5" fill="{s_im[0]}" font-weight="{s_im[1]}" font-size="14">Inner Membrane</text>
+                <line x1="-10" y1="0" x2="-150" y2="0" stroke="{s_im[2]}" stroke-width="{s_im[3]}" marker-end="{s_im[4]}" />
+            </g>
+            <g transform="translate(580, 320)">
+                <text x="0" y="5" fill="{s_cyto[0]}" font-weight="{s_cyto[1]}" font-size="14">Cytoplasm</text>
+                <line x1="-10" y1="0" x2="-200" y2="0" stroke="{s_cyto[2]}" stroke-width="{s_cyto[3]}" marker-end="{s_cyto[4]}" />
+            </g>
+        </g>
+        <text x="400" y="420" text-anchor="middle" font-family="Arial" font-size="18" font-weight="bold" fill="#37474F">
+            Predicted Localization: {target_class}
+        </text>
+    </svg>
+    """
+    return svg
 # ==========================
 # 4. Prediction logic
 def _extract_sequence(raw_text):
     """Return the residues of the first record in *raw_text* as upper-case A-Z.
 
     Accepts either a bare sequence or FASTA text. Surrounding whitespace is
     ignored, a leading ``>`` header line is dropped, and reading stops at the
     next ``>`` line so headers of further records never leak into the
     residues. Every character outside A-Z is removed.
     """
     lines = raw_text.strip().splitlines()
     if lines and lines[0].lstrip().startswith('>'):
         lines = lines[1:]
     residue_lines = []
     for line in lines:
         if line.lstrip().startswith('>'):
             # Start of another record: only the first one is analysed.
             break
         residue_lines.append(line)
     return re.sub(r'[^A-Z]', '', "".join(residue_lines).upper())
 
 def predict(sequence_input):
     """Predict the subcellular localization of one protein sequence.
 
     Args:
         sequence_input: Text from the UI, either FASTA or raw amino acids.
 
     Returns:
         A ``(confidences, svg_content)`` tuple: a dict mapping each label to
         its softmax probability, and the SVG markup produced by
         ``generate_bacterial_svg`` for the top-scoring label.
 
     Raises:
         gr.Error: If the input is empty or contains no A-Z characters.
     """
     if not sequence_input or sequence_input.isspace():
         raise gr.Error("Please input a protein sequence.")
     # Clean the input.
     seq = _extract_sequence(sequence_input)
     if not seq:
         raise gr.Error("Invalid Amino Acid Sequence.")
     # Truncate long inputs to avoid running out of memory.
     seq = seq[:1024]
     with torch.no_grad():
         inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
         outputs = plm_model(**inputs)
         # Feature extraction: position 0 feeds the CLS branch, the interior
         # positions feed the token branch.
         # NOTE(review): assumes the tokenizer adds exactly one leading and one
         # trailing special token - confirm for other backbones.
         hidden_states = outputs.last_hidden_state
         cls_embedding = hidden_states[:, 0, :]
         token_embeddings = hidden_states[:, 1:-1, :]
         token_mask = inputs['attention_mask'][:, 1:-1]
         # Model inference.
         logits = classifier(cls_embedding, token_embeddings, token_mask)
         probs = F.softmax(logits, dim=1)[0]
     # Decode the results.
     _, top_idx = torch.max(probs, dim=0)
     top_label = idx_to_label[top_idx.item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
     # Render the SVG visualization for the predicted compartment.
     svg_content = generate_bacterial_svg(top_label)
     return confidences, svg_content
 # ==========================
+# 5. UI (academic style)
 # ==========================
 paper_css = """
+@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
 body { font-family: 'Roboto', sans-serif !important; background-color: #ffffff; color: #1a1a1a; }
+/* Header */
+.header-box {
+    background: #ffffff;
+    padding: 2rem 0;
+    border-bottom: 1px solid #e5e7eb;
+    margin-bottom: 2rem;
+}
+.header-title {
+    font-size: 2.2rem;
+    font-weight: 700;
+    color: #0f172a;
+    letter-spacing: -0.5px;
+}
+.header-subtitle {
+    font-size: 1.1rem;
+    color: #64748b;
+    font-weight: 300;
+    margin-top: 8px;
+}
+.badge {
+    display: inline-flex;
+    align-items: center;
+    padding: 4px 12px;
+    font-size: 0.85rem;
+    font-weight: 500;
+    color: #0f172a;
+    background: #f1f5f9;
+    border: 1px solid #e2e8f0;
+    border-radius: 99px;
+    margin-right: 10px;
+}
+/* Content Box */
+.content-box {
+    background: #ffffff;
+    border: 1px solid #e2e8f0;
+    border-radius: 8px;
+    padding: 1.5rem;
+    box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
+}
+/* Button */
+button.primary {
+    background-color: #2563eb !important;
+    color: white !important;
+    border-radius: 6px !important;
+    font-weight: 500;
+}
 """
+theme = gr.themes.Base(
+    primary_hue="blue",
+    font=[gr.themes.GoogleFont("Roboto"), "ui-sans-serif", "system-ui"]
+).set(
+    body_background_fill="#ffffff",
+    block_background_fill="#ffffff",
+    block_border_width="1px",
+    block_label_background_fill="#ffffff"
 )
 with gr.Blocks(theme=theme, css=paper_css, title="LocPred-Prok") as app:
+    # --- Header ---
     with gr.Column(elem_classes="header-box"):
         gr.HTML("""
             <div class="header-title">LocPred-Prok</div>
+            <div class="header-subtitle">
+                Deep learning framework for prokaryotic subcellular localization using dual-branch architecture
+            </div>
+            <div style="margin-top: 15px;">
+                <span class="badge">Research Article</span>
+                <span class="badge">ESM-2 Enhanced</span>
+                <span class="badge">Gram-negative Bacteria</span>
+            </div>
         """)
+    # --- Main Content ---
     with gr.Tabs():
+        with gr.TabItem("Prediction Interface"):
             with gr.Row():
+                # Input Column
                 with gr.Column(scale=4, elem_classes="content-box"):
+                    gr.Markdown("### 1. Sequence Input")
+                    gr.Markdown("<span style='color:#64748b; font-size:0.9rem'>Enter a protein sequence in FASTA format or raw amino acids.</span>")
+                    sequence_input = gr.Textbox(
+                        lines=12,
+                        show_label=False,
+                        placeholder=">Sequence_ID\nMKFKLTAGCL..."
+                    )
                     with gr.Row():
+                        clear_btn = gr.ClearButton(sequence_input, value="Clear")
+                        submit_btn = gr.Button("Run Analysis", variant="primary")
+                    gr.Markdown("#### Test Examples")
+                    gr.Examples(
+                        examples=[
+                            [">Outer Membrane Protein (OmpA)\nAPKNTWYTGAKLGWSQYHDTGFINNNGPTHENQLGAGAFGGYQVNPYVGFEMGYDWLGRMPYKGSVENGAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTKSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWTNNIGDAHTIGTRPDNGMLSLGVSYRFGQGEAAPVVAPAPAPAPEVQTKHFTLKSDVLFNFNKATLKPEGQAALDQLYSQLSNLDPKDGSVVVLGYTDRIGSDAYNQGLSERRAQSVVDYLISKGIPADKISARGMGESNPVTGNTCDNVKQRAALIDCLAPDRRVEIEVKGIKDVVTQPQA"],
+                            [">Cytoplasmic Protein (Ribosomal)\nARYLGPKLKLSRREGTDLFLKSGVRAIDTKCKIEQAPGQHGARKPRLSDYGVQLREKQKVRRIYGVLERQFRNYYKEAARLKGNTGENLLALLEGRLDNVVYRMGFG"]
+                        ],
+                        inputs=sequence_input,
+                        label=None
+                    )
+                # Output Column
                 with gr.Column(scale=6, elem_classes="content-box"):
+                    gr.Markdown("### 2. Localization Results")
+                    # 使用 HTML 组件展示 SVG
+                    output_svg = gr.HTML(label="Visualization", show_label=False)
                     gr.Markdown("#### Confidence Scores")
                     output_label = gr.Label(num_top_classes=NUM_CLASSES, show_label=False)
+        with gr.TabItem("About & Methodology"):
+             gr.Markdown("""
+             ### Methodology
+             **LocPred-Prok** employs a dual-branch neural network architecture...
+             """)
+    # --- Interaction ---
+    submit_btn.click(
+        fn=predict,
+        inputs=sequence_input,
+        outputs=[output_label, output_svg]
+    )
+    clear_btn.click(lambda: [None, None], outputs=[output_label, output_svg])
+# Launch
 app.launch()