Spaces:

isyslab
/

LocPred-Prok

Running

App Files Files Community

wangleiofficial committed on Dec 11, 2025

Commit

0c3ed7a

verified ·

1 Parent(s): f7d2100

Update app.py

Browse files

Files changed (1) hide show

app.py +237 -286

app.py CHANGED Viewed

@@ -5,119 +5,118 @@ import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 # ==========================
 # 0. 环境与缓存设置
 # ==========================
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
-# 清理旧缓存 (可选)
 import shutil
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # ==========================
-# 1. 模型架构定义
 # ==========================
 class AttentionPooling(nn.Module):
-    """Attention Pooling Layer"""
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
     def forward(self, x, mask):
-        attn_logits = self.attention_net(x).squeeze(2)
         attn_logits.masked_fill_(mask == 0, -float('inf'))
         attn_weights = F.softmax(attn_logits, dim=1)
-        return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1)
 class ProtDualBranchEnhancedClassifier(nn.Module):
-    """Enhanced dual-branch model architecture"""
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
         self.token_refiner = nn.Sequential(
-            nn.Conv1d(d_model, d_model, kernel_size, padding='same'),
             nn.ReLU()
         )
         self.attention_pooling = AttentionPooling(d_model)
         self.tok_projector = nn.Linear(d_model, projection_dim)
         fused_dim = projection_dim * 2
-        self.gate = nn.Sequential(
-            nn.Linear(fused_dim, fused_dim),
-            nn.Sigmoid()
-        )
         self.classifier_head = nn.Sequential(
-            nn.LayerNorm(fused_dim),
-            nn.Linear(fused_dim, fused_dim * 2),
-            nn.ReLU(),
-            nn.Dropout(dropout),
             nn.Linear(fused_dim * 2, num_classes)
         )
     def forward(self, cls_embedding, token_embeddings, mask):
         z_cls = self.cls_projector(cls_embedding)
         tok_emb_permuted = token_embeddings.permute(0, 2, 1)
         refined_tok_emb = self.token_refiner(tok_emb_permuted).permute(0, 2, 1)
-        z_tok_pooled = self.attention_pooling(refined_tok_emb, mask)
         z_tok = self.tok_projector(z_tok_pooled)
         z_fused_concat = torch.cat([z_cls, z_tok], dim=1)
         gate_values = self.gate(z_fused_concat)
         z_fused_gated = z_fused_concat * gate_values
-        return self.classifier_head(z_fused_gated)
 # ==========================
-# 2. 加载模型与资源
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
-# 文件存在性检查
-if not os.path.exists(LABEL_MAP_PATH):
-    raise FileNotFoundError(f"Error: Missing '{LABEL_MAP_PATH}'. Please upload it to your Space.")
-if not os.path.exists(CLASSIFIER_PATH):
-    raise FileNotFoundError(f"Error: Missing '{CLASSIFIER_PATH}'. Please upload it to your Space.")
 # 加载 Label Map
 with open(LABEL_MAP_PATH, 'r') as f:
     label_to_idx = json.load(f)
     idx_to_label = {v: k for k, v in label_to_idx.items()}
 NUM_CLASSES = len(idx_to_label)
 D_MODEL = 640
-print(f"🔹 Loading ESM-2 Model ({PLM_MODEL_NAME})...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
-plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE)
-plm_model.eval()
-print("🔹 Loading Custom Classifier...")
-classifier = ProtDualBranchEnhancedClassifier(
-    d_model=D_MODEL, projection_dim=32, num_classes=NUM_CLASSES,
-    dropout=0.3, kernel_size=3
-).to(DEVICE)
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
-print("✅ All Models Loaded Successfully.")
 # ==========================
-# 3. SVG 矢量绘图引擎 (完美对齐版)
 # ==========================
 def generate_bacterial_svg(target_class):
-    """
-    Generate a high-quality SVG vector diagram for bacterial localization.
-    Coordinates are hardcoded to ensure perfect alignment.
-    """
     target = target_class.lower() if target_class else ""
-    # --- 1. 状态判断 ---
     is_om = "outer membrane" in target
     is_peri = "periplasm" in target
     is_cw = "cell wall" in target
@@ -125,294 +124,246 @@ def generate_bacterial_svg(target_class):
     is_cyto = "cytoplasm" in target or "cytosol" in target
     is_secreted = "extracellular" in target or "secreted" in target
-    # --- 2. 颜色配置 (学术蓝/黄风格) ---
-    colors = {
-        # 填充色：平时浅色，激活变粉红
-        "om_fill": "#FFCDD2" if is_peri else "#E1F5FE",
-        "im_fill": "#FFCDD2" if is_cyto else "#FFF9C4",
-        # 边框色：平时深灰，激活变鲜红
-        "om_stroke": "#D32F2F" if is_om else "#37474F",
-        "cw_stroke": "#D32F2F" if is_cw else "#90A4AE",
-        "im_stroke": "#D32F2F" if is_im else "#37474F",
-        # 线宽
-        "om_width": "4" if is_om else "2",
-        "cw_width": "3" if is_cw else "1.5",
-        "im_width": "4" if is_im else "2",
-        # 细胞壁虚线
-        "cw_dash": "0" if is_cw else "6,4",
-        # 标签颜色
-        "label_hl": "#D32F2F",
-        "label_norm": "#546E7A",
-        "arrow_hl": "#D32F2F",
-        "arrow_norm": "#90A4AE"
-    }
-    # 获取标签样式的辅助函数
-    def get_style(active):
-        if active:
-            return colors["label_hl"], "bold", colors["arrow_hl"], "2.5", "url(#arrowhead_hl)"
-        else:
-            return colors["label_norm"], "normal", colors["arrow_norm"], "1.0", "url(#arrowhead_norm)"
-    s_om = get_style(is_om)
-    s_peri = get_style(is_peri)
-    s_cw = get_style(is_cw)
-    s_im = get_style(is_im)
-    s_cyto = get_style(is_cyto)
-    # --- 3. 生成 SVG 字符串 ---
-    svg = f"""
-    <svg width="100%" height="100%" viewBox="0 0 800 450" xmlns="http://www.w3.org/2000/svg">
-        <defs>
-            <marker id="arrowhead_norm" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
-                <polygon points="0 0, 10 3.5, 0 7" fill="{colors['arrow_norm']}" />
-            </marker>
-            <marker id="arrowhead_hl" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto">
-                <polygon points="0 0, 10 3.5, 0 7" fill="{colors['arrow_hl']}" />
-            </marker>
-        </defs>
-        <rect width="800" height="450" fill="white" />
-        <g transform="translate(50, 50)">
-            <rect x="0" y="0" width="500" height="300" rx="150" ry="150"
-                  fill="{colors['om_fill']}" stroke="{colors['om_stroke']}" stroke-width="{colors['om_width']}" />
-            <rect x="40" y="40" width="420" height="220" rx="110" ry="110"
-                  fill="none" stroke="{colors['cw_stroke']}" stroke-width="{colors['cw_width']}" stroke-dasharray="{colors['cw_dash']}" />
-            <rect x="80" y="80" width="340" height="140" rx="70" ry="70"
-                  fill="{colors['im_fill']}" stroke="{colors['im_stroke']}" stroke-width="{colors['im_width']}" />
-            <g opacity="0.6">
-                <path d="M 180 150 Q 220 100 250 150 T 320 150" fill="none" stroke="#B0BEC5" stroke-width="3" />
-                <path d="M 190 140 Q 230 190 250 140 T 310 160" fill="none" stroke="#B0BEC5" stroke-width="3" />
-                <circle cx="150" cy="120" r="3" fill="#90A4AE" />
-                <circle cx="350" cy="180" r="3" fill="#90A4AE" />
-                <circle cx="250" cy="100" r="3" fill="#90A4AE" />
-                <circle cx="200" cy="200" r="3" fill="#90A4AE" />
             </g>
         </g>
         {f'''
-        <g transform="translate(300, 20)">
-            <text x="0" y="0" text-anchor="middle" fill="{colors['label_hl']}" font-weight="bold" font-family="Arial" font-size="14">SECRETED / EXTRACELLULAR</text>
-            <line x1="0" y1="5" x2="0" y2="25" stroke="{colors['arrow_hl']}" stroke-width="2" marker-end="url(#arrowhead_hl)" />
         </g>
         ''' if is_secreted else ""}
-        <g font-family="Arial, sans-serif">
-            <g transform="translate(580, 80)">
-                <text x="0" y="5" fill="{s_om[0]}" font-weight="{s_om[1]}" font-size="14">Outer Membrane</text>
-                <line x1="-10" y1="0" x2="-80" y2="0" stroke="{s_om[2]}" stroke-width="{s_om[3]}" marker-end="{s_om[4]}" />
-            </g>
-            <g transform="translate(580, 140)">
-                <text x="0" y="5" fill="{s_peri[0]}" font-weight="{s_peri[1]}" font-size="14">Periplasm</text>
-                <line x1="-10" y1="0" x2="-100" y2="0" stroke="{s_peri[2]}" stroke-width="{s_peri[3]}" marker-end="{s_peri[4]}" />
-            </g>
-            <g transform="translate(580, 200)">
-                <text x="0" y="5" fill="{s_cw[0]}" font-weight="{s_cw[1]}" font-size="14">Cell Wall</text>
-                <line x1="-10" y1="0" x2="-120" y2="0" stroke="{s_cw[2]}" stroke-width="{s_cw[3]}" marker-end="{s_cw[4]}" />
-            </g>
-            <g transform="translate(580, 260)">
-                <text x="0" y="5" fill="{s_im[0]}" font-weight="{s_im[1]}" font-size="14">Inner Membrane</text>
-                <line x1="-10" y1="0" x2="-150" y2="0" stroke="{s_im[2]}" stroke-width="{s_im[3]}" marker-end="{s_im[4]}" />
-            </g>
-            <g transform="translate(580, 320)">
-                <text x="0" y="5" fill="{s_cyto[0]}" font-weight="{s_cyto[1]}" font-size="14">Cytoplasm</text>
-                <line x1="-10" y1="0" x2="-200" y2="0" stroke="{s_cyto[2]}" stroke-width="{s_cyto[3]}" marker-end="{s_cyto[4]}" />
-            </g>
-        </g>
-        <text x="400" y="420" text-anchor="middle" font-family="Arial" font-size="18" font-weight="bold" fill="#37474F">
-            Predicted Localization: {target_class}
-        </text>
-    </svg>
-    """
-    return svg
 # ==========================
-# 4. 预测逻辑
 # ==========================
 def predict(sequence_input):
-    if not sequence_input or sequence_input.isspace():
-        raise gr.Error("Please input a protein sequence.")
-    # 清洗输入
     seq = "".join(sequence_input.split('\n')[1:]) if sequence_input.startswith('>') else sequence_input
-    seq = re.sub(r'[^A-Z]', '', seq.upper())
-    if not seq: raise gr.Error("Invalid Amino Acid Sequence.")
-    if len(seq) > 1024: seq = seq[:1024] # 截断防止OOM
     with torch.no_grad():
         inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
         outputs = plm_model(**inputs)
-        # 提取特征
         hidden_states = outputs.last_hidden_state
         cls_embedding = hidden_states[:, 0, :]
-        token_embeddings = hidden_states[:, 1:-1, :]
         token_mask = inputs['attention_mask'][:, 1:-1]
-        # 模型推理
-        logits = classifier(cls_embedding, token_embeddings, token_mask)
         probs = F.softmax(logits, dim=1)[0]
-    # 获取结果
-    top_prob, top_idx = torch.max(probs, dim=0)
-    top_label = idx_to_label[top_idx.item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
-    # 生成 SVG 可视化
-    svg_content = generate_bacterial_svg(top_label)
-    return confidences, svg_content
 # ==========================
-# 5. UI 界面 (学术风格)
 # ==========================
-paper_css = """
-@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap');
-body { font-family: 'Roboto', sans-serif !important; background-color: #ffffff; color: #1a1a1a; }
-/* Header */
-.header-box {
-    background: #ffffff;
-    padding: 2rem 0;
-    border-bottom: 1px solid #e5e7eb;
-    margin-bottom: 2rem;
-}
-.header-title {
-    font-size: 2.2rem;
-    font-weight: 700;
-    color: #0f172a;
-    letter-spacing: -0.5px;
 }
-.header-subtitle {
-    font-size: 1.1rem;
-    color: #64748b;
-    font-weight: 300;
-    margin-top: 8px;
 }
-.badge {
-    display: inline-flex;
-    align-items: center;
-    padding: 4px 12px;
-    font-size: 0.85rem;
-    font-weight: 500;
-    color: #0f172a;
-    background: #f1f5f9;
-    border: 1px solid #e2e8f0;
-    border-radius: 99px;
-    margin-right: 10px;
 }
-/* Content Box */
-.content-box {
-    background: #ffffff;
-    border: 1px solid #e2e8f0;
-    border-radius: 8px;
-    padding: 1.5rem;
-    box-shadow: 0 1px 2px 0 rgba(0, 0, 0, 0.05);
-}
-/* Button */
-button.primary {
-    background-color: #2563eb !important;
-    color: white !important;
-    border-radius: 6px !important;
-    font-weight: 500;
 }
 """
-theme = gr.themes.Base(
-    primary_hue="blue",
-    font=[gr.themes.GoogleFont("Roboto"), "ui-sans-serif", "system-ui"]
-).set(
-    body_background_fill="#ffffff",
-    block_background_fill="#ffffff",
-    block_border_width="1px",
-    block_label_background_fill="#ffffff"
-)
-with gr.Blocks(theme=theme, css=paper_css, title="LocPred-Prok") as app:
-    # --- Header ---
-    with gr.Column(elem_classes="header-box"):
-        gr.HTML("""
             <div class="header-title">LocPred-Prok</div>
-            <div class="header-subtitle">
-                Deep learning framework for prokaryotic subcellular localization using dual-branch architecture
-            </div>
-            <div style="margin-top: 15px;">
-                <span class="badge">Research Article</span>
-                <span class="badge">ESM-2 Enhanced</span>
-                <span class="badge">Gram-negative Bacteria</span>
-            </div>
-        """)
-    # --- Main Content ---
-    with gr.Tabs():
-        with gr.TabItem("Prediction Interface"):
             with gr.Row():
-                # Input Column
-                with gr.Column(scale=4, elem_classes="content-box"):
-                    gr.Markdown("### 1. Sequence Input")
-                    gr.Markdown("<span style='color:#64748b; font-size:0.9rem'>Enter a protein sequence in FASTA format or raw amino acids.</span>")
-                    sequence_input = gr.Textbox(
-                        lines=12,
-                        show_label=False,
-                        placeholder=">Sequence_ID\nMKFKLTAGCL..."
-                    )
-                    with gr.Row():
-                        clear_btn = gr.ClearButton(sequence_input, value="Clear")
-                        submit_btn = gr.Button("Run Analysis", variant="primary")
-                    gr.Markdown("#### Test Examples")
-                    gr.Examples(
-                        examples=[
-                            [">Outer Membrane Protein (OmpA)\nAPKNTWYTGAKLGWSQYHDTGFINNNGPTHENQLGAGAFGGYQVNPYVGFEMGYDWLGRMPYKGSVENGAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTKSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWTNNIGDAHTIGTRPDNGMLSLGVSYRFGQGEAAPVVAPAPAPAPEVQTKHFTLKSDVLFNFNKATLKPEGQAALDQLYSQLSNLDPKDGSVVVLGYTDRIGSDAYNQGLSERRAQSVVDYLISKGIPADKISARGMGESNPVTGNTCDNVKQRAALIDCLAPDRRVEIEVKGIKDVVTQPQA"],
-                            [">Cytoplasmic Protein (Ribosomal)\nARYLGPKLKLSRREGTDLFLKSGVRAIDTKCKIEQAPGQHGARKPRLSDYGVQLREKQKVRRIYGVLERQFRNYYKEAARLKGNTGENLLALLEGRLDNVVYRMGFG"]
-                        ],
-                        inputs=sequence_input,
-                        label=None
-                    )
-                # Output Column
-                with gr.Column(scale=6, elem_classes="content-box"):
-                    gr.Markdown("### 2. Localization Results")
-                    # 使用 HTML 组件展示 SVG
-                    output_svg = gr.HTML(label="Visualization", show_label=False)
-                    gr.Markdown("#### Confidence Scores")
-                    output_label = gr.Label(num_top_classes=NUM_CLASSES, show_label=False)
-        with gr.TabItem("About & Methodology"):
-             gr.Markdown("""
-             ### Methodology
-             **LocPred-Prok** employs a dual-branch neural network architecture...
-             """)
-    # --- Interaction ---
-    submit_btn.click(
-        fn=predict,
-        inputs=sequence_input,
-        outputs=[output_label, output_svg]
-    )
-    clear_btn.click(lambda: [None, None], outputs=[output_label, output_svg])
-# Launch
 app.launch()

 import torch.nn as nn
 import torch.nn.functional as F
 import gradio as gr
+import matplotlib.pyplot as plt
+import numpy as np
 from transformers import AutoTokenizer, AutoModel
 # ==========================
 # 0. 环境与缓存设置
 # ==========================
+# 强制使用非交互式后端，防止 matplotlib 在服务器报错
+plt.switch_backend('Agg')
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 import shutil
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # ==========================
+# 1. 模型架构定义 (支持 Attention 输出)
 # ==========================
 class AttentionPooling(nn.Module):
+    # Attention pooling: scores each position with a single linear layer,
+    # softmax-normalizes the scores over the sequence axis, and returns the
+    # weighted sum of the inputs together with the weights themselves.
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
     def forward(self, x, mask):
+        # x shape: (Batch, Seq_Len, Dim)
+        # mask: positions where mask == 0 are excluded from the softmax
+        # (their logits are set to -inf below).
+        attn_logits = self.attention_net(x).squeeze(2)
         attn_logits.masked_fill_(mask == 0, -float('inf'))
         attn_weights = F.softmax(attn_logits, dim=1)
+        # Returns: (Pooled_Embedding, Weights)
+        # The weights are used for the Panel D visualization.
+        return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1), attn_weights
 class ProtDualBranchEnhancedClassifier(nn.Module):
+    # Dual-branch classifier head: one branch projects the CLS embedding, the
+    # other refines per-token embeddings with a 1D convolution and pools them
+    # with AttentionPooling. The two projections are concatenated, passed
+    # through a sigmoid gate, and classified by an MLP head.
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
         self.token_refiner = nn.Sequential(
+            nn.Conv1d(d_model, d_model, kernel_size, padding='same'),
             nn.ReLU()
         )
         self.attention_pooling = AttentionPooling(d_model)
         self.tok_projector = nn.Linear(d_model, projection_dim)
+        # Both branches project to projection_dim, so the concatenation is 2x.
         fused_dim = projection_dim * 2
+        self.gate = nn.Sequential(nn.Linear(fused_dim, fused_dim), nn.Sigmoid())
         self.classifier_head = nn.Sequential(
+            nn.LayerNorm(fused_dim),
+            nn.Linear(fused_dim, fused_dim * 2),
+            nn.ReLU(),
+            nn.Dropout(dropout),
             nn.Linear(fused_dim * 2, num_classes)
         )
     def forward(self, cls_embedding, token_embeddings, mask):
+        # Returns a tuple (logits, pooling_weights); pooling_weights are the
+        # attention weights produced by the token branch.
+        # Branch 1: Global Semantic
         z_cls = self.cls_projector(cls_embedding)
+        # Branch 2: Local Structural
+        # Conv1d convolves over the last axis, so the last two axes are swapped
+        # before the refiner and swapped back afterwards.
         tok_emb_permuted = token_embeddings.permute(0, 2, 1)
         refined_tok_emb = self.token_refiner(tok_emb_permuted).permute(0, 2, 1)
+        # Keep the pooling weights so they can be visualized.
+        z_tok_pooled, pooling_weights = self.attention_pooling(refined_tok_emb, mask)
         z_tok = self.tok_projector(z_tok_pooled)
+        # Fusion Gate
         z_fused_concat = torch.cat([z_cls, z_tok], dim=1)
         gate_values = self.gate(z_fused_concat)
         z_fused_gated = z_fused_concat * gate_values
+        return self.classifier_head(z_fused_gated), pooling_weights
 # ==========================
+# 2. 加载模型与配置
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
+# 检查文件
+if not os.path.exists(LABEL_MAP_PATH): raise FileNotFoundError(f"Missing {LABEL_MAP_PATH}")
+if not os.path.exists(CLASSIFIER_PATH): raise FileNotFoundError(f"Missing {CLASSIFIER_PATH}")
 # 加载 Label Map
 with open(LABEL_MAP_PATH, 'r') as f:
     label_to_idx = json.load(f)
     idx_to_label = {v: k for k, v in label_to_idx.items()}
 NUM_CLASSES = len(idx_to_label)
 D_MODEL = 640
+print("🔹 Loading models...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
+plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE).eval()
+classifier = ProtDualBranchEnhancedClassifier(D_MODEL, 32, NUM_CLASSES, 0.3, 3).to(DEVICE)
+# AttentionPooling now also returns its weights, but that adds no new parameters, so the existing checkpoint still loads with the default strict=True.
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
+print("✅ Ready.")
 # ==========================
+# 3. Panel B: SVG 绘图引擎 (贝塞尔曲线 + 锚点)
 # ==========================
 def generate_bacterial_svg(target_class):
     target = target_class.lower() if target_class else ""
+    # 1. 状态判断
     is_om = "outer membrane" in target
     is_peri = "periplasm" in target
     is_cw = "cell wall" in target
     is_cyto = "cytoplasm" in target or "cytosol" in target
     is_secreted = "extracellular" in target or "secreted" in target
+    # 2. 颜色配置 (高对比度科研风)
+    c = {
+        # 激活态: 鲜红
+        "hl_stroke": "#D32F2F", "hl_fill": "#FFEBEE", "hl_text": "#B71C1C", "hl_dot": "#D32F2F",
+        # 未激活态: 极淡的灰白 (背景化)
+        "bg_stroke": "#90A4AE", "bg_fill_om": "#F5F5F5", "bg_fill_im": "#FAFAFA",
+        "bg_text": "#78909C", "bg_line": "#CFD8DC", "bg_dot": "#B0BEC5"
+    }
+    # 样式生成器
+    def style(active, base_fill, base_stroke, w_act="4", w_norm="2"):
+        if active: return c["hl_fill"], c["hl_stroke"], w_act
+        return base_fill, base_stroke, width_norm
+    om_f, om_s, om_w = style(is_peri, c["bg_fill_om"], c["hl_stroke"] if is_om else c["bg_stroke"])
+    cw_s = c["hl_stroke"] if is_cw else "#B0BEC5"
+    cw_w, cw_d = ("3", "0") if is_cw else ("1.5", "6,4")
+    im_f, im_s, im_w = style(is_cyto, c["bg_fill_im"], c["hl_stroke"] if is_im else c["bg_stroke"])
+    # 标签样式 (文字颜色, 字重, 线条颜色, 线宽, 锚点颜色, 锚点半径)
+    def label_style(active):
+        if active: return c["hl_text"], "bold", c["hl_stroke"], "2.5", c["hl_dot"], "5"
+        return c["bg_text"], "normal", c["bg_line"], "1.5", c["bg_dot"], "3"
+    l_om, l_peri, l_cw, l_im, l_cyto = label_style(is_om), label_style(is_peri), label_style(is_cw), label_style(is_im), label_style(is_cyto)
+    # 3. 坐标定义
+    bx, by = 280, 210 # 细菌中心
+    tx = 600          # 标签文字起始 X 坐标
+    # 目标锚点 (Target Anchor Points) - 精确落在结构上
+    targets = {
+        "om":   (bx + 140, by - 120), # 外膜线
+        "peri": (bx + 120, by - 90),  # 周质间隙
+        "cw":   (bx + 100, by - 70),  # 细胞壁线
+        "im":   (bx + 70,  by - 50),  # 内膜线
+        "cyto": (bx,       by)        # 胞质中心
+    }
+    text_y = {"om": 90, "peri": 150, "cw": 210, "im": 270, "cyto": 330}
+    # 4. 贝塞尔曲线连接器
+    def draw_connector(key, style_tuple, label_text):
+        txt_col, weight, line_col, width, dot_col, r = style_tuple
+        tx_pos, ty_pos = tx, text_y[key]
+        ex, ey = targets[key]
+        # 贝塞尔控制点：形成 S 形曲线
+        c1x, c1y = tx_pos - 100, ty_pos
+        c2x, c2y = ex + 50, ey
+        path = f"M {tx_pos - 10} {ty_pos - 5} C {c1x} {c1y}, {c2x} {c2y}, {ex} {ey}"
+        return f"""
+        <g>
+            <text x="{tx_pos}" y="{ty_pos}" fill="{txt_col}" font-weight="{weight}" font-size="15" font-family="Arial">{label_text}</text>
+            <path d="{path}" fill="none" stroke="{line_col}" stroke-width="{width}" />
+            <circle cx="{ex}" cy="{ey}" r="{r}" fill="{dot_col}" stroke="white" stroke-width="1" />
+        </g>
+        """
+    svg = f"""<svg width="100%" height="100%" viewBox="0 0 800 420" xmlns="http://www.w3.org/2000/svg">
+        <g transform="translate(280, 210)">
+            <rect x="-150" y="-150" width="300" height="300" rx="150" ry="150" fill="{om_f}" stroke="{om_s}" stroke-width="{om_w}" />
+            <rect x="-110" y="-110" width="220" height="220" rx="110" ry="110" fill="none" stroke="{cw_s}" stroke-width="{cw_w}" stroke-dasharray="{cw_d}" />
+            <rect x="-70" y="-70" width="140" height="140" rx="70" ry="70" fill="{im_f}" stroke="{im_s}" stroke-width="{im_w}" />
+            <g opacity="0.4">
+                <path d="M -30 -20 Q 0 -60 30 -20 T 60 -10" fill="none" stroke="#CFD8DC" stroke-width="3" />
+                <circle cx="-40" cy="30" r="3" fill="#B0BEC5" /> <circle cx="20" cy="40" r="3" fill="#B0BEC5" />
             </g>
         </g>
         {f'''
+        <g transform="translate(500, 40)">
+            <text x="0" y="0" text-anchor="middle" fill="{c['hl_stroke']}" font-weight="bold" font-family="Arial" font-size="14">SECRETED</text>
+            <path d="M 0 10 L 0 40" stroke="{c['hl_stroke']}" stroke-width="2" marker-end="url(#arrow_hl)" />
         </g>
         ''' if is_secreted else ""}
+        <defs><marker id="arrow_hl" markerWidth="10" markerHeight="7" refX="9" refY="3.5" orient="auto"><polygon points="0 0, 10 3.5, 0 7" fill="{c['hl_stroke']}" /></marker></defs>
+        {draw_connector("om", l_om, "Outer Membrane")}
+        {draw_connector("peri", l_peri, "Periplasm")}
+        {draw_connector("cw", l_cw, "Cell Wall")}
+        {draw_connector("im", l_im, "Inner Membrane")}
+        {draw_connector("cyto", l_cyto, "Cytoplasm")}
+    </svg>"""
+    return svg
+# ==========================
+# 4. Panel D: Attention 绘图引擎
+# ==========================
+def draw_pooling_weights(weights, sequence):
+    """
+    Visualize Attention Pooling Weights (1D Heatmap/Bar).
+    """
+    # 归一化
+    if weights.max() > 0:
+        weights = (weights - weights.min()) / (weights.max() - weights.min())
+    fig, ax = plt.subplots(figsize=(6, 3), dpi=120)
+    x = np.arange(len(weights))
+    # 绘制红色条形
+    ax.bar(x, weights, width=1.0, color='#D32F2F', alpha=0.8, label='Attention')
+    # 样式
+    ax.set_title("Learned Motif Importance (Attention Pooling)", fontsize=10, fontweight='bold', color='#37474F')
+    ax.set_xlabel("Residue Position", fontsize=9)
+    ax.set_ylabel("Weight", fontsize=9)
+    ax.spines['top'].set_visible(False)
+    ax.spines['right'].set_visible(False)
+    ax.spines['left'].set_visible(False)
+    ax.set_yticks([])
+    # 标注最高峰 (Potential Motif)
+    threshold = np.percentile(weights, 98) # 更加严格的阈值
+    if weights.max() > threshold:
+        peak_idx = np.argmax(weights)
+        ax.annotate('Key Motif', xy=(peak_idx, weights[peak_idx]), xytext=(peak_idx, weights[peak_idx]+0.2),
+                    arrowprops=dict(facecolor='#37474F', shrink=0.05, width=1, headwidth=5),
+                    ha='center', fontsize=8, color='#37474F')
+    plt.tight_layout()
+    return fig
 # ==========================
+# 5. 预测主逻辑
 # ==========================
 def predict(sequence_input):
+    if not sequence_input or sequence_input.isspace(): raise gr.Error("Empty Input")
     seq = "".join(sequence_input.split('\n')[1:]) if sequence_input.startswith('>') else sequence_input
+    seq = re.sub(r'[^A-Z]', '', seq.upper())[:1024]
+    if not seq: raise gr.Error("Invalid Sequence")
     with torch.no_grad():
         inputs = tokenizer(seq, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
         outputs = plm_model(**inputs)
         hidden_states = outputs.last_hidden_state
         cls_embedding = hidden_states[:, 0, :]
+        token_embeddings = hidden_states[:, 1:-1, :] # No CLS/EOS
         token_mask = inputs['attention_mask'][:, 1:-1]
+        # ⚠️ 获取 logits 和 weights
+        logits, pooling_weights = classifier(cls_embedding, token_embeddings, token_mask)
         probs = F.softmax(logits, dim=1)[0]
+    # 1. 结果
+    top_label = idx_to_label[torch.max(probs, dim=0)[1].item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
+    # 2. Panel B: SVG
+    svg = generate_bacterial_svg(top_label)
+    # 3. Panel D: Attention Plot
+    # 取 batch 中第一个样本的 weights
+    w_np = pooling_weights[0].cpu().numpy()
+    attn_plot = draw_pooling_weights(w_np, seq)
+    return confidences, svg, attn_plot
 # ==========================
+# 6. UI Layout (4-Block Paper Style)
 # ==========================
+layout_css = """
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
+body { background-color: #ffffff; font-family: 'Inter', sans-serif; }
+/* Header: Sky Blue Theme */
+.header-div {
+    background: linear-gradient(to right, #E0F7FA, #E1F5FE);
+    padding: 1.5rem;
+    border-radius: 8px;
+    margin-bottom: 20px;
+    text-align: center;
+    border: 1px solid #B3E5FC;
 }
+.header-title { font-size: 2.2rem; font-weight: 800; color: #0288D1; margin-bottom: 5px; }
+.header-sub { font-size: 1.0rem; color: #0277BD; }
+/* Panel Cards */
+.panel-card {
+    border: 1px solid #e2e8f0;
+    border-radius: 8px;
+    padding: 15px;
+    background: white;
+    height: 100%;
+    display: flex;
+    flex-direction: column;
 }
+.panel-header {
+    font-weight: 700; color: #475569; border-bottom: 2px solid #f1f5f9;
+    padding-bottom: 8px; margin-bottom: 12px; font-size: 1.0rem;
 }
+.panel-label {
+    display: inline-block; background: #E0F7FA; color: #0277BD; border: 1px solid #B2EBF2;
+    padding: 2px 8px; border-radius: 4px; font-size: 0.8rem; margin-right: 8px; font-weight: 800;
 }
 """
+theme = gr.themes.Soft(primary_hue="sky").set(body_background_fill="white", block_background_fill="white", block_border_width="0px")
+with gr.Blocks(theme=theme, css=layout_css, title="LocPred-Prok") as app:
+    gr.HTML("""
+        <div class="header-div">
             <div class="header-title">LocPred-Prok</div>
+            <div class="header-sub">Deep Learning Framework for Prokaryotic Subcellular Localization</div>
+        </div>
+    """)
+    # Row 1: A & B
+    with gr.Row():
+        with gr.Column(elem_classes="panel-card"):
+            gr.Markdown("<div class='panel-header'><span class='panel-label'>A</span>Sequence Input</div>")
+            sequence_input = gr.Textbox(lines=8, show_label=False, placeholder=">Sequence...")
             with gr.Row():
+                clear_btn = gr.ClearButton(sequence_input, value="Clear")
+                submit_btn = gr.Button("Predict Analysis", variant="primary")
+            gr.Examples([
+                [">Outer Membrane\nAPKNTWYTGAKLGWSQYHDTGFINNNGPTHENQLGAGAFGGYQVNPYVGFEMGYDWLGRMPYKGSVENGAYKAQGVQLTAKLGYPITDDLDIYTRLGGMVWRADTKSNVYGKNHDTGVSPVFAGGVEYAITPEIATRLEYQWTNNIGDAHTIGTRPDNGMLSLGVSYRFGQGEAAPVVAPAPAPAPEVQTKHFTLKSDVLFNFNKATLKPEGQAALDQLYSQLSNLDPKDGSVVVLGYTDRIGSDAYNQGLSERRAQSVVDYLISKGIPADKISARGMGESNPVTGNTCDNVKQRAALIDCLAPDRRVEIEVKGIKDVVTQPQA"]
+            ], inputs=sequence_input, label=None)
+        with gr.Column(elem_classes="panel-card"):
+            gr.Markdown("<div class='panel-header'><span class='panel-label'>B</span>Localization Visualization</div>")
+            output_svg = gr.HTML(label="Visual", show_label=False)
+    # Row 2: C & D
+    with gr.Row():
+        with gr.Column(elem_classes="panel-card"):
+            gr.Markdown("<div class='panel-header'><span class='panel-label'>C</span>Prediction Confidence</div>")
+            output_label = gr.Label(num_top_classes=NUM_CLASSES, show_label=False)
+        with gr.Column(elem_classes="panel-card"):
+            gr.Markdown("<div class='panel-header'><span class='panel-label'>D</span>Learned Motif Importance (Attention)</div>")
+            output_plot = gr.Plot(label="Attention", show_label=False)
+    submit_btn.click(fn=predict, inputs=sequence_input, outputs=[output_label, output_svg, output_plot])
+    clear_btn.click(lambda: [None, None, None], outputs=[output_label, output_svg, output_plot])
 app.launch()