Spaces:

isyslab
/

LocPred-Prok

Running

App Files Files Community

wangleiofficial committed on Dec 11, 2025

Commit

80c9b83

verified ·

1 Parent(s): 0c3ed7a

Update app.py

Browse files

Files changed (1) hide show

app.py +37 -36

app.py CHANGED Viewed

@@ -9,9 +9,9 @@ import matplotlib.pyplot as plt
 import numpy as np
 from transformers import AutoTokenizer, AutoModel
-# ==========================
-# 0. 环境与缓存设置
-# ==========================
 # 强制使用非交互式后端，防止 matplotlib 在服务器报错
 plt.switch_backend('Agg')
@@ -24,9 +24,9 @@ for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
-# ==========================
-# 1. 模型架构定义 (支持 Attention 输出)
-# ==========================
 class AttentionPooling(nn.Module):
     def __init__(self, d_model):
         super().__init__()
@@ -81,9 +81,9 @@ class ProtDualBranchEnhancedClassifier(nn.Module):
         return self.classifier_head(z_fused_gated), pooling_weights
-# ==========================
-# 2. 加载模型与配置
-# ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
@@ -105,14 +105,13 @@ tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
 plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE).eval()
 classifier = ProtDualBranchEnhancedClassifier(D_MODEL, 32, NUM_CLASSES, 0.3, 3).to(DEVICE)
-# strict=False 允许加载即使权重文件中没有 pooling_weights 相关的特定状态（通常不影响）
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
 print("✅ Ready.")
-# ==========================
-# 3. Panel B: SVG 绘图引擎 (贝塞尔曲线 + 锚点)
-# ==========================
 def generate_bacterial_svg(target_class):
     target = target_class.lower() if target_class else ""
@@ -133,28 +132,29 @@ def generate_bacterial_svg(target_class):
         "bg_text": "#78909C", "bg_line": "#CFD8DC", "bg_dot": "#B0BEC5"
     }
-    # 样式生成器
     def style(active, base_fill, base_stroke, w_act="4", w_norm="2"):
-        if active: return c["hl_fill"], c["hl_stroke"], w_act
-        return base_fill, base_stroke, width_norm
     om_f, om_s, om_w = style(is_peri, c["bg_fill_om"], c["hl_stroke"] if is_om else c["bg_stroke"])
     cw_s = c["hl_stroke"] if is_cw else "#B0BEC5"
     cw_w, cw_d = ("3", "0") if is_cw else ("1.5", "6,4")
     im_f, im_s, im_w = style(is_cyto, c["bg_fill_im"], c["hl_stroke"] if is_im else c["bg_stroke"])
-    # 标签样式 (文字颜色, 字重, 线条颜色, 线宽, 锚点颜色, 锚点半径)
     def label_style(active):
         if active: return c["hl_text"], "bold", c["hl_stroke"], "2.5", c["hl_dot"], "5"
         return c["bg_text"], "normal", c["bg_line"], "1.5", c["bg_dot"], "3"
     l_om, l_peri, l_cw, l_im, l_cyto = label_style(is_om), label_style(is_peri), label_style(is_cw), label_style(is_im), label_style(is_cyto)
-    # 3. 坐标定义
     bx, by = 280, 210 # 细菌中心
     tx = 600          # 标签文字起始 X 坐标
-    # 目标锚点 (Target Anchor Points) - 精确落在结构上
     targets = {
         "om":   (bx + 140, by - 120), # 外膜线
         "peri": (bx + 120, by - 90),  # 周质间隙
@@ -165,13 +165,13 @@ def generate_bacterial_svg(target_class):
     text_y = {"om": 90, "peri": 150, "cw": 210, "im": 270, "cyto": 330}
-    # 4. 贝塞尔曲线连接器
     def draw_connector(key, style_tuple, label_text):
         txt_col, weight, line_col, width, dot_col, r = style_tuple
         tx_pos, ty_pos = tx, text_y[key]
         ex, ey = targets[key]
-        # 贝塞尔控制点：形成 S 形曲线
         c1x, c1y = tx_pos - 100, ty_pos
         c2x, c2y = ex + 50, ey
@@ -213,9 +213,9 @@ def generate_bacterial_svg(target_class):
     </svg>"""
     return svg
-# ==========================
-# 4. Panel D: Attention 绘图引擎
-# ==========================
 def draw_pooling_weights(weights, sequence):
     """
     Visualize Attention Pooling Weights (1D Heatmap/Bar).
@@ -239,8 +239,8 @@ def draw_pooling_weights(weights, sequence):
     ax.spines['left'].set_visible(False)
     ax.set_yticks([])
-    # 标注最高峰 (Potential Motif)
-    threshold = np.percentile(weights, 98) # 更加严格的阈值
     if weights.max() > threshold:
         peak_idx = np.argmax(weights)
         ax.annotate('Key Motif', xy=(peak_idx, weights[peak_idx]), xytext=(peak_idx, weights[peak_idx]+0.2),
@@ -250,9 +250,9 @@ def draw_pooling_weights(weights, sequence):
     plt.tight_layout()
     return fig
-# ==========================
-# 5. 预测主逻辑
-# ==========================
 def predict(sequence_input):
     if not sequence_input or sequence_input.isspace(): raise gr.Error("Empty Input")
@@ -269,11 +269,11 @@ def predict(sequence_input):
         token_embeddings = hidden_states[:, 1:-1, :] # No CLS/EOS
         token_mask = inputs['attention_mask'][:, 1:-1]
-        # ⚠️ 获取 logits 和 weights
         logits, pooling_weights = classifier(cls_embedding, token_embeddings, token_mask)
         probs = F.softmax(logits, dim=1)[0]
-    # 1. 结果
     top_label = idx_to_label[torch.max(probs, dim=0)[1].item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
@@ -287,9 +287,9 @@ def predict(sequence_input):
     return confidences, svg, attn_plot
-# ==========================
-# 6. UI Layout (4-Block Paper Style)
-# ==========================
 layout_css = """
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
 body { background-color: #ffffff; font-family: 'Inter', sans-serif; }
@@ -330,6 +330,7 @@ theme = gr.themes.Soft(primary_hue="sky").set(body_background_fill="white", bloc
 with gr.Blocks(theme=theme, css=layout_css, title="LocPred-Prok") as app:
     gr.HTML("""
         <div class="header-div">
             <div class="header-title">LocPred-Prok</div>
@@ -337,7 +338,7 @@ with gr.Blocks(theme=theme, css=layout_css, title="LocPred-Prok") as app:
         </div>
     """)
-    # Row 1: A & B
     with gr.Row():
         with gr.Column(elem_classes="panel-card"):
             gr.Markdown("<div class='panel-header'><span class='panel-label'>A</span>Sequence Input</div>")
@@ -353,7 +354,7 @@ with gr.Blocks(theme=theme, css=layout_css, title="LocPred-Prok") as app:
             gr.Markdown("<div class='panel-header'><span class='panel-label'>B</span>Localization Visualization</div>")
             output_svg = gr.HTML(label="Visual", show_label=False)
-    # Row 2: C & D
     with gr.Row():
         with gr.Column(elem_classes="panel-card"):
             gr.Markdown("<div class='panel-header'><span class='panel-label'>C</span>Prediction Confidence</div>")

 import numpy as np
 from transformers import AutoTokenizer, AutoModel
+# ==============================================================================
+# 0. 环境与缓存设置 (Environment Setup)
+# ==============================================================================
 # 强制使用非交互式后端，防止 matplotlib 在服务器报错
 plt.switch_backend('Agg')
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
+# ==============================================================================
+# 1. 模型架构定义 (Model Architecture)
+# ==============================================================================
 class AttentionPooling(nn.Module):
     def __init__(self, d_model):
         super().__init__()
         return self.classifier_head(z_fused_gated), pooling_weights
+# ==============================================================================
+# 2. 加载模型与配置 (Load Resources)
+# ==============================================================================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE).eval()
 classifier = ProtDualBranchEnhancedClassifier(D_MODEL, 32, NUM_CLASSES, 0.3, 3).to(DEVICE)
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
 print("✅ Ready.")
+# ==============================================================================
+# 3. Panel B: SVG 绘图引擎 (Visualization Engine)
+# ==============================================================================
 def generate_bacterial_svg(target_class):
     target = target_class.lower() if target_class else ""
         "bg_text": "#78909C", "bg_line": "#CFD8DC", "bg_dot": "#B0BEC5"
     }
+    # 3. 样式生成器 (这里修复了之前的 bug)
     def style(active, base_fill, base_stroke, w_act="4", w_norm="2"):
+        if active:
+            return c["hl_fill"], c["hl_stroke"], w_act
+        # ✅ 修复点：这里原来写成了 width_norm，现已修正为 w_norm
+        return base_fill, base_stroke, w_norm
     om_f, om_s, om_w = style(is_peri, c["bg_fill_om"], c["hl_stroke"] if is_om else c["bg_stroke"])
     cw_s = c["hl_stroke"] if is_cw else "#B0BEC5"
     cw_w, cw_d = ("3", "0") if is_cw else ("1.5", "6,4")
     im_f, im_s, im_w = style(is_cyto, c["bg_fill_im"], c["hl_stroke"] if is_im else c["bg_stroke"])
+    # 标签样式
     def label_style(active):
         if active: return c["hl_text"], "bold", c["hl_stroke"], "2.5", c["hl_dot"], "5"
         return c["bg_text"], "normal", c["bg_line"], "1.5", c["bg_dot"], "3"
     l_om, l_peri, l_cw, l_im, l_cyto = label_style(is_om), label_style(is_peri), label_style(is_cw), label_style(is_im), label_style(is_cyto)
+    # 4. 坐标定义
     bx, by = 280, 210 # 细菌中心
     tx = 600          # 标签文字起始 X 坐标
     targets = {
         "om":   (bx + 140, by - 120), # 外膜线
         "peri": (bx + 120, by - 90),  # 周质间隙
     text_y = {"om": 90, "peri": 150, "cw": 210, "im": 270, "cyto": 330}
+    # 5. 贝塞尔曲线连接器
     def draw_connector(key, style_tuple, label_text):
         txt_col, weight, line_col, width, dot_col, r = style_tuple
         tx_pos, ty_pos = tx, text_y[key]
         ex, ey = targets[key]
+        # 贝塞尔控制点
         c1x, c1y = tx_pos - 100, ty_pos
         c2x, c2y = ex + 50, ey
     </svg>"""
     return svg
+# ==============================================================================
+# 4. Panel D: Attention 绘图引擎 (Interpretability)
+# ==============================================================================
 def draw_pooling_weights(weights, sequence):
     """
     Visualize Attention Pooling Weights (1D Heatmap/Bar).
     ax.spines['left'].set_visible(False)
     ax.set_yticks([])
+    # 标注最高峰 (Key Motif)
+    threshold = np.percentile(weights, 98)
     if weights.max() > threshold:
         peak_idx = np.argmax(weights)
         ax.annotate('Key Motif', xy=(peak_idx, weights[peak_idx]), xytext=(peak_idx, weights[peak_idx]+0.2),
     plt.tight_layout()
     return fig
+# ==============================================================================
+# 5. 预测主逻辑 (Prediction Logic)
+# ==============================================================================
 def predict(sequence_input):
     if not sequence_input or sequence_input.isspace(): raise gr.Error("Empty Input")
         token_embeddings = hidden_states[:, 1:-1, :] # No CLS/EOS
         token_mask = inputs['attention_mask'][:, 1:-1]
+        # ⚠️ 获取 logits 和 pooling_weights
         logits, pooling_weights = classifier(cls_embedding, token_embeddings, token_mask)
         probs = F.softmax(logits, dim=1)[0]
+    # 1. 结果 (Panel C)
     top_label = idx_to_label[torch.max(probs, dim=0)[1].item()]
     confidences = {idx_to_label[i]: float(p) for i, p in enumerate(probs)}
     return confidences, svg, attn_plot
+# ==============================================================================
+# 6. UI 布局 (Four-Block Paper Style)
+# ==============================================================================
 layout_css = """
 @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
 body { background-color: #ffffff; font-family: 'Inter', sans-serif; }
 with gr.Blocks(theme=theme, css=layout_css, title="LocPred-Prok") as app:
+    # --- Header ---
     gr.HTML("""
         <div class="header-div">
             <div class="header-title">LocPred-Prok</div>
         </div>
     """)
+    # --- Row 1: Panels A & B ---
     with gr.Row():
         with gr.Column(elem_classes="panel-card"):
             gr.Markdown("<div class='panel-header'><span class='panel-label'>A</span>Sequence Input</div>")
             gr.Markdown("<div class='panel-header'><span class='panel-label'>B</span>Localization Visualization</div>")
             output_svg = gr.HTML(label="Visual", show_label=False)
+    # --- Row 2: Panels C & D ---
     with gr.Row():
         with gr.Column(elem_classes="panel-card"):
             gr.Markdown("<div class='panel-header'><span class='panel-label'>C</span>Prediction Confidence</div>")