Spaces:

isyslab
/

LocPred-Prok

Running

App Files Files Community

wangleiofficial committed on Dec 9, 2025

Commit

ca6ba24

verified ·

1 Parent(s): a2cdbf7

Update app.py

Browse files

Files changed (1) hide show

app.py +161 -162

app.py CHANGED Viewed

@@ -6,22 +6,20 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModel
 # ==========================
-# 🚧 0. 防止 Hugging Face 缓存溢出
 # ==========================
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
-# 每次启动时清理旧缓存
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # ==========================
-# 1. Model Definition (模型架构定义)
 # ==========================
 class AttentionPooling(nn.Module):
-    """Attention Pooling Layer"""
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
@@ -33,7 +31,6 @@ class AttentionPooling(nn.Module):
         return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1)
 class ProtDualBranchEnhancedClassifier(nn.Module):
-    """Enhanced dual-branch model"""
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
@@ -68,58 +65,51 @@ class ProtDualBranchEnhancedClassifier(nn.Module):
         return self.classifier_head(z_fused_gated)
 # ==========================
-# 2. Load Models and Files (加载模型与配置)
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
-# --- 加载标签映射 (这里定义了 NUM_CLASSES) ---
 if not os.path.exists(LABEL_MAP_PATH):
-    raise FileNotFoundError(f"Error: Missing '{LABEL_MAP_PATH}'. Please upload it to your Space.")
 with open(LABEL_MAP_PATH, 'r') as f:
     label_to_idx = json.load(f)
     idx_to_label = {v: k for k, v in label_to_idx.items()}
-# ✅ 关键变量定义
 NUM_CLASSES = len(idx_to_label)
 D_MODEL = 640
-# --- 加载预训练蛋白模型 ---
-print("🔹 Loading Protein Language Model...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
 plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE)
 plm_model.eval()
-print("✅ PLM loaded successfully.")
-# --- 加载下游分类器 ---
-print("🔹 Loading downstream classifier...")
 classifier = ProtDualBranchEnhancedClassifier(
     d_model=D_MODEL, projection_dim=32, num_classes=NUM_CLASSES,
     dropout=0.3, kernel_size=3
 ).to(DEVICE)
 if not os.path.exists(CLASSIFIER_PATH):
-    raise FileNotFoundError(f"Error: Could not find '{CLASSIFIER_PATH}'. Please upload your trained .pth file.")
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
-print("✅ Classifier loaded. Application is ready!")
 # ==========================
-# 3. Prediction Function (预测函数)
 # ==========================
 def predict(sequence_input):
     if not sequence_input or sequence_input.isspace():
         raise gr.Error("Sequence cannot be empty.")
-    # Clean FASTA header if present
     sequence = "".join(sequence_input.split('\n')[1:]) if sequence_input.startswith('>') else sequence_input
     sequence = re.sub(r'[^A-Z]', '', sequence.upper())
     if not sequence:
-        raise gr.Error("Invalid sequence format. Please enter amino acids (A-Z).")
     with torch.no_grad():
         inputs = tokenizer(sequence, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
@@ -136,185 +126,194 @@ def predict(sequence_input):
     return confidences
 # ==========================
-# 4. Academic Research Interface (UI 界面)
 # ==========================
-# 学术风格 CSS
-academic_css = """
-body { font-family: 'Roboto', 'Helvetica Neue', Arial, sans-serif; }
-.header-container {
-    background: linear-gradient(to right, #1e3a8a, #3b82f6);
-    color: white;
-    padding: 2.5rem;
-    border-radius: 10px;
-    margin-bottom: 20px;
     text-align: center;
 }
-.header-title { font-size: 2.5rem; font-weight: 700; margin-bottom: 0.5rem; }
-.header-subtitle { font-size: 1.2rem; opacity: 0.9; font-weight: 300; }
-.badge-container { display: flex; justify-content: center; gap: 15px; margin-top: 15px; }
-.badge {
-    background: rgba(255,255,255,0.2);
-    padding: 5px 15px;
-    border-radius: 20px;
-    font-size: 0.9rem;
-    border: 1px solid rgba(255,255,255,0.4);
 }
-.highlight-box {
-    background: #f8fafc;
-    border-left: 5px solid #3b82f6;
-    padding: 15px;
-    margin: 20px 0;
-    color: #334155;
 }
-.performance-table { width: 100%; border-collapse: collapse; margin-top: 10px; }
-.performance-table th { background: #e2e8f0; padding: 8px; text-align: left; }
-.performance-table td { border-bottom: 1px solid #e2e8f0; padding: 8px; }
-.footer { text-align: center; color: #94a3b8; margin-top: 30px; font-size: 0.85rem; }
 """
-# 定义主题
-theme = gr.themes.Default(
     primary_hue="blue",
-    secondary_hue="slate",
-    neutral_hue="slate",
-    font=[gr.themes.GoogleFont("Roboto"), "ui-sans-serif", "system-ui"]
 )
-with gr.Blocks(theme=theme, css=academic_css, title="LocPred-Prok Web Server") as app:
-    # --- 1. 学术 Header ---
-    with gr.Column(elem_classes="header-container"):
         gr.HTML("""
-            <div class="header-title">LocPred-Prok</div>
-            <div class="header-subtitle">
-                Prokaryotic Protein Subcellular Localization Prediction with Dual-Branch Architecture
-            </div>
-            <div class="badge-container">
-                <span class="badge">🧬 ESM-2 150M Backbone</span>
-                <span class="badge">🏆 91.2% Accuracy</span>
-                <span class="badge">🎯 MCC 0.889</span>
             </div>
         """)
-    # --- 2. 核心功能区 (Tab结构) ---
     with gr.Tabs():
-        # === Tab 1: Web Server (预测工具) ===
-        with gr.TabItem("🚀 Prediction Server"):
             with gr.Row():
-                # 左侧输入
-                with gr.Column(scale=5):
-                    gr.Markdown("### 📥 Input Sequence (FASTA)")
                     sequence_input = gr.Textbox(
-                        lines=8,
-                        placeholder=">Example_Protein\nMKFKLTAGCLAVAGVLLASSFGADAEIVVNAIYDQVARTEDGVYTQGQLTGRRIELLNKLGIEPEDSLASTVIHEFVARVGDDHGIETIIDEFYRQHPSASL...",
                         show_label=False,
-                        elem_id="seq-input"
                     )
                     with gr.Row():
                         clear_btn = gr.ClearButton(components=[sequence_input], value="Clear")
-                        submit_btn = gr.Button("Run Prediction", variant="primary", scale=2)
-                    gr.Markdown("#### Example Sequences")
-                    gr.Examples(
-                        examples=[
-                            [">Gram-negative Outer Membrane Protein\nMSKLVKTLTISEISKAQNNGGKPAWCWYTLAMCGAGYDSGTCDYMYSHCFGIKHHSSGSSSYHC"],
-                            [">Gram-positive Cell Wall Protein\nMKFKLTAGCLAVAGVLLASSFGADAEIVVNAIYDQVARTEDGVYTQGQLTGRRIELLNKLGIEPEDSLASTVIHEFVARVGDDHGIETIIDEFYRQHPSASL"],
-                        ],
-                        inputs=sequence_input,
-                        label=None
-                    )
-                # 右侧输出
-                with gr.Column(scale=4):
-                    gr.Markdown("### 📊 Prediction Results")
-                    # ✅ 这里使用了 NUM_CLASSES，现在它已经在前面定义过了
-                    output_label = gr.Label(num_top_classes=NUM_CLASSES, label="Probabilities")
-                    # 解释性文字
-                    gr.Markdown(
-                        """
-                        <div style="font-size: 0.9rem; color: #64748b; margin-top: 10px;">
-                        <b>Note:</b> This model is optimized for challenging classes including
-                        <i>Gram-positive cell wall</i> and <i>Gram-negative outer membrane</i> proteins.
                         </div>
-                        """
-                    )
-        # === Tab 2: About & Abstract (论文展示) ===
-        with gr.TabItem("📖 About & Abstract"):
-            gr.Markdown("### Abstract")
-            gr.Markdown(
-                """
-                The precise localization of proteins within prokaryotic cells is fundamental to understanding their function.
-                **LocPred-Prok** is a novel deep learning framework that employs a purpose-built **dual-branch architecture**,
-                synergistically integrating global and local sequence features extracted from **ESM-2 (150M)** embeddings.
-                """
-            )
-            # 高亮核心发现
-            gr.HTML("""
-            <div class="highlight-box">
-                <b>💡 Key Findings:</b><br>
-                1. <b>Bigger ≠ Better:</b> Peak performance is achieved by the mid-sized ESM-2-150M, not the largest models.<br>
-                2. <b>Hard Classes Solved:</b> Exceptional performance on Gram-positive cell wall (MCC=0.84) and Gram-negative outer membrane (MCC=0.91).
-            </div>
-            """)
-            gr.Markdown("### 📈 Performance Metrics (Homology-Partitioned Benchmark)")
-            gr.HTML("""
-            <table class="performance-table">
-                <tr>
-                    <th>Metric</th>
-                    <th>LocPred-Prok Score</th>
-                    <th>Improvement</th>
-                </tr>
-                <tr>
-                    <td><b>Accuracy</b></td>
-                    <td><b>91.2%</b></td>
-                    <td>State-of-the-Art</td>
-                </tr>
-                <tr>
-                    <td><b>MCC (Overall)</b></td>
-                    <td><b>0.889</b></td>
-                    <td>Significant Leap</td>
-                </tr>
-                 <tr>
-                    <td>MCC (Outer Membrane)</td>
-                    <td>0.91</td>
-                    <td>High Precision</td>
-                </tr>
-            </table>
-            """)
-        # === Tab 3: Citation (引用) ===
-        with gr.TabItem("📝 Citation"):
-            gr.Markdown("If you use LocPred-Prok in your research, please cite our paper:")
-            gr.Code(
-                """
-@article{LocPredProk2025,
-  title={LocPred-Prok: Prokaryotic protein subcellular localization prediction with a dual-branch architecture and protein language model},
-  author={Your Name and Co-authors},
-  journal={Submission Journal},
   year={2025}
-}
-                """,
-                language="bibtex",
-                label="BibTeX"
-            )
     # --- Footer ---
     gr.HTML("""
-        <div class="footer">
-            Developed by iSysLab | <a href="https://github.com/isyslab-hust" target="_blank">GitHub</a> | Based on ESM-2 & PyTorch
         </div>
     """)
-    # 绑定事件
     submit_btn.click(fn=predict, inputs=sequence_input, outputs=output_label)
     clear_btn.click(lambda: None, outputs=[output_label])
-# 启动
 app.launch()

 from transformers import AutoTokenizer, AutoModel
 # ==========================
+# 🚧 0. Basic setup and cache cleanup (unchanged)
 # ==========================
 os.environ["HF_HOME"] = "/tmp/hf_cache"
 os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf_cache"
 os.environ["HF_HUB_DISABLE_SYMLINKS_WARNING"] = "1"
 for path in ["/tmp/hf_cache", os.path.expanduser("~/.cache/huggingface")]:
     shutil.rmtree(path, ignore_errors=True)
     os.makedirs(path, exist_ok=True)
 # ==========================
+# 1. Model Definition (unchanged)
 # ==========================
 class AttentionPooling(nn.Module):
     def __init__(self, d_model):
         super().__init__()
         self.attention_net = nn.Linear(d_model, 1)
         return torch.bmm(attn_weights.unsqueeze(1), x).squeeze(1)
 class ProtDualBranchEnhancedClassifier(nn.Module):
     def __init__(self, d_model, projection_dim, num_classes, dropout, kernel_size):
         super().__init__()
         self.cls_projector = nn.Linear(d_model, projection_dim)
         return self.classifier_head(z_fused_gated)
 # ==========================
+# 2. Load Models (unchanged)
 # ==========================
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 PLM_MODEL_NAME = "facebook/esm2_t30_150M_UR50D"
 CLASSIFIER_PATH = "best_model_esm2_t30_150M_UR50D.pth"
 LABEL_MAP_PATH = "label_map.json"
 if not os.path.exists(LABEL_MAP_PATH):
+    raise FileNotFoundError(f"Error: Missing '{LABEL_MAP_PATH}'.")
 with open(LABEL_MAP_PATH, 'r') as f:
     label_to_idx = json.load(f)
     idx_to_label = {v: k for k, v in label_to_idx.items()}
 NUM_CLASSES = len(idx_to_label)
 D_MODEL = 640
+print("🔹 Loading models...")
 tokenizer = AutoTokenizer.from_pretrained(PLM_MODEL_NAME)
 plm_model = AutoModel.from_pretrained(PLM_MODEL_NAME).to(DEVICE)
 plm_model.eval()
 classifier = ProtDualBranchEnhancedClassifier(
     d_model=D_MODEL, projection_dim=32, num_classes=NUM_CLASSES,
     dropout=0.3, kernel_size=3
 ).to(DEVICE)
 if not os.path.exists(CLASSIFIER_PATH):
+    raise FileNotFoundError(f"Error: Could not find '{CLASSIFIER_PATH}'.")
 classifier.load_state_dict(torch.load(CLASSIFIER_PATH, map_location=DEVICE))
 classifier.eval()
+print("✅ Ready.")
 # ==========================
+# 3. Predict Logic (unchanged)
 # ==========================
 def predict(sequence_input):
     if not sequence_input or sequence_input.isspace():
         raise gr.Error("Sequence cannot be empty.")
     sequence = "".join(sequence_input.split('\n')[1:]) if sequence_input.startswith('>') else sequence_input
     sequence = re.sub(r'[^A-Z]', '', sequence.upper())
     if not sequence:
+        raise gr.Error("Invalid sequence.")
     with torch.no_grad():
         inputs = tokenizer(sequence, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)
     return confidences
 # ==========================
+# 4. Ultra-Modern UI Design
 # ==========================
+# Minimalist, modern-style CSS
+modern_css = """
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;800&display=swap');
+body {
+    font-family: 'Inter', sans-serif !important;
+    background-color: #f8fafc;
+}
+/* 1. 顶部 Hero Section */
+.hero-container {
     text-align: center;
+    padding: 3rem 1rem;
+    margin-bottom: 1rem;
+}
+.hero-title {
+    font-size: 3rem;
+    font-weight: 800;
+    margin-bottom: 0.5rem;
+    background: -webkit-linear-gradient(45deg, #0f172a, #334155);
+    -webkit-background-clip: text;
+    -webkit-text-fill-color: transparent;
+    letter-spacing: -1px;
+}
+.hero-subtitle {
+    font-size: 1.25rem;
+    color: #64748b;
+    font-weight: 300;
+    max-width: 600px;
+    margin: 0 auto;
+}
+/* 2. 卡片风格 */
+.modern-card {
+    background: white;
+    border-radius: 16px;
+    padding: 24px;
+    border: 1px solid #e2e8f0;
+    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
+    transition: all 0.3s ease;
+}
+.modern-card:hover {
+    box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
+}
+/* 3. 输入框优化 - 模仿代码编辑器 */
+textarea {
+    font-family: 'SF Mono', 'Menlo', 'Monaco', 'Courier New', monospace !important;
+    font-size: 14px !important;
+    background-color: #f8fafc !important;
+    border: 1px solid #e2e8f0 !important;
+    border-radius: 8px !important;
+}
+/* 4. 按钮优化 */
+button.primary {
+    background: linear-gradient(135deg, #2563eb 0%, #1d4ed8 100%) !important;
+    border: none !important;
+    font-weight: 600 !important;
+    letter-spacing: 0.5px !important;
+    transition: transform 0.1s ease-in-out !important;
+}
+button.primary:hover {
+    transform: translateY(-2px);
+    box-shadow: 0 4px 12px rgba(37, 99, 235, 0.3);
+}
+/* 5. 标签页优化 */
+.tabs {
+    border: none !important;
+    background: transparent !important;
+}
+.tab-nav {
+    border-bottom: 1px solid #e2e8f0;
+    margin-bottom: 20px;
+}
+.tab-nav button {
+    font-weight: 600;
+    color: #64748b;
 }
+.tab-nav button.selected {
+    color: #2563eb;
+    border-bottom: 2px solid #2563eb;
 }
+/* 6. Footer */
+.footer-text {
+    text-align: center;
+    color: #94a3b8;
+    font-size: 0.8rem;
+    margin-top: 40px;
+    padding-bottom: 20px;
 }
 """
+# Use a minimalist theme as the base
+theme = gr.themes.Soft(
     primary_hue="blue",
+    radius_size="lg",
+    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"]
 )
+with gr.Blocks(theme=theme, css=modern_css, title="LocPred-Prok") as app:
+    # --- Hero Section ---
+    with gr.Column(elem_classes="hero-container"):
         gr.HTML("""
+            <div class="hero-title">LocPred-Prok</div>
+            <div class="hero-subtitle">
+                Next-generation prokaryotic subcellular localization using dual-branch protein language models.
             </div>
         """)
+    # --- Main Content ---
     with gr.Tabs():
+        # === TAB 1: Predict ===
+        with gr.TabItem("Predict", id="tab-predict"):
             with gr.Row():
+                # Input Column
+                with gr.Column(scale=3, elem_classes="modern-card"):
+                    gr.Markdown("### Sequence Input")
                     sequence_input = gr.Textbox(
+                        lines=12,
+                        placeholder="> Paste FASTA sequence here...",
                         show_label=False,
+                        container=False
                     )
                     with gr.Row():
                         clear_btn = gr.ClearButton(components=[sequence_input], value="Clear")
+                        submit_btn = gr.Button("Analyze Sequence", variant="primary", scale=2)
+                # Output Column
+                with gr.Column(scale=2, elem_classes="modern-card"):
+                    gr.Markdown("### Analysis Result")
+                    # Hide the Label component's own text caption to keep the interface clean
+                    output_label = gr.Label(num_top_classes=NUM_CLASSES, show_label=False)
+                    gr.HTML("""
+                        <div style="margin-top: 20px; padding: 10px; background: #eff6ff; border-radius: 8px; font-size: 0.85rem; color: #1e40af;">
+                            ℹ️ <b>Model Insight:</b> Prediction is based on the fusion of global semantic features (ESM-2) and local structural refinements.
                         </div>
+                    """)
+        # === TAB 2: Methodology ===
+        with gr.TabItem("Methodology", id="tab-about"):
+            with gr.Column(elem_classes="modern-card"):
+                gr.Markdown("### The Architecture")
+                gr.Markdown(
+                    """
+                    **LocPred-Prok** moves beyond the "bigger is better" paradigm. Instead of relying solely on massive parameter counts, we engineered a specialized **Dual-Branch Architecture**:
+                    * **Global Branch:** Leverages the `ESM-2 (150M)` foundation model to capture deep semantic dependencies.
+                    * **Local Branch:** Utilizes convolutional refinement and attention pooling to detect subtle signal motifs often missed by global pooling.
+                    This synergy allows for precise identification of challenging localization sites, particularly in **Cell Wall** and **Outer Membrane** regions.
+                    """
+                )
+        # === TAB 3: Cite ===
+        with gr.TabItem("Cite", id="tab-cite"):
+            with gr.Column(elem_classes="modern-card"):
+                gr.Markdown("### BibTeX Reference")
+                gr.Code(
+                    value="""@article{LocPredProk2025,
+  title={LocPred-Prok: Prokaryotic protein subcellular localization prediction with a dual-branch architecture},
+  author={Your Name et al.},
+  journal={Bioinformatics},
   year={2025}
+}""",
+                    label=None,
+                    language=None, # 防止之前的报错
+                    interactive=False
+                )
     # --- Footer ---
     gr.HTML("""
+        <div class="footer-text">
+            © 2025 iSysLab HUST &nbsp;|&nbsp; Powered by PyTorch & ESM-2
         </div>
     """)
+    # Logic
     submit_btn.click(fn=predict, inputs=sequence_input, outputs=output_label)
     clear_btn.click(lambda: None, outputs=[output_label])
 app.launch()