Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 13, 2025

Commit

1dbbf09

verified ·

1 Parent(s): d264132

Update app.py

Browse files

Files changed (1) hide show

app.py +137 -37

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ from tensorflow.keras.models import load_model
 from analyzer import PhylogeneticTreeAnalyzer
 import tempfile
 import shutil
 import uuid
 from pathlib import Path
 from huggingface_hub import hf_hub_download
@@ -27,7 +28,7 @@ import stat
 import time
 import asyncio
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
-from fastapi.responses import FileResponse
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
@@ -43,10 +44,11 @@ try:
 except Exception as e:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
     logging.warning(f"Failed to set up file logging: {e}")
 logger = logging.getLogger(__name__)
 logger.info(f"Gradio version: {gr.__version__}")
-# Set event loop policy
 try:
     asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
 except Exception as e:
@@ -61,39 +63,52 @@ TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
 QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 MODEL_REPO = "GGproject10/best_boundary_aware_model"
 CSV_PATH = "f cleaned.csv"
-# Initialize models
 boundary_model = None
 keras_model = None
 kmer_to_index = None
 analyzer = None
-# --- Model Loading (from Script 2) ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
     logger.info("🔍 Loading models...")
     try:
-        boundary_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_boundary_aware_model.pth", token=None)
         if os.path.exists(boundary_path):
             boundary_model = EnhancedGenePredictor(boundary_path)
-            logger.info("✅ Boundary model loaded.")
         else:
-            logger.error(f"❌ Boundary model file not found.")
     except Exception as e:
         logger.error(f"❌ Failed to load boundary model: {e}")
         boundary_model = None
     try:
-        keras_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.keras", token=None)
-        kmer_path = hf_hub_download(repo_id=MODEL_REPO, filename="kmer_to_index.pkl", token=None)
         if os.path.exists(keras_path) and os.path.exists(kmer_path):
             keras_model = load_model(keras_path)
             with open(kmer_path, "rb") as f:
                 kmer_to_index = pickle.load(f)
-            logger.info("✅ Keras model loaded.")
         else:
-            logger.error(f"❌ Keras model files not found.")
     except Exception as e:
         logger.error(f"❌ Failed to load Keras model: {e}")
         keras_model = None
@@ -102,8 +117,12 @@ def load_models_safely():
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
         csv_candidates = [
-            CSV_PATH, os.path.join(BASE_DIR, CSV_PATH), os.path.join(BASE_DIR, "app", CSV_PATH),
-            os.path.join(os.path.dirname(__file__), CSV_PATH), "f_cleaned.csv", os.path.join(BASE_DIR, "f_cleaned.csv")
         ]
         csv_loaded = False
         for csv_candidate in csv_candidates:
@@ -116,24 +135,26 @@ def load_models_safely():
                         break
                 except Exception as e:
                     logger.warning(f"CSV load failed for {csv_candidate}: {e}")
         if not csv_loaded:
-            logger.error("❌ Failed to load CSV data.")
             analyzer = None
         else:
             try:
                 if analyzer.train_ai_model():
-                    logger.info("✅ AI model training completed.")
                 else:
-                    logger.warning("⚠️ AI model training failed.")
             except Exception as e:
                 logger.warning(f"⚠️ AI model training failed: {e}")
     except Exception as e:
         logger.error(f"❌ Tree analyzer initialization failed: {e}")
         analyzer = None
 load_models_safely()
-# --- Tool Detection (from Script 2) ---
 def setup_binary_permissions():
     for binary in [MAFFT_PATH, IQTREE_PATH]:
         if os.path.exists(binary):
@@ -151,7 +172,12 @@ def check_tool_availability():
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     mafft_available = True
                     mafft_cmd = candidate
@@ -165,7 +191,12 @@ def check_tool_availability():
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     iqtree_available = True
                     iqtree_cmd = candidate
@@ -371,7 +402,7 @@ Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
-async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_file):
     temp_file_path = None
     try:
         if fasta_file_obj is None:
@@ -566,7 +597,8 @@ def create_gradio_interface():
                             dna_input = gr.Textbox(
                                 label="🧬 DNA Sequence",
                                 placeholder="Enter DNA sequence (ATCG format)...",
-                                lines=5
                             )
                         with gr.Column(scale=1):
                             similarity_score = gr.Slider(
@@ -574,11 +606,13 @@ def create_gradio_interface():
                                 maximum=99,
                                 value=95.0,
                                 step=1.0,
-                                label="🎯 Similarity Threshold (%)"
                             )
                             build_ml_tree = gr.Checkbox(
                                 label="🌲 Build ML Tree",
-                                value=False
                             )
                             analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                 with gr.TabItem("📁 File Upload"):
@@ -586,7 +620,8 @@ def create_gradio_interface():
                         with gr.Column(scale=2):
                             file_input = gr.File(
                                 label="📄 Upload FASTA File",
-                                file_types=[".fasta", ".fa", ".fas", ".txt"]
                             )
                         with gr.Column(scale=1):
                             file_similarity_score = gr.Slider(
@@ -594,22 +629,44 @@ def create_gradio_interface():
                                 maximum=99,
                                 value=95.0,
                                 step=1.0,
-                                label="🎯 Similarity Threshold (%)"
                             )
                             file_build_ml_tree = gr.Checkbox(
                                 label="🌲 Build ML Tree",
-                                value=False
                             )
                             analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
             gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
                 with gr.Column():
-                    boundary_output = gr.Textbox(label="🎯 Boundary Detection", interactive=False, lines=2)
-                    keras_output = gr.Textbox(label="🧠 F Gene Validation", interactive=False, lines=2)
                 with gr.Column():
-                    ml_tree_output = gr.Textbox(label="🌲 Phylogenetic Placement", interactive=False, lines=2)
-                    tree_analysis_output = gr.Textbox(label="🌳 Tree Analysis", interactive=False, lines=2)
-            summary_output = gr.Textbox(label="📋 Summary", interactive=False, lines=8)
             with gr.Row():
                 aligned_file = gr.File(label="📄 Alignment File", visible=False)
                 tree_file = gr.File(label="🌲 Tree File", visible=False)
@@ -617,9 +674,27 @@ def create_gradio_interface():
                 report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
-                    tree_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet.</div>")
                 with gr.TabItem("📊 Detailed Report"):
-                    report_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet.</div>")
             analyze_btn.click(
                 fn=run_pipeline,
@@ -627,7 +702,10 @@ def create_gradio_interface():
                 outputs=[
                     boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                     aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
-                ]
             )
             analyze_file_btn.click(
@@ -636,18 +714,38 @@ def create_gradio_interface():
                 outputs=[
                     boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                     aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
-                ]
             )
             gr.Examples(
                 examples=[
-                    ["ATCG" * 100, 85.0, False],
-                    ["CGAT" * 100, 90.0, True]
                 ],
                 inputs=[dna_input, similarity_score, build_ml_tree],
                 label="Example Sequences"
             )
         return iface
     except Exception as e:
         logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
@@ -664,6 +762,8 @@ def run_application():
         gradio_app = create_gradio_interface()
         gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
         logger.info("🚀 Starting Gene Analysis Pipeline...")
         uvicorn.run(
             app,
             host="0.0.0.0",

 from analyzer import PhylogeneticTreeAnalyzer
 import tempfile
 import shutil
+import sys
 import uuid
 from pathlib import Path
 from huggingface_hub import hf_hub_download
 import time
 import asyncio
 from fastapi import FastAPI, File, UploadFile, Form, HTTPException
+from fastapi.responses import HTMLResponse, FileResponse
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
 except Exception as e:
     logging.basicConfig(level=logging.INFO, handlers=[log_handler])
     logging.warning(f"Failed to set up file logging: {e}")
 logger = logging.getLogger(__name__)
 logger.info(f"Gradio version: {gr.__version__}")
+# Set event loop policy for compatibility with Gradio Spaces
 try:
     asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
 except Exception as e:
 QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
 os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
+# Model repository and file paths
 MODEL_REPO = "GGproject10/best_boundary_aware_model"
 CSV_PATH = "f cleaned.csv"
+# Initialize models as None
 boundary_model = None
 keras_model = None
 kmer_to_index = None
 analyzer = None
+# --- Model Loading ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
     logger.info("🔍 Loading models...")
     try:
+        boundary_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="best_boundary_aware_model.pth",
+            token=None
+        )
         if os.path.exists(boundary_path):
             boundary_model = EnhancedGenePredictor(boundary_path)
+            logger.info("✅ Boundary model loaded successfully.")
         else:
+            logger.error(f"❌ Boundary model file not found after download.")
     except Exception as e:
         logger.error(f"❌ Failed to load boundary model: {e}")
         boundary_model = None
     try:
+        keras_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="best_model.keras",
+            token=None
+        )
+        kmer_path = hf_hub_download(
+            repo_id=MODEL_REPO,
+            filename="kmer_to_index.pkl",
+            token=None
+        )
         if os.path.exists(keras_path) and os.path.exists(kmer_path):
             keras_model = load_model(keras_path)
             with open(kmer_path, "rb") as f:
                 kmer_to_index = pickle.load(f)
+            logger.info("✅ Keras model and k-mer index loaded successfully.")
         else:
+            logger.error(f"❌ Keras model or k-mer files not found.")
     except Exception as e:
         logger.error(f"❌ Failed to load Keras model: {e}")
         keras_model = None
         logger.info("🌳 Initializing tree analyzer...")
         analyzer = PhylogeneticTreeAnalyzer()
         csv_candidates = [
+            CSV_PATH,
+            os.path.join(BASE_DIR, CSV_PATH),
+            os.path.join(BASE_DIR, "app", CSV_PATH),
+            os.path.join(os.path.dirname(__file__), CSV_PATH),
+            "f_cleaned.csv",
+            os.path.join(BASE_DIR, "f_cleaned.csv")
         ]
         csv_loaded = False
         for csv_candidate in csv_candidates:
                         break
                 except Exception as e:
                     logger.warning(f"CSV load failed for {csv_candidate}: {e}")
+                    continue
         if not csv_loaded:
+            logger.error("❌ Failed to load CSV data from any candidate location.")
             analyzer = None
         else:
             try:
                 if analyzer.train_ai_model():
+                    logger.info("✅ AI model training completed successfully")
                 else:
+                    logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
             except Exception as e:
                 logger.warning(f"⚠️ AI model training failed: {e}")
     except Exception as e:
         logger.error(f"❌ Tree analyzer initialization failed: {e}")
         analyzer = None
+# Load models at startup
 load_models_safely()
+# --- Tool Detection ---
 def setup_binary_permissions():
     for binary in [MAFFT_PATH, IQTREE_PATH]:
         if os.path.exists(binary):
     for candidate in mafft_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
+                result = subprocess.run(
+                    [candidate, "--help"],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     mafft_available = True
                     mafft_cmd = candidate
     for candidate in iqtree_candidates:
         if shutil.which(candidate) or os.path.exists(candidate):
             try:
+                result = subprocess.run(
+                    [candidate, "--help"],
+                    capture_output=True,
+                    text=True,
+                    timeout=5
+                )
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     iqtree_available = True
                     iqtree_cmd = candidate
         error_msg = f"❌ Pipeline Error: {str(e)}"
         return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
+async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
     temp_file_path = None
     try:
         if fasta_file_obj is None:
                             dna_input = gr.Textbox(
                                 label="🧬 DNA Sequence",
                                 placeholder="Enter DNA sequence (ATCG format)...",
+                                lines=5,
+                                description="Paste your DNA sequence here"
                             )
                         with gr.Column(scale=1):
                             similarity_score = gr.Slider(
                                 maximum=99,
                                 value=95.0,
                                 step=1.0,
+                                label="🎯 Similarity Threshold (%)",
+                                description="Minimum similarity for tree analysis"
                             )
                             build_ml_tree = gr.Checkbox(
                                 label="🌲 Build ML Tree",
+                                value=False,
+                                description="Generate phylogenetic placement (slower)"
                             )
                             analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                 with gr.TabItem("📁 File Upload"):
                         with gr.Column(scale=2):
                             file_input = gr.File(
                                 label="📄 Upload FASTA File",
+                                file_types=[".fasta", ".fa", ".fas", ".txt"],
+                                description="Upload a FASTA file containing your sequence"
                             )
                         with gr.Column(scale=1):
                             file_similarity_score = gr.Slider(
                                 maximum=99,
                                 value=95.0,
                                 step=1.0,
+                                label="🎯 Similarity Threshold (%)",
+                                description="Minimum similarity for tree analysis"
                             )
                             file_build_ml_tree = gr.Checkbox(
                                 label="🌲 Build ML Tree",
+                                value=False,
+                                description="Generate phylogenetic placement (slower)"
                             )
                             analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
             gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
                 with gr.Column():
+                    boundary_output = gr.Textbox(
+                        label="🎯 Boundary Detection",
+                        interactive=False,
+                        lines=2
+                    )
+                    keras_output = gr.Textbox(
+                        label="🧠 F Gene Validation",
+                        interactive=False,
+                        lines=2
+                    )
                 with gr.Column():
+                    ml_tree_output = gr.Textbox(
+                        label="🌲 Phylogenetic Placement",
+                        interactive=False,
+                        lines=2
+                    )
+                    tree_analysis_output = gr.Textbox(
+                        label="🌳 Tree Analysis",
+                        interactive=False,
+                        lines=2
+                    )
+            summary_output = gr.Textbox(
+                label="📋 Summary",
+                interactive=False,
+                lines=8
+            )
             with gr.Row():
                 aligned_file = gr.File(label="📄 Alignment File", visible=False)
                 tree_file = gr.File(label="🌲 Tree File", visible=False)
                 report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
             with gr.Tabs():
                 with gr.TabItem("🌳 Interactive Tree"):
+                    tree_html = gr.HTML(
+                        value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet. Run analysis to see results.</div>"
+                    )
                 with gr.TabItem("📊 Detailed Report"):
+                    report_html = gr.HTML(
+                        label="Analysis Report",
+                        value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet. Run analysis to see results.</div>"
+                    )
+            # Event handlers
+            def handle_analysis_output(*outputs):  # Maps 13 positional pipeline results onto 11 UI output values. NOTE(review): not referenced by the click handlers visible in this diff - confirm it is wired up.
+                boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output, aligned_file, phy_file, _, _, tree_html_content, report_html_content, tree_html_path, report_html_path = outputs  # positions 8 and 9 are discarded via `_`; unpacking raises ValueError unless exactly 13 values are passed
+                return (
+                    boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,  # first five values are passed through unchanged
+                    gr.File.update(value=aligned_file, visible=aligned_file is not None),  # each file update is made visible only when its value is not None
+                    gr.File.update(value=phy_file, visible=phy_file is not None),  # NOTE(review): `gr.File.update` availability depends on the installed Gradio version - confirm
+                    gr.File.update(value=tree_html_path, visible=tree_html_path is not None),
+                    gr.File.update(value=report_html_path, visible=report_html_path is not None),
+                    tree_html_content,  # the two HTML content values are returned as received
+                    report_html_content
+                )
             analyze_btn.click(
                 fn=run_pipeline,
                 outputs=[
                     boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                     aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
+                ],
+                _js="""(outputs) => {
+                    return outputs;
+                }"""
             )
             analyze_file_btn.click(
                 outputs=[
                     boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                     aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
+                ],
+                _js="""(outputs) => {
+                    return outputs;
+                }"""
             )
+            # Examples
             gr.Examples(
                 examples=[
+                    ["ATCG" * 250, 85.0, False],
+                    ["CGATCG" * 150, 90.0, True]
                 ],
                 inputs=[dna_input, similarity_score, build_ml_tree],
                 label="Example Sequences"
             )
+            gr.Markdown("""
+            ## 📚 Instructions
+            1. **Input**: Enter a DNA sequence (ATCG format) or upload a FASTA file
+            2. **Parameters**:
+               - Set similarity threshold for phylogenetic analysis (1-99%)
+               - Choose whether to build ML tree (slower but more accurate)
+            3. **Analysis**: Click analyze to run the complete pipeline
+            4. **Results**: View results in different tabs - summary, tree visualization, and detailed report
+            5. **Downloads**: Download alignment, tree, simplified tree HTML, and detailed report HTML files
+            ### 🔬 Pipeline Components:
+            - **Boundary Detection**: Identifies F gene regions
+            - **F Gene Validation**: Validates F gene using ML
+            - **Phylogenetic Placement**: Places sequence in reference tree (optional)
+            - **Tree Analysis**: Builds phylogenetic tree with similar sequences
+            """)
         return iface
     except Exception as e:
         logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
         gradio_app = create_gradio_interface()
         gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
         logger.info("🚀 Starting Gene Analysis Pipeline...")
+        logger.info("📊 FastAPI docs available at: http://localhost:7860/docs")
+        logger.info("🧬 Gradio interface available at: http://localhost:7860/gradio")
         uvicorn.run(
             app,
             host="0.0.0.0",