Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 13, 2025

Commit

a79d7d1

verified ·

1 Parent(s): 88cb35a

Update app.py

Browse files

Files changed (1) hide show

app.py +594 -615

app.py CHANGED Viewed

@@ -12,47 +12,75 @@ import pandas as pd
 import re
 import logging
 import numpy as np
-from predictor import EnhancedGenePredictor
-from tensorflow.keras.models import load_model
-from analyzer import PhylogeneticTreeAnalyzer
 import tempfile
 import shutil
 import sys
 import uuid
 from pathlib import Path
-from huggingface_hub import hf_hub_download
-from Bio import SeqIO
-from Bio.Seq import Seq
-from Bio.SeqRecord import SeqRecord
 import stat
 import time
-import asyncio
-from fastapi import FastAPI, File, UploadFile, Form, HTTPException
-from fastapi.responses import HTMLResponse, FileResponse
-from pydantic import BaseModel
-from typing import Optional
-import uvicorn
-# --- Logging Setup ---
-log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
-log_handler = logging.StreamHandler()
-log_handler.setFormatter(log_formatter)
 try:
-    file_handler = logging.FileHandler('/tmp/app.log')
-    file_handler.setFormatter(log_formatter)
-    logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
-except Exception as e:
-    logging.basicConfig(level=logging.INFO, handlers=[log_handler])
-    logging.warning(f"Failed to set up file logging: {e}")
-logger = logging.getLogger(__name__)
-logger.info(f"Gradio version: {gr.__version__}")
-# Set event loop policy for compatibility with Gradio Spaces
 try:
-    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
-except Exception as e:
-    logger.warning(f"Failed to set event loop policy: {e}")
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -61,7 +89,13 @@ IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
 QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
-os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 # Model repository and file paths
 MODEL_REPO = "GGproject10/best_boundary_aware_model"
@@ -73,535 +107,472 @@ keras_model = None
 kmer_to_index = None
 analyzer = None
-# --- Model Loading ---
 def load_models_safely():
     global boundary_model, keras_model, kmer_to_index, analyzer
-    logger.info("🔍 Loading models...")
-    try:
-        boundary_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="best_boundary_aware_model.pth",
-            token=None
-        )
-        if os.path.exists(boundary_path):
-            boundary_model = EnhancedGenePredictor(boundary_path)
-            logger.info("✅ Boundary model loaded successfully.")
-        else:
-            logger.error(f"❌ Boundary model file not found after download.")
-    except Exception as e:
-        logger.error(f"❌ Failed to load boundary model: {e}")
-        boundary_model = None
-    try:
-        keras_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="best_model.keras",
-            token=None
-        )
-        kmer_path = hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename="kmer_to_index.pkl",
-            token=None
-        )
-        if os.path.exists(keras_path) and os.path.exists(kmer_path):
-            keras_model = load_model(keras_path)
-            with open(kmer_path, "rb") as f:
-                kmer_to_index = pickle.load(f)
-            logger.info("✅ Keras model and k-mer index loaded successfully.")
-        else:
-            logger.error(f"❌ Keras model or k-mer files not found.")
-    except Exception as e:
-        logger.error(f"❌ Failed to load Keras model: {e}")
-        keras_model = None
-        kmer_to_index = None
-    try:
-        logger.info("🌳 Initializing tree analyzer...")
-        analyzer = PhylogeneticTreeAnalyzer()
-        csv_candidates = [
-            CSV_PATH,
-            os.path.join(BASE_DIR, CSV_PATH),
-            os.path.join(BASE_DIR, "app", CSV_PATH),
-            os.path.join(os.path.dirname(__file__), CSV_PATH),
-            "f_cleaned.csv",
-            os.path.join(BASE_DIR, "f_cleaned.csv")
-        ]
-        csv_loaded = False
-        for csv_candidate in csv_candidates:
-            if os.path.exists(csv_candidate):
-                logger.info(f"📊 Trying CSV: {csv_candidate}")
                 try:
-                    if analyzer.load_data(csv_candidate):
-                        logger.info(f"✅ CSV loaded from: {csv_candidate}")
-                        csv_loaded = True
-                        break
                 except Exception as e:
-                    logger.warning(f"CSV load failed for {csv_candidate}: {e}")
-                    continue
-        if not csv_loaded:
-            logger.error("❌ Failed to load CSV data from any candidate location.")
             analyzer = None
-        else:
-            try:
-                if analyzer.train_ai_model():
-                    logger.info("✅ AI model training completed successfully")
-                else:
-                    logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
-            except Exception as e:
-                logger.warning(f"⚠️ AI model training failed: {e}")
-    except Exception as e:
-        logger.error(f"❌ Tree analyzer initialization failed: {e}")
-        analyzer = None
-# Load models at startup
-load_models_safely()
 # --- Tool Detection ---
 def setup_binary_permissions():
-    for binary in [MAFFT_PATH, IQTREE_PATH]:
-        if os.path.exists(binary):
-            try:
-                os.chmod(binary, os.stat(binary).st_mode | stat.S_IEXEC)
-                logger.info(f"Set executable permission on {binary}")
-            except Exception as e:
-                logger.warning(f"Failed to set permission on {binary}: {e}")
-def check_tool_availability():
-    setup_binary_permissions()
-    mafft_available = False
-    mafft_cmd = None
-    mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
-    for candidate in mafft_candidates:
-        if shutil.which(candidate) or os.path.exists(candidate):
-            try:
-                result = subprocess.run(
-                    [candidate, "--help"],
-                    capture_output=True,
-                    text=True,
-                    timeout=5
-                )
-                if result.returncode == 0 or "mafft" in result.stderr.lower():
-                    mafft_available = True
-                    mafft_cmd = candidate
-                    logger.info(f"✅ MAFFT found at: {candidate}")
-                    break
-            except Exception as e:
-                logger.debug(f"MAFFT test failed for {candidate}: {e}")
-    iqtree_available = False
-    iqtree_cmd = None
-    iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
-    for candidate in iqtree_candidates:
-        if shutil.which(candidate) or os.path.exists(candidate):
-            try:
-                result = subprocess.run(
-                    [candidate, "--help"],
-                    capture_output=True,
-                    text=True,
-                    timeout=5
-                )
-                if result.returncode == 0 or "iqtree" in result.stderr.lower():
-                    iqtree_available = True
-                    iqtree_cmd = candidate
-                    logger.info(f"✅ IQ-TREE found at: {candidate}")
-                    break
-            except Exception as e:
-                logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
-    return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
-# --- Pipeline Functions ---
-def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
-    query_fasta = None
-    cleanup_error = None
     try:
-        if len(sequence.strip()) < 100:
-            return False, "Sequence too short (<100 bp).", None, None
-        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
-        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
-        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
-        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
-        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
-            return False, "Reference alignment or tree not found.", None, None
-        query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
-        SeqIO.write([query_record], query_fasta, "fasta")
-        with open(aligned_with_query, "w") as output_file:
-            subprocess.run([
-                mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
-            ], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
-        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
-            return False, "MAFFT alignment failed.", None, None
-        subprocess.run([
-            iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
-            "-m", "GTR+G", "-pre", output_prefix, "-redo"
-        ], capture_output=True, text=True, timeout=1200, check=True)
-        treefile = f"{output_prefix}.treefile"
-        if not os.path.exists(treefile):
-            return False, "IQ-TREE placement failed.", aligned_with_query, None
-        success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
-        return True, success_msg, aligned_with_query, treefile
     except Exception as e:
-        logger.error(f"Phylogenetic placement failed: {e}", exc_info=True)
-        return False, f"Error: {str(e)}", None, None
-    finally:
-        if query_fasta and os.path.exists(query_fasta):
-            try:
-                os.unlink(query_fasta)
-            except Exception as cleanup_e:
-                logger.warning(f"Failed to clean up {query_fasta}: {cleanup_e}")
-def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
     try:
-        logger.debug("Starting tree analysis...")
-        if not analyzer:
-            return "❌ Tree analyzer not initialized.", None, None
-        if not sequence or len(sequence.strip()) < 10:
-            return "❌ Invalid sequence.", None, None
-        if not (1 <= matching_percentage <= 99):
-            return "❌ Matching percentage must be 1-99.", None, None
-        logger.debug("Finding query sequence...")
-        if not analyzer.find_query_sequence(sequence):
-            return "❌ Sequence not accepted.", None, None
-        logger.debug("Finding similar sequences...")
-        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
-        if not matched_ids:
-            return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None
-        logger.debug("Building tree structure...")
-        analyzer.build_tree_structure_with_ml_safe(matched_ids)
-        logger.debug("Creating interactive tree...")
-        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
-        query_id = analyzer.query_id or f"query_{int(time.time())}"
-        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
-        logger.debug(f"Saving tree to {tree_html_path}")
-        fig.write_html(tree_html_path)
-        analyzer.matching_percentage = matching_percentage
-        logger.debug("Generating detailed report...")
-        report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
-        report_html_path = os.path.join("/tmp", f'detailed_report_{query_id}.html') if report_success else None
-        logger.debug(f"Tree analysis completed: {len(matched_ids)} matches")
-        return f"✅ Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity.", tree_html_path, report_html_path
     except Exception as e:
-        logger.error(f"Tree analysis failed: {e}", exc_info=True)
-        return f"❌ Error: {str(e)}", None, None
 def predict_with_keras(sequence):
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < 6:
             return "❌ Sequence too short (<6 bp)."
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
         f_gene_prob = prediction[-1]
         percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
-        logger.error(f"Keras prediction failed: {e}", exc_info=True)
         return f"❌ Error: {str(e)}"
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
         if isinstance(file_obj, str):
             with open(file_obj, "r") as f:
                 content = f.read()
         else:
             content = file_obj.read().decode("utf-8")
         lines = content.strip().split("\n")
         seq_lines = [line.strip() for line in lines if not line.startswith(">")]
         return ''.join(seq_lines)
     except Exception as e:
-        logger.error(f"Failed to read FASTA file: {e}", exc_info=True)
         return ""
-def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
     try:
         dna_input = dna_input.upper().strip()
-        if not dna_input:
-            return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input", None, None
-        if not re.match('^[ACTGN]+$', dna_input):
-            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
-        processed_sequence = dna_input
         boundary_output = ""
         if boundary_model:
             try:
-                result = boundary_model.predict_sequence(dna_input)
-                regions = result['gene_regions']
-                if regions:
-                    processed_sequence = regions[0]["sequence"]
-                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                 else:
-                    boundary_output = "⚠️ No F gene regions found."
-                    processed_sequence = dna_input
             except Exception as e:
                 boundary_output = f"❌ Boundary prediction error: {str(e)}"
-                processed_sequence = dna_input
         else:
-            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
-        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
-        aligned_file = None
-        phy_file = None
         ml_tree_output = ""
-        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
-            try:
-                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
-                if mafft_available and iqtree_available:
-                    ml_success, ml_message, ml_aligned, ml_tree = phylogenetic_placement(processed_sequence, mafft_cmd, iqtree_cmd)
-                    ml_tree_output = ml_message
-                    aligned_file = ml_aligned
-                    phy_file = ml_tree
-                else:
-                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
-            except Exception as e:
-                ml_tree_output = f"❌ ML tree error: {str(e)}"
-        elif build_ml_tree:
-            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
         else:
-            ml_tree_output = "⚠️ Phylogenetic placement skipped."
-        tree_html_content = "No tree generated."
-        report_html_content = "No report generated."
-        tree_html_path = None
-        report_html_path = None
-        simplified_ml_output = ""
-        if analyzer and processed_sequence and len(processed_sequence) >= 10:
             try:
-                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
-                simplified_ml_output = tree_result
-                if tree_html_path and os.path.exists(tree_html_path):
-                    with open(tree_html_path, 'r', encoding='utf-8') as f:
-                        tree_html_content = f.read()
-                else:
-                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
-                if report_html_path and os.path.exists(report_html_path):
-                    with open(report_html_path, 'r', encoding='utf-8') as f:
-                        report_html_content = f.read()
-                else:
-                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
             except Exception as e:
-                simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
-                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
-                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
         else:
-            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
-            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
-            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
         summary_output = f"""
 📊 ANALYSIS SUMMARY:
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
-Input: {len(dna_input)} bp
-F Gene: {len(processed_sequence)} bp
-Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
-Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skipped' in ml_tree_output else '❌ Failed'}
-Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
 """
         return (
-            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
-            aligned_file, phy_file, None, None, tree_html_content, report_html_content,
-            tree_html_path, report_html_path
         )
     except Exception as e:
         logger.error(f"Pipeline error: {e}", exc_info=True)
         error_msg = f"❌ Pipeline Error: {str(e)}"
-        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
-async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
-    temp_file_path = None
-    try:
-        if fasta_file_obj is None:
-            return "❌ No file provided", "", "", "", "", None, None, None, None, "No input", "No input", None, None
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
-            if isinstance(fasta_file_obj, UploadFile):
-                content = await fasta_file_obj.read()
-                temp_file.write(content)
-            else:
-                with open(fasta_file_obj, 'rb') as f:
-                    content = f.read()
-                temp_file.write(content)
-            temp_file_path = temp_file.name
-        dna_input = read_fasta_file(temp_file_path)
-        if not dna_input:
-            return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
-        return run_pipeline(dna_input, similarity_score, build_ml_tree)
-    except Exception as e:
-        logger.error(f"Pipeline from file error: {e}", exc_info=True)
-        error_msg = f"❌ Error: {str(e)}"
-        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
-    finally:
-        if temp_file_path and os.path.exists(temp_file_path):
-            try:
-                os.unlink(temp_file_path)
-            except Exception as cleanup_e:
-                logger.warning(f"Failed to delete temp file {temp_file_path}: {cleanup_e}")
-# --- Pydantic Models ---
-class AnalysisRequest(BaseModel):
-    sequence: str
-    similarity_score: float = 95.0
-    build_ml_tree: bool = False
-class AnalysisResponse(BaseModel):
-    boundary_output: str
-    keras_output: str
-    ml_tree_output: str
-    tree_analysis_output: str
-    summary_output: str
-    success: bool
-    error_message: Optional[str] = None
-    tree_html_path: Optional[str] = None
-    report_html_path: Optional[str] = None
-# --- FastAPI App Setup ---
-app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
-@app.get("/")
-async def root():
-    return {
-        "message": "🧬 Gene Analysis Pipeline API",
-        "status": "running",
-        "endpoints": {
-            "docs": "/docs",
-            "health": "/health",
-            "gradio": "/gradio",
-            "analyze": "/analyze",
-            "analyze_file": "/analyze-file",
-            "download": "/download/{file_type}/{query_id}"
-        }
-    }
-@app.get("/health")
-async def health_check():
-    try:
-        mafft_available, iqtree_available, _, _ = check_tool_availability()
-        return {
-            "status": "healthy",
-            "components": {
-                "boundary_model": boundary_model is not None,
-                "keras_model": keras_model is not None,
-                "tree_analyzer": analyzer is not None,
-                "mafft_available": mafft_available,
-                "iqtree_available": iqtree_available
-            },
-            "paths": {
-                "base_dir": BASE_DIR,
-                "query_output_dir": QUERY_OUTPUT_DIR
-            }
-        }
-    except Exception as e:
-        logger.error(f"Health check error: {e}", exc_info=True)
-        return {"status": "unhealthy", "error": str(e)}
-@app.post("/analyze", response_model=AnalysisResponse)
-async def analyze_sequence(request: AnalysisRequest):
-    try:
-        result = run_pipeline(request.sequence, request.similarity_score, request.build_ml_tree)
-        return AnalysisResponse(
-            boundary_output=result[0] or "",
-            keras_output=result[1] or "",
-            ml_tree_output=result[2] or "",
-            tree_analysis_output=result[3] or "",
-            summary_output=result[4] or "",
-            tree_html_path=result[11],
-            report_html_path=result[12],
-            success=True
-        )
-    except Exception as e:
-        logger.error(f"Analyze error: {e}", exc_info=True)
-        return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="",
-            tree_analysis_output="", summary_output="",
-            tree_html_path=None, report_html_path=None,
-            success=False, error_message=str(e)
-        )
-@app.post("/analyze-file")
-async def analyze_file(
-    file: UploadFile = File(...),
-    similarity_score: float = Form(95.0),
-    build_ml_tree: bool = Form(False)
-):
-    temp_file_path = None
-    try:
-        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
-            content = await file.read()
-            temp_file.write(content)
-            temp_file_path = temp_file.name
-        result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
-        return AnalysisResponse(
-            boundary_output=result[0] or "",
-            keras_output=result[1] or "",
-            ml_tree_output=result[2] or "",
-            tree_analysis_output=result[3] or "",
-            summary_output=result[4] or "",
-            tree_html_path=result[11],
-            report_html_path=result[12],
-            success=True
-        )
-    except Exception as e:
-        logger.error(f"Analyze-file error: {e}", exc_info=True)
-        return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="",
-            tree_analysis_output="", summary_output="",
-            tree_html_path=None, report_html_path=None,
-            success=False, error_message=str(e)
         )
-    finally:
-        if temp_file_path and os.path.exists(temp_file_path):
-            try:
-                os.unlink(temp_file_path)
-            except Exception as cleanup_e:
-                logger.warning(f"Failed to clean up {temp_file_path}: {cleanup_e}")
-@app.get("/download/{file_type}/{query_id}")
-async def download_file(file_type: str, query_id: str):
-    try:
-        if file_type not in ["tree", "report"]:
-            raise HTTPException(status_code=400, detail="Invalid file type. Use 'tree' or 'report'.")
-        file_name = f"phylogenetic_tree_{query_id}.html" if file_type == "tree" else f"detailed_report_{query_id}.html"
-        file_path = os.path.join("/tmp", file_name)
-        if not os.path.exists(file_path):
-            raise HTTPException(status_code=404, detail="File not found.")
-        return FileResponse(file_path, filename=file_name, media_type="text/html")
-    except Exception as e:
-        logger.error(f"Download error: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")
 # --- Gradio Interface ---
-def create_gradio_interface():
     try:
         with gr.Blocks(
             title="🧬 Gene Analysis Pipeline",
             theme=gr.themes.Soft(),
             css="""
-            .gradio-container { max-width: 1200px !important; }
-            .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
-            .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
-            .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
-            .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
             """
         ) as iface:
-            gr.Markdown("# 🧬 Gene Analysis Pipeline")
             with gr.Row():
                 with gr.Column():
-                    status_display = gr.HTML(value=f"""
-                    <div class="status-box">
-                        <h3>🔧 System Status</h3>
-                        <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
-                        <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
-                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
-                        <p>🧬 MAFFT: {'✅ Available' if check_tool_availability()[0] else '❌ Missing'}</p>
-                        <p>🌲 IQ-TREE: {'✅ Available' if check_tool_availability()[1] else '❌ Missing'}</p>
-                    </div>
-                    """)
             with gr.Tabs():
                 with gr.Tab("📝 Text Input"):
                     with gr.Row():
                         with gr.Column(scale=2):
                             dna_input = gr.Textbox(
                                 label="🧬 DNA Sequence",
-                                placeholder="Enter your DNA sequence (ATCG format)...",
                                 lines=8,
-                                max_lines=15
                             )
                             with gr.Row():
@@ -617,23 +588,35 @@ def create_gradio_interface():
                                 ml_tree_checkbox = gr.Checkbox(
                                     label="🌲 Build ML Tree",
                                     value=False,
-                                    info="Perform phylogenetic placement (slower)"
                                 )
-                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg")
                         with gr.Column(scale=1):
-                            gr.Markdown("### 📋 Quick Guide")
                             gr.Markdown("""
                             1. **Paste DNA sequence** in ATCG format
                             2. **Adjust similarity** threshold (1-99%)
-                            3. **Enable ML tree** for detailed phylogeny
                             4. **Click Analyze** to start processing
                             **Supported formats:**
-                            - Raw DNA sequence
                             - Mixed case (auto-converted)
                             - With/without spaces/newlines
                             """)
                 with gr.Tab("📁 File Upload"):
@@ -659,11 +642,16 @@ def create_gradio_interface():
                                     value=False
                                 )
-                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary", size="lg")
                         with gr.Column(scale=1):
-                            gr.Markdown("### 📄 File Requirements")
                             gr.Markdown("""
                             **Accepted formats:**
                             - `.fasta`, `.fa`, `.fas`
                             - `.txt` with FASTA content
@@ -675,45 +663,51 @@ def create_gradio_interface():
                             ```
                             **Notes:**
-                            - Single or multiple sequences
                             - First sequence will be analyzed
                             - Maximum file size: 10MB
                             """)
             # Results Section
             gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
-                with gr.Column():
                     boundary_output = gr.Textbox(
                         label="🎯 Boundary Detection",
                         interactive=False,
-                        lines=2
                     )
                     keras_output = gr.Textbox(
                         label="🧠 Gene Classification",
                         interactive=False,
-                        lines=2
                     )
-                with gr.Column():
                     ml_tree_output = gr.Textbox(
                         label="🌲 Phylogenetic Placement",
                         interactive=False,
-                        lines=2
                     )
                     tree_analysis_output = gr.Textbox(
                         label="🌳 Tree Analysis",
                         interactive=False,
-                        lines=2
                     )
             summary_output = gr.Textbox(
-                label="📋 Summary Report",
                 interactive=False,
-                lines=8
             )
             # Visualization Section
@@ -721,114 +715,89 @@ def create_gradio_interface():
                 with gr.Tab("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
                         label="Phylogenetic Tree Visualization",
-                        value="<div style='text-align: center; color: #666; padding: 50px;'>No tree generated yet. Run analysis to see results.</div>"
                     )
                 with gr.Tab("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
-                        value="<div style='text-align: center; color: #666; padding: 50px;'>No report generated yet. Run analysis to see results.</div>"
                     )
-            # Download Section
-            with gr.Row():
-                tree_download = gr.File(
-                    label="📥 Download Tree (HTML)",
-                    visible=False
-                )
-                report_download = gr.File(
-                    label="📥 Download Report (HTML)",
-                    visible=False
-                )
             # Event Handlers
-            def handle_analysis(dna_seq, similarity, build_ml):
                 try:
-                    results = run_pipeline(dna_seq, similarity, build_ml)
-                    # Extract results
-                    boundary_out = results[0] or "No boundary detection performed"
-                    keras_out = results[1] or "No classification performed"
-                    ml_out = results[2] or "No ML tree built"
-                    tree_out = results[3] or "No tree analysis performed"
-                    summary_out = results[4] or "No summary available"
-                    tree_html_content = results[9] or "<div>No tree visualization available</div>"
-                    report_html_content = results[10] or "<div>No report available</div>"
-                    tree_path = results[11]
-                    report_path = results[12]
-                    # Return all outputs including file paths for downloads
-                    return (
-                        boundary_out,
-                        keras_out,
-                        ml_out,
-                        tree_out,
-                        summary_out,
-                        tree_html_content,
-                        report_html_content,
-                        tree_path if tree_path and os.path.exists(tree_path) else None,
-                        report_path if report_path and os.path.exists(report_path) else None,
-                        gr.update(visible=bool(tree_path)),
-                        gr.update(visible=bool(report_path))
-                    )
                 except Exception as e:
                     error_msg = f"❌ Analysis failed: {str(e)}"
-                    logger.error(f"Gradio analysis error: {e}", exc_info=True)
                     return (
                         error_msg, "", "", "", error_msg,
                         f"<div style='color: red;'>{error_msg}</div>",
-                        f"<div style='color: red;'>{error_msg}</div>",
-                        None, None,
-                        gr.update(visible=False),
-                        gr.update(visible=False)
                     )
-            def handle_file_analysis(file_obj, similarity, build_ml):
                 try:
                     if file_obj is None:
-                        error_msg = "❌ No file uploaded"
                         return (
                             error_msg, "", "", "", error_msg,
                             f"<div style='color: red;'>{error_msg}</div>",
-                            f"<div style='color: red;'>{error_msg}</div>",
-                            None, None,
-                            gr.update(visible=False),
-                            gr.update(visible=False)
                         )
-                    # Read the uploaded file
-                    dna_sequence = read_fasta_file(file_obj.name)
-                    if not dna_sequence:
-                        error_msg = "❌ Failed to read DNA sequence from file"
                         return (
                             error_msg, "", "", "", error_msg,
                             f"<div style='color: red;'>{error_msg}</div>",
-                            f"<div style='color: red;'>{error_msg}</div>",
-                            None, None,
-                            gr.update(visible=False),
-                            gr.update(visible=False)
                         )
-                    # Run the same analysis as text input
-                    return handle_analysis(dna_sequence, similarity, build_ml)
                 except Exception as e:
                     error_msg = f"❌ File analysis failed: {str(e)}"
-                    logger.error(f"Gradio file analysis error: {e}", exc_info=True)
                     return (
                         error_msg, "", "", "", error_msg,
                         f"<div style='color: red;'>{error_msg}</div>",
-                        f"<div style='color: red;'>{error_msg}</div>",
-                        None, None,
-                        gr.update(visible=False),
-                        gr.update(visible=False)
                     )
             # Connect event handlers
             analyze_btn.click(
-                fn=handle_analysis,
                 inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 outputs=[
                     boundary_output,
@@ -837,16 +806,13 @@ def create_gradio_interface():
                     tree_analysis_output,
                     summary_output,
                     tree_html,
-                    report_html,
-                    tree_download,
-                    report_download,
-                    tree_download,  # For visibility update
-                    report_download  # For visibility update
-                ]
             )
             analyze_file_btn.click(
-                fn=handle_file_analysis,
                 inputs=[file_input, file_similarity_slider, file_ml_tree_checkbox],
                 outputs=[
                     boundary_output,
@@ -855,79 +821,92 @@ def create_gradio_interface():
                     tree_analysis_output,
                     summary_output,
                     tree_html,
-                    report_html,
-                    tree_download,
-                    report_download,
-                    tree_download,  # For visibility update
-                    report_download  # For visibility update
-                ]
             )
             # Footer
             gr.Markdown("""
             ---
-            ### 🔬 About This Pipeline
-            This application provides comprehensive analysis of DNA sequences with focus on gene detection and phylogenetic analysis:
-            - **🎯 Boundary Detection**: Identifies gene regions within sequences
-            - **🧠 Classification**: Validates gene identity using deep learning
-            - **🌲 Phylogenetic Placement**: Places sequences in evolutionary context
-            - **🌳 Tree Analysis**: Builds interactive phylogenetic trees
-            **📊 Output Features:**
-            - Interactive tree visualizations
-            - Detailed analysis reports
-            - Downloadable results
-            - Comprehensive summaries
-            **⚡ Performance Notes:**
-            - Text input: ~5-30 seconds
-            - File upload: ~10-60 seconds
-            - ML tree building: +2-5 minutes
-            - Tree analysis: +30-120 seconds
             """)
         return iface
     except Exception as e:
-        logger.error(f"Failed to create Gradio interface: {e}", exc_info=True)
-        # Return a minimal error interface
-        with gr.Blocks() as error_iface:
-            gr.Markdown(f"# ❌ Interface Error\n\nFailed to initialize: {str(e)}")
-        return error_iface
-# --- Application Entry Point ---
 def main():
-    """Main application entry point"""
     try:
         logger.info("🚀 Starting Gene Analysis Pipeline...")
-        # Create Gradio interface
-        gradio_app = create_gradio_interface()
-        # Mount Gradio app to FastAPI
-        gradio_app.queue(max_size=10)
-        app.mount("/gradio", gradio_app, name="gradio")
-        logger.info("✅ Application initialized successfully")
-        # Run the application
-        port = int(os.environ.get("PORT", 7860))
-        logger.info(f"🌐 Starting server on port {port}")
-        uvicorn.run(
-            app,
-            host="0.0.0.0",
-            port=port,
-            log_level="info",
-            access_log=True
         )
     except Exception as e:
-        logger.error(f"❌ Application startup failed: {e}", exc_info=True)
-        sys.exit(1)
-# --- Run Application ---
 if __name__ == "__main__":
     main()

 import re
 import logging
 import numpy as np
 import tempfile
 import shutil
 import sys
 import uuid
 from pathlib import Path
 import stat
 import time
+# Import with error handling
 try:
+    from predictor import EnhancedGenePredictor
+except ImportError as e:
+    logging.warning(f"Failed to import EnhancedGenePredictor: {e}")
+    EnhancedGenePredictor = None
+try:
+    from tensorflow.keras.models import load_model
+except ImportError as e:
+    logging.warning(f"Failed to import TensorFlow: {e}")
+    load_model = None
+try:
+    from analyzer import PhylogeneticTreeAnalyzer
+except ImportError as e:
+    logging.warning(f"Failed to import PhylogeneticTreeAnalyzer: {e}")
+    PhylogeneticTreeAnalyzer = None
 try:
+    from huggingface_hub import hf_hub_download
+except ImportError as e:
+    logging.warning(f"Failed to import huggingface_hub: {e}")
+    hf_hub_download = None
+try:
+    from Bio import SeqIO
+    from Bio.Seq import Seq
+    from Bio.SeqRecord import SeqRecord
+except ImportError as e:
+    logging.warning(f"Failed to import BioPython: {e}")
+    SeqIO = None
+# --- Logging Setup ---
def setup_logging():
    """Configure root logging and return this module's logger.

    A stream handler is always installed; a file handler writing to
    ``/tmp/app.log`` is added when that file can be opened. Both use the
    ``asctime - levelname - message`` format at INFO level.

    Returns:
        logging.Logger: the logger named after this module. If configuration
        itself fails, a plain INFO-level ``basicConfig`` is applied instead.
    """
    try:
        log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
        log_handler = logging.StreamHandler()
        log_handler.setFormatter(log_formatter)

        # Try to set up file logging, fall back to console-only if it fails
        handlers = [log_handler]
        try:
            file_handler = logging.FileHandler('/tmp/app.log')
            file_handler.setFormatter(log_formatter)
            handlers.append(file_handler)
        except Exception as e:
            print(f"Warning: Failed to set up file logging: {e}")

        # force=True replaces any handlers installed implicitly by earlier
        # logging.warning(...) calls made while importing optional modules.
        logging.basicConfig(level=logging.INFO, handlers=handlers, force=True)
    except Exception as e:
        print(f"Critical: Failed to setup logging: {e}")
        # Without force=True this call is a no-op whenever the root logger
        # already has handlers, which would leave INFO messages filtered out.
        logging.basicConfig(level=logging.INFO, force=True)
        return logging.getLogger(__name__)

    module_logger = logging.getLogger(__name__)
    # Reporting the version is informational only; a failure here must not
    # throw away the configuration that was just applied successfully.
    try:
        module_logger.info(f"Gradio version: {gr.__version__}")
    except Exception as e:
        module_logger.warning(f"Could not determine Gradio version: {e}")
    return module_logger
+logger = setup_logging()
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
 TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
 QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
+# Ensure output directory exists
+try:
+    os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
+    os.makedirs("/tmp", exist_ok=True)
+except Exception as e:
+    logger.warning(f"Failed to create directories: {e}")
 # Model repository and file paths
 MODEL_REPO = "GGproject10/best_boundary_aware_model"
 kmer_to_index = None
 analyzer = None
+# --- Safe Model Loading ---
 def load_models_safely():
+    """Load models with comprehensive error handling"""
     global boundary_model, keras_model, kmer_to_index, analyzer
+    logger.info("🔍 Starting model loading...")
+    # Load boundary model
+    if EnhancedGenePredictor and hf_hub_download:
+        try:
+            logger.info("Loading boundary model...")
+            boundary_path = hf_hub_download(
+                repo_id=MODEL_REPO,
+                filename="best_boundary_aware_model.pth",
+                token=None
+            )
+            if os.path.exists(boundary_path):
+                boundary_model = EnhancedGenePredictor(boundary_path)
+                logger.info("✅ Boundary model loaded successfully.")
+            else:
+                logger.error("❌ Boundary model file not found after download.")
+        except Exception as e:
+            logger.error(f"❌ Failed to load boundary model: {e}")
+            boundary_model = None
+    else:
+        logger.warning("⚠️ EnhancedGenePredictor or hf_hub_download not available")
+    # Load Keras model
+    if load_model and hf_hub_download:
+        try:
+            logger.info("Loading Keras model...")
+            keras_path = hf_hub_download(
+                repo_id=MODEL_REPO,
+                filename="best_model.keras",
+                token=None
+            )
+            kmer_path = hf_hub_download(
+                repo_id=MODEL_REPO,
+                filename="kmer_to_index.pkl",
+                token=None
+            )
+            if os.path.exists(keras_path) and os.path.exists(kmer_path):
+                keras_model = load_model(keras_path)
+                with open(kmer_path, "rb") as f:
+                    kmer_to_index = pickle.load(f)
+                logger.info("✅ Keras model and k-mer index loaded successfully.")
+            else:
+                logger.error("❌ Keras model or k-mer files not found.")
+        except Exception as e:
+            logger.error(f"❌ Failed to load Keras model: {e}")
+            keras_model = None
+            kmer_to_index = None
+    else:
+        logger.warning("⚠️ TensorFlow load_model or hf_hub_download not available")
+    # Load tree analyzer
+    if PhylogeneticTreeAnalyzer:
+        try:
+            logger.info("🌳 Initializing tree analyzer...")
+            analyzer = PhylogeneticTreeAnalyzer()
+            # Try to find CSV file
+            csv_candidates = [
+                CSV_PATH,
+                os.path.join(BASE_DIR, CSV_PATH),
+                os.path.join(BASE_DIR, "app", CSV_PATH),
+                os.path.join(os.path.dirname(__file__), CSV_PATH),
+                "f_cleaned.csv",
+                os.path.join(BASE_DIR, "f_cleaned.csv")
+            ]
+            csv_loaded = False
+            for csv_candidate in csv_candidates:
+                if os.path.exists(csv_candidate):
+                    logger.info(f"📊 Trying CSV: {csv_candidate}")
+                    try:
+                        if analyzer.load_data(csv_candidate):
+                            logger.info(f"✅ CSV loaded from: {csv_candidate}")
+                            csv_loaded = True
+                            break
+                    except Exception as e:
+                        logger.warning(f"CSV load failed for {csv_candidate}: {e}")
+                        continue
+            if not csv_loaded:
+                logger.error("❌ Failed to load CSV data from any candidate location.")
+                analyzer = None
+            else:
                 try:
+                    if hasattr(analyzer, 'train_ai_model') and analyzer.train_ai_model():
+                        logger.info("✅ AI model training completed successfully")
+                    else:
+                        logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
                 except Exception as e:
+                    logger.warning(f"⚠️ AI model training failed: {e}")
+        except Exception as e:
+            logger.error(f"❌ Tree analyzer initialization failed: {e}")
             analyzer = None
+    else:
+        logger.warning("⚠️ PhylogeneticTreeAnalyzer not available")
 # --- Tool Detection ---
 def setup_binary_permissions():
+    """Set executable permissions on binary files"""
     try:
+        for binary in [MAFFT_PATH, IQTREE_PATH]:
+            if os.path.exists(binary):
+                try:
+                    os.chmod(binary, os.stat(binary).st_mode | stat.S_IEXEC)
+                    logger.info(f"Set executable permission on {binary}")
+                except Exception as e:
+                    logger.warning(f"Failed to set permission on {binary}: {e}")
     except Exception as e:
+        logger.warning(f"Binary permission setup failed: {e}")
def _probe_tool(candidates, marker, label):
    """Return the first candidate command that answers ``--help``, or None.

    A candidate counts as working when it exits with status 0 or when
    ``marker`` appears (case-insensitively) anywhere in its output.
    """
    for candidate in candidates:
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
            continue
        # Tools differ in whether help text goes to stdout or stderr, so
        # search both streams for the banner.
        banner = f"{result.stdout or ''}{result.stderr or ''}".lower()
        if result.returncode == 0 or marker in banner:
            logger.info(f"✅ {label} found at: {candidate}")
            return candidate
    return None


def check_tool_availability():
    """Check if required tools are available.

    Returns:
        tuple: ``(mafft_available, iqtree_available, mafft_cmd, iqtree_cmd)``.
        The booleans report whether a working command was found; the command
        entries hold the candidate that worked, or ``None``. If the check
        itself fails, ``(False, False, None, None)`` is returned.
    """
    try:
        setup_binary_permissions()
        mafft_cmd = _probe_tool(
            ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
            "mafft",
            "MAFFT",
        )
        iqtree_cmd = _probe_tool(
            ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
            "iqtree",
            "IQ-TREE",
        )
        return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd
    except Exception as e:
        logger.error(f"Tool availability check failed: {e}")
        return False, False, None, None
+# --- Core Functions ---
 def predict_with_keras(sequence):
+    """Predict using Keras model with error handling"""
     try:
         if not keras_model or not kmer_to_index:
             return "❌ Keras model not available."
         if len(sequence) < 6:
             return "❌ Sequence too short (<6 bp)."
+        # Generate k-mers
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
+        # Make prediction
         input_arr = np.array([indices])
         prediction = keras_model.predict(input_arr, verbose=0)[0]
         f_gene_prob = prediction[-1]
+        # Convert to percentage
         percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
         return f"✅ {percentage}% F gene confidence"
     except Exception as e:
+        logger.error(f"Keras prediction failed: {e}")
         return f"❌ Error: {str(e)}"
 def read_fasta_file(file_obj):
+    """Read FASTA file with error handling"""
     try:
         if file_obj is None:
             return ""
         if isinstance(file_obj, str):
             with open(file_obj, "r") as f:
                 content = f.read()
         else:
             content = file_obj.read().decode("utf-8")
+        # Extract sequence lines (non-header lines)
         lines = content.strip().split("\n")
         seq_lines = [line.strip() for line in lines if not line.startswith(">")]
         return ''.join(seq_lines)
     except Exception as e:
+        logger.error(f"Failed to read FASTA file: {e}")
         return ""
def analyze_sequence_basic(sequence, similarity_score=95.0):
    """Basic sequence analysis without external tools.

    Whitespace is removed before anything is measured, so spaces and line
    breaks in the input are not counted as bases. Every remaining character
    other than A, T, C, G or N (after upper-casing) is treated as ``N``.

    Args:
        sequence: DNA sequence text.
        similarity_score: value echoed in the report as the similarity
            threshold; it does not affect the computed statistics.

    Returns:
        tuple: ``(report, None, None)``. ``report`` is a multi-line summary
        with length, GC content and N content, or a message starting with
        ``❌`` when the sequence has fewer than 10 bases or analysis fails.
    """
    try:
        compact = re.sub(r'\s+', '', sequence) if sequence else ""
        if len(compact) < 10:
            return "❌ Invalid sequence.", None, None

        # Clean sequence
        clean_seq = re.sub(r'[^ATCGN]', 'N', compact.upper())

        # Basic analysis (length is >= 10 here, so the divisions are safe)
        length = len(clean_seq)
        gc_content = (clean_seq.count('G') + clean_seq.count('C')) / length * 100
        n_content = clean_seq.count('N') / length * 100
        analysis_result = f"""
        ✅ Basic Analysis Complete
        • Length: {length} bp
        • GC Content: {gc_content:.1f}%
        • N Content: {n_content:.1f}%
        • Similarity Threshold: {similarity_score}%
        """
        return analysis_result, None, None
    except Exception as e:
        logger.error(f"Basic analysis failed: {e}")
        return f"❌ Analysis error: {str(e)}", None, None
def _normalize_dna_input(raw_text):
    """Return ``raw_text`` as an upper-case base string.

    FASTA header lines (starting with ``>``) and all whitespace are removed
    first; only then is every character outside ``ACTGN`` replaced by ``N``.
    """
    body_lines = [
        line for line in raw_text.upper().splitlines()
        if not line.lstrip().startswith(">")
    ]
    compact = re.sub(r'\s+', '', "".join(body_lines))
    return re.sub(r'[^ACTGN]', 'N', compact)


def run_pipeline_safe(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Safe pipeline execution with comprehensive error handling.

    Steps: normalize the input, run boundary detection (when
    ``boundary_model`` is loaded), classify with ``predict_with_keras``,
    report on external-tool availability when an ML tree is requested, run
    the basic sequence analysis (when ``analyzer`` is loaded) and build a
    text summary.

    Args:
        dna_input: raw sequence text, optionally in FASTA format.
        similarity_score: threshold passed to ``analyze_sequence_basic``.
        build_ml_tree: whether ML tree building was requested.

    Returns:
        tuple: 13 items - boundary text, classification text, ML tree text,
        tree analysis text, summary text, four unused file slots (``None``),
        tree HTML, report HTML and two unused path slots (``None``). Errors
        are reported inside the tuple; no exception is raised.
    """
    try:
        empty_result = (
            "❌ Empty input", "", "", "", "No input provided",
            None, None, None, None,
            "No input", "No input",
            None, None
        )

        # Input validation
        if not dna_input or not dna_input.strip():
            return empty_result

        # Headers and whitespace are dropped before invalid characters are
        # mapped to N, so they can never become N bases.
        processed_sequence = _normalize_dna_input(dna_input)
        if not processed_sequence:
            # e.g. the input consisted of a FASTA header only
            return empty_result
        input_length = len(processed_sequence)
        logger.info(f"Processing sequence of length: {len(processed_sequence)}")

        # Boundary detection
        boundary_output = ""
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(processed_sequence)
                regions = result.get('gene_regions') if hasattr(result, 'get') else None
                if regions:
                    # Continue the pipeline on the first detected region only.
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ Boundary detection completed (no regions found)."
            except Exception as e:
                logger.error(f"Boundary prediction error: {e}")
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(processed_sequence)} bp"

        # Keras prediction
        if len(processed_sequence) >= 6:
            keras_output = predict_with_keras(processed_sequence)
        else:
            keras_output = "❌ Sequence too short for classification."

        # ML Tree analysis (simplified for now)
        if not build_ml_tree:
            ml_tree_output = "⚠️ ML tree analysis skipped."
        elif len(processed_sequence) < 100:
            ml_tree_output = "❌ Sequence too short for ML tree (<100 bp)."
        else:
            try:
                mafft_available, iqtree_available, _, _ = check_tool_availability()
                if mafft_available and iqtree_available:
                    ml_tree_output = "⚠️ ML tree analysis not implemented in safe mode."
                else:
                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"

        # Tree analysis
        tree_analysis_output = ""
        tree_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Tree analysis not available in safe mode.</div>"
        report_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Report not available in safe mode.</div>"
        if analyzer and len(processed_sequence) >= 10:
            try:
                result, _, _ = analyze_sequence_basic(processed_sequence, similarity_score)
                tree_analysis_output = result
                tree_html_content = f"<div style='padding: 20px;'><h3>Basic Analysis</h3><pre>{result}</pre></div>"
                report_html_content = tree_html_content
            except Exception as e:
                logger.error(f"Tree analysis error: {e}")
                tree_analysis_output = f"❌ Tree analysis error: {str(e)}"
        else:
            tree_analysis_output = "❌ Tree analyzer not available or sequence too short."

        # Create summary
        summary_output = f"""
📊 ANALYSIS SUMMARY:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Input Length: {input_length} bp
Processed Length: {len(processed_sequence)} bp
Boundary Detection: {'✅ Active' if boundary_model else '❌ Unavailable'}
Classification: {'✅ Active' if keras_model else '❌ Unavailable'}
ML Tree: {'✅ Requested' if build_ml_tree else '⚠️ Skipped'}
Tree Analysis: {'✅ Active' if analyzer else '❌ Unavailable'}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Results:
• Boundary: {boundary_output.split(':')[-1].strip() if ':' in boundary_output else boundary_output}
• Classification: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
• ML Tree: {'Requested' if build_ml_tree else 'Skipped'}
• Analysis: {'Completed' if '✅' in tree_analysis_output else 'Failed'}
"""
        return (
            boundary_output,
            keras_output,
            ml_tree_output,
            tree_analysis_output,
            summary_output,
            None,  # aligned_file
            None,  # phy_file
            None,  # additional_file_1
            None,  # additional_file_2
            tree_html_content,
            report_html_content,
            None,  # tree_html_path
            None   # report_html_path
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}", exc_info=True)
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return (
            error_msg, "", "", "", error_msg,
            None, None, None, None,
            f"<div style='color: red;'>{error_msg}</div>",
            f"<div style='color: red;'>{error_msg}</div>",
            None, None
        )
 # --- Gradio Interface ---
+def create_safe_gradio_interface():
+    """Create a safe Gradio interface with comprehensive error handling"""
     try:
         with gr.Blocks(
             title="🧬 Gene Analysis Pipeline",
             theme=gr.themes.Soft(),
             css="""
+            .gradio-container {
+                max-width: 1200px !important;
+                margin: 0 auto;
+            }
+            .status-box {
+                padding: 15px;
+                border-radius: 8px;
+                margin: 10px 0;
+                border-left: 4px solid #007bff;
+                background: linear-gradient(90deg, #f8f9fa 0%, #e9ecef 100%);
+            }
+            .success {
+                background-color: #d4edda;
+                border-left-color: #28a745;
+                color: #155724;
+            }
+            .warning {
+                background-color: #fff3cd;
+                border-left-color: #ffc107;
+                color: #856404;
+            }
+            .error {
+                background-color: #f8d7da;
+                border-left-color: #dc3545;
+                color: #721c24;
+            }
+            .analysis-section {
+                border: 1px solid #dee2e6;
+                border-radius: 8px;
+                padding: 20px;
+                margin: 10px 0;
+                background: white;
+            }
             """
         ) as iface:
+            # Header
+            gr.Markdown("""
+            # 🧬 Gene Analysis Pipeline
+            ### Comprehensive DNA sequence analysis with machine learning
+            This tool provides multi-modal analysis including boundary detection, gene classification,
+            and phylogenetic analysis for DNA sequences.
+            """)
+            # System Status
             with gr.Row():
                 with gr.Column():
+                    try:
+                        mafft_available, iqtree_available, _, _ = check_tool_availability()
+                        status_html = f"""
+                        <div class="status-box">
+                            <h3>🔧 System Status</h3>
+                            <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px;">
+                                <div>🤖 Boundary Model: <strong>{'✅ Loaded' if boundary_model else '❌ Missing'}</strong></div>
+                                <div>🧠 Keras Model: <strong>{'✅ Loaded' if keras_model else '❌ Missing'}</strong></div>
+                                <div>🌳 Tree Analyzer: <strong>{'✅ Loaded' if analyzer else '❌ Missing'}</strong></div>
+                                <div>🧬 MAFFT: <strong>{'✅ Available' if mafft_available else '❌ Missing'}</strong></div>
+                                <div>🌲 IQ-TREE: <strong>{'✅ Available' if iqtree_available else '❌ Missing'}</strong></div>
+                                <div>📊 Safe Mode: <strong>{'✅ Active' if not all([boundary_model, keras_model, analyzer]) else '⚠️ Inactive'}</strong></div>
+                            </div>
+                        </div>
+                        """
+                    except Exception as e:
+                        status_html = f"""
+                        <div class="status-box error">
+                            <h3>❌ System Status Error</h3>
+                            <p>Failed to check system status: {str(e)}</p>
+                        </div>
+                        """
+                    gr.HTML(value=status_html)
+            # Main Interface
             with gr.Tabs():
                 with gr.Tab("📝 Text Input"):
                     with gr.Row():
                         with gr.Column(scale=2):
                             dna_input = gr.Textbox(
                                 label="🧬 DNA Sequence",
+                                placeholder="Enter your DNA sequence (ATCG format)...\nExample: ATCGATCGATCG...",
                                 lines=8,
+                                max_lines=15,
+                                info="Paste your DNA sequence here. Supports FASTA format or raw sequence."
                             )
                             with gr.Row():
                                 ml_tree_checkbox = gr.Checkbox(
                                     label="🌲 Build ML Tree",
                                     value=False,
+                                    info="Perform phylogenetic placement (requires external tools)"
                                 )
+                            analyze_btn = gr.Button(
+                                "🔬 Analyze Sequence",
+                                variant="primary",
+                                size="lg",
+                                scale=1
+                            )
                         with gr.Column(scale=1):
                             gr.Markdown("""
+                            ### 📋 Quick Guide
                             1. **Paste DNA sequence** in ATCG format
                             2. **Adjust similarity** threshold (1-99%)
+                            3. **Enable ML tree** for phylogenetic analysis
                             4. **Click Analyze** to start processing
                             **Supported formats:**
+                            - Raw DNA sequence: `ATCGATCG...`
+                            - FASTA format: `>header\\nATCG...`
                             - Mixed case (auto-converted)
                             - With/without spaces/newlines
+                            **Requirements:**
+                            - Minimum length: 10 bp for basic analysis
+                            - Minimum length: 100 bp for ML tree
+                            - Only ATCG nucleotides (others converted to N)
                             """)
                 with gr.Tab("📁 File Upload"):
                                     value=False
                                 )
+                            analyze_file_btn = gr.Button(
+                                "🔬 Analyze File",
+                                variant="primary",
+                                size="lg"
+                            )
                         with gr.Column(scale=1):
                             gr.Markdown("""
+                            ### 📄 File Requirements
                             **Accepted formats:**
                             - `.fasta`, `.fa`, `.fas`
                             - `.txt` with FASTA content
                             ```
                             **Notes:**
+                            - Single or multiple sequences supported
                             - First sequence will be analyzed
                             - Maximum file size: 10MB
+                            - UTF-8 encoding recommended
                             """)
             # Results Section
             gr.Markdown("## 📊 Analysis Results")
             with gr.Row():
+                with gr.Column(scale=1):
                     boundary_output = gr.Textbox(
                         label="🎯 Boundary Detection",
                         interactive=False,
+                        lines=3,
+                        info="Gene region identification results"
                     )
                     keras_output = gr.Textbox(
                         label="🧠 Gene Classification",
                         interactive=False,
+                        lines=3,
+                        info="Machine learning classification confidence"
                     )
+                with gr.Column(scale=1):
                     ml_tree_output = gr.Textbox(
                         label="🌲 Phylogenetic Placement",
                         interactive=False,
+                        lines=3,
+                        info="Maximum likelihood tree placement"
                     )
                     tree_analysis_output = gr.Textbox(
                         label="🌳 Tree Analysis",
                         interactive=False,
+                        lines=3,
+                        info="Phylogenetic tree construction results"
                     )
             summary_output = gr.Textbox(
+                label="📋 Comprehensive Summary Report",
                 interactive=False,
+                lines=12,
+                info="Complete analysis overview"
             )
             # Visualization Section
                 with gr.Tab("🌳 Interactive Tree"):
                     tree_html = gr.HTML(
                         label="Phylogenetic Tree Visualization",
+                        value="""
+                        <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
+                            <h3>🌳 Tree Visualization</h3>
+                            <p>No tree generated yet. Run analysis to see interactive phylogenetic tree.</p>
+                            <p><em>Note: Tree visualization requires successful sequence analysis.</em></p>
+                        </div>
+                        """
                     )
                 with gr.Tab("📊 Detailed Report"):
                     report_html = gr.HTML(
                         label="Analysis Report",
+                        value="""
+                        <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
+                            <h3>📊 Analysis Report</h3>
+                            <p>No report generated yet. Run analysis to see detailed results.</p>
+                            <p><em>Note: Report includes statistical analysis and recommendations.</em></p>
+                        </div>
+                        """
                     )
             # Event Handlers
+            def handle_analysis_safe(dna_seq, similarity, build_ml):
+                """Safe analysis handler with comprehensive error handling"""
                 try:
+                    if not dna_seq or not dna_seq.strip():
+                        error_msg = "❌ Please enter a DNA sequence"
+                        return (
+                            error_msg, "", "", "", error_msg,
+                            None, None, None, None,
+                            f"<div style='color: red;'>{error_msg}</div>",
+                            f"<div style='color: red;'>{error_msg}</div>"
+                        )
+                    return run_pipeline_safe(dna_seq, similarity, build_ml)
                 except Exception as e:
+                    logger.error(f"Analysis handler error: {e}")
                     error_msg = f"❌ Analysis failed: {str(e)}"
                     return (
                         error_msg, "", "", "", error_msg,
+                        None, None, None, None,
                         f"<div style='color: red;'>{error_msg}</div>",
+                        f"<div style='color: red;'>{error_msg}</div>"
                     )
+            def handle_file_analysis_safe(file_obj, similarity, build_ml):
+                """Safe file analysis handler"""
                 try:
                     if file_obj is None:
+                        error_msg = "❌ Please upload a FASTA file"
                         return (
                             error_msg, "", "", "", error_msg,
+                            None, None, None, None,
                             f"<div style='color: red;'>{error_msg}</div>",
+                            f"<div style='color: red;'>{error_msg}</div>"
                         )
+                    sequence = read_fasta_file(file_obj)
+                    if not sequence:
+                        error_msg = "❌ Failed to read sequence from file"
                         return (
                             error_msg, "", "", "", error_msg,
+                            None, None, None, None,
                             f"<div style='color: red;'>{error_msg}</div>",
+                            f"<div style='color: red;'>{error_msg}</div>"
                         )
+                    return run_pipeline_safe(sequence, similarity, build_ml)
                 except Exception as e:
+                    logger.error(f"File analysis handler error: {e}")
                     error_msg = f"❌ File analysis failed: {str(e)}"
                     return (
                         error_msg, "", "", "", error_msg,
+                        None, None, None, None,
                         f"<div style='color: red;'>{error_msg}</div>",
+                        f"<div style='color: red;'>{error_msg}</div>"
                     )
             # Connect event handlers
             analyze_btn.click(
+                fn=handle_analysis_safe,
                 inputs=[dna_input, similarity_slider, ml_tree_checkbox],
                 outputs=[
                     boundary_output,
                     tree_analysis_output,
                     summary_output,
                     tree_html,
+                    report_html
+                ],
+                show_progress=True
             )
             analyze_file_btn.click(
+                fn=handle_file_analysis_safe,
                 inputs=[file_input, file_similarity_slider, file_ml_tree_checkbox],
                 outputs=[
                     boundary_output,
                     tree_analysis_output,
                     summary_output,
                     tree_html,
+                    report_html
+                ],
+                show_progress=True
             )
             # Footer
             gr.Markdown("""
             ---
+            ### 🔬 About This Tool
+            This Gene Analysis Pipeline provides comprehensive DNA sequence analysis using:
+            - **Boundary Detection**: Machine learning-based gene region identification
+            - **Classification**: Deep learning confidence scoring for gene classification
+            - **Phylogenetic Analysis**: Maximum likelihood tree construction and placement
+            - **Interactive Visualization**: Dynamic tree and report generation
+            **Safe Mode**: When external tools or models are unavailable, the pipeline operates in safe mode with basic analysis capabilities.
+            ---
+            *Powered by Gradio • Built with ❤️ for genomics research*
             """)
         return iface
     except Exception as e:
+        logger.error(f"Interface creation failed: {e}")
+        # Return minimal interface on failure
+        def minimal_interface():
+            return gr.Interface(
+                fn=lambda x: f"❌ System Error: {str(e)}",
+                inputs=gr.Textbox(label="Input"),
+                outputs=gr.Textbox(label="Output"),
+                title="Gene Analysis Pipeline - Error Mode"
+            )
+        return minimal_interface()
+# --- Main Execution ---
 def main():
     """Load models, build the Gradio interface and serve it on port 7860.

     If any step fails, a minimal "emergency mode" interface is launched on
     the same host and port instead. If that also fails, both errors are
     printed and the process exits with status 1. A ``KeyboardInterrupt`` is
     logged and ends the function normally.
     """
     try:
         logger.info("🚀 Starting Gene Analysis Pipeline...")
         # Load models
         load_models_safely()
         # Create interface
         logger.info("🎨 Creating Gradio interface...")
         iface = create_safe_gradio_interface()
         # Launch
         logger.info("🌐 Launching application...")
         # Gradio 4+ removed the `enable_queue` and `show_tips` arguments from
         # launch(); passing them raises TypeError and would push every start-up
         # into emergency mode. Queueing is enabled through .queue() instead.
         iface.queue()
         iface.launch(
             server_name="0.0.0.0",
             server_port=7860,
             share=False,
             show_error=True,
             max_threads=10,
         )
     except KeyboardInterrupt:
         logger.info("🛑 Application stopped by user")
     except Exception as e:
         logger.error(f"❌ Critical error in main: {e}", exc_info=True)
         # Snapshot the message: `e` is unbound once this except clause exits,
         # so the callback must not close over it directly.
         failure_text = str(e)
         # Emergency fallback interface
         try:
             logger.info("🚨 Starting emergency fallback interface...")
             emergency_iface = gr.Interface(
                 fn=lambda x: f"❌ System in emergency mode. Error: {failure_text}",
                 inputs=gr.Textbox(label="DNA Sequence", placeholder="Emergency mode - limited functionality"),
                 outputs=gr.Textbox(label="Status"),
                 title="🚨 Gene Analysis Pipeline - Emergency Mode",
                 description="The system is running in emergency mode due to initialization errors."
             )
             emergency_iface.launch(
                 server_name="0.0.0.0",
                 server_port=7860,
                 share=False
             )
         except Exception as emergency_e:
             logger.error(f"❌ Emergency interface failed: {emergency_e}")
             print(f"CRITICAL: Complete system failure. Error: {e}")
             print(f"Emergency fallback also failed: {emergency_e}")
             sys.exit(1)
 # Script entry point: start the application only when this file is executed
 # directly, not when it is imported as a module.
 if __name__ == "__main__":
     main()