Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 12, 2025

Commit

574dbbb

verified ·

1 Parent(s): 103437c

Update app.py

Browse files

Files changed (1) hide show

app.py +585 -320

app.py CHANGED Viewed

@@ -22,47 +22,37 @@ from Bio.SeqRecord import SeqRecord
 import stat
 import time
 import asyncio
-from fastapi import FastAPI, File, UploadFile, Form, Request
 from fastapi.responses import HTMLResponse
-from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
 # Set event loop policy for Spaces
-asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
 # --- FastAPI App Setup ---
 app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
-# Add CORS and logging middleware
-app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
-    allow_credentials=True,
-    allow_methods=["*"],
-    allow_headers=["*"],
-)
-@app.middleware("http")
-async def log_requests(request: Request, call_next):
-    logging.debug(f"Request: {request.method} {request.url}")
-    try:
-        response = await call_next(request)
-        logging.debug(f"Response: {response.status_code}")
-        return response
-    except Exception as e:
-        logging.error(f"Request error: {e}", exc_info=True)
-        raise
-# --- Logging ---
-logging.basicConfig(
-    level=logging.DEBUG,
-    format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
-    handlers=[logging.StreamHandler(), logging.FileHandler('/tmp/app.log')]
-)
-logging.getLogger('uvicorn').setLevel(logging.DEBUG)
-logging.getLogger('fastapi').setLevel(logging.DEBUG)
-logging.getLogger('gradio').setLevel(logging.DEBUG)
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -78,50 +68,100 @@ model_repo = "GGproject10/best_boundary_aware_model"
 csv_path = os.path.join(BASE_DIR, "f_cleaned.csv")
 hf_token = os.getenv("HF_TOKEN")
-# --- Load Models ---
 boundary_model = None
 keras_model = None
 kmer_to_index = None
-try:
-    boundary_path = hf_hub_download(repo_id=model_repo, filename="best_boundary_aware_model.pth", token=hf_token, cache_dir="/tmp/hf_cache")
-    if os.path.exists(boundary_path):
-        boundary_model = EnhancedGenePredictor(boundary_path)
-        logging.info("Boundary model loaded.")
-    else:
-        logging.warning("Boundary model not found.")
-except Exception as e:
-    logging.error(f"Failed to load boundary model: {e}", exc_info=True)
-try:
-    keras_path = hf_hub_download(repo_id=model_repo, filename="best_model.keras", token=hf_token, cache_dir="/tmp/hf_cache")
-    kmer_path = hf_hub_download(repo_id=model_repo, filename="kmer_to_index.pkl", token=hf_token, cache_dir="/tmp/hf_cache")
-    if os.path.exists(keras_path) and os.path.exists(kmer_path):
-        keras_model = load_model(keras_path)
-        with open(kmer_path, "rb") as f:
-            kmer_to_index = pickle.load(f)
-        logging.info("Keras model loaded.")
-    else:
-        logging.warning("Keras model not found.")
-except Exception as e:
-    logging.error(f"Failed to load Keras model: {e}", exc_info=True)
-# --- Initialize Tree Analyzer ---
-analyzer = None
-try:
-    analyzer = PhylogeneticTreeAnalyzer()
-    csv_candidates = [csv_path, os.path.join(BASE_DIR, "f cleaned.csv"), "f_cleaned.csv"]
-    csv_loaded = False
-    for csv_candidate in csv_candidates:
-        if os.path.exists(csv_candidate):
-            if analyzer.load_data(csv_candidate):
-                logging.info(f"Tree analyzer loaded from: {csv_candidate}")
-                csv_loaded = True
-                break
-    if not csv_loaded:
-        logging.error("Failed to load CSV data.")
         analyzer = None
-except Exception as e:
-    logging.error(f"Failed to initialize tree analyzer: {e}", exc_info=True)
 # --- Tool Detection ---
 def setup_binary_permissions():
@@ -129,153 +169,301 @@ def setup_binary_permissions():
         if os.path.exists(binary):
             try:
                 os.chmod(binary, os.stat(binary).st_mode | stat.S_IEXEC)
-                logging.info(f"Set executable permission on {binary}")
             except Exception as e:
-                logging.warning(f"Failed to set permission on {binary}: {e}")
 def check_tool_availability():
     setup_binary_permissions()
     mafft_available = False
     mafft_cmd = None
     mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
     for candidate in mafft_candidates:
-        if shutil.which(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
                 if result.returncode == 0 or "mafft" in result.stderr.lower():
                     mafft_available = True
                     mafft_cmd = candidate
-                    logging.info(f"MAFFT found at: {candidate}")
                     break
             except Exception as e:
-                logging.debug(f"MAFFT test failed for {candidate}: {e}")
     iqtree_available = False
     iqtree_cmd = None
     iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
     for candidate in iqtree_candidates:
-        if shutil.which(candidate):
             try:
-                result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
                 if result.returncode == 0 or "iqtree" in result.stderr.lower():
                     iqtree_available = True
                     iqtree_cmd = candidate
-                    logging.info(f"IQ-TREE found at: {candidate}")
                     break
             except Exception as e:
-                logging.debug(f"IQ-TREE test failed for {candidate}: {e}")
     return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
-# --- Pipeline Functions ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     try:
         if len(sequence.strip()) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         with open(aligned_with_query, "w") as output_file:
-            subprocess.run([mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH], stdout=output_file, stderr=subprocess.PIPE, text=True, timeout=600, check=True)
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
-        subprocess.run([iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH, "-m", "GTR+G", "-pre", output_prefix, "-redo"], capture_output=True, text=True, timeout=1200, check=True)
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except Exception as e:
-        logging.error(f"Phylogenetic placement failed: {e}", exc_info=True)
         return False, f"Error: {str(e)}", None, None
     finally:
         if 'query_fasta' in locals() and os.path.exists(query_fasta):
-            os.unlink(query_fasta)
-def build_maximum_likelihood_tree(f_gene_sequence):
     try:
-        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
-        status_msg = f"MAFFT: {'OK' if mafft_available else 'Missing'}\nIQ-TREE: {'OK' if iqtree_available else 'Missing'}\n"
-        if not mafft_available or not iqtree_available:
-            return False, f"{status_msg}\nInstall: conda install -c bioconda mafft iqtree", None, None
-        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
-            return False, f"{status_msg}\nReference files missing.", None, None
-        success, message, aligned_file, tree_file = phylogenetic_placement(f_gene_sequence, mafft_cmd, iqtree_cmd)
-        if success:
-            if aligned_file:
-                shutil.copy2(aligned_file, "query_with_references_aligned.fasta")
-                aligned_file = "query_with_references_aligned.fasta"
-            if tree_file:
-                shutil.copy2(tree_file, "query_placement_tree.treefile")
-                tree_file = "query_placement_tree.treefile"
-            return True, f"{status_msg}\n{message}", aligned_file, tree_file
-        return False, f"{status_msg}\n{message}", aligned_file, tree_file
     except Exception as e:
-        logging.error(f"ML tree construction failed: {e}", exc_info=True)
-        return False, f"Error: {str(e)}", None, None
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
     try:
         if not analyzer:
-            return "Tree analyzer not initialized.", None, None
         if not sequence or len(sequence.strip()) < 10:
-            return "Invalid sequence.", None, None
         if not (1 <= matching_percentage <= 99):
-            return "Matching percentage must be 1-99.", None, None
         if not analyzer.find_query_sequence(sequence):
-            return "Sequence not accepted.", None, None
         matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
         if not matched_ids:
-            return f"No similar sequences at {matching_percentage}% threshold.", None, None
         analyzer.build_tree_structure_with_ml_safe(matched_ids)
         fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
         query_id = analyzer.query_id or f"query_{int(time.time())}"
         tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
         fig.write_html(tree_html_path)
         analyzer.matching_percentage = matching_percentage
         report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
         report_html_path = os.path.join("/tmp", f"detailed_report_{query_id}.html") if report_success else None
-        return f"Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity.", tree_html_path, report_html_path
-    except Exception as e:
-        logging.error(f"Tree analysis failed: {e}", exc_info=True)
-        return f"Error: {str(e)}", None, None
-def predict_with_keras(sequence):
-    try:
-        if not keras_model or not kmer_to_index:
-            return "Keras model not available."
-        if len(sequence) < 6:
-            return "Sequence too short (<6 bp)."
-        kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
-        indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
-        input_arr = np.array([indices])
-        prediction = keras_model.predict(input_arr, verbose=0)[0]
-        f_gene_prob = prediction[-1]
-        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
-        return f"{percentage}% F gene"
     except Exception as e:
-        logging.error(f"Keras prediction failed: {e}", exc_info=True)
-        return f"Error: {str(e)}"
 def read_fasta_file(file_obj):
     try:
         if file_obj is None:
             return ""
         if isinstance(file_obj, str):
             with open(file_obj, "r") as f:
                 content = f.read()
         else:
             content = file_obj.read().decode("utf-8")
         lines = content.strip().split("\n")
         seq_lines = [line.strip() for line in lines if not line.startswith(">")]
         return ''.join(seq_lines)
     except Exception as e:
-        logging.error(f"Failed to read FASTA file: {e}", exc_info=True)
         return ""
 # --- Pydantic Models ---
 class AnalysisRequest(BaseModel):
     sequence: str
@@ -291,107 +479,42 @@ class AnalysisResponse(BaseModel):
     success: bool
     error_message: Optional[str] = None
-# --- Pipeline Execution ---
-async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
-    try:
-        dna_input = read_fasta_file(fasta_file_obj)
-        if not dna_input:
-            return "Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input"
-        return run_pipeline(dna_input, similarity_score, build_ml_tree)
-    except Exception as e:
-        logging.error(f"Pipeline from file error: {e}", exc_info=True)
-        return f"Error: {str(e)}", "", "", "", "", None, None, None, None, f"Error: {str(e)}", f"Error: {str(e)}"
-def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
-    try:
-        dna_input = dna_input.upper().strip()
-        if not dna_input:
-            return "Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
-        if not re.match('^[ACTGN]+$', dna_input):
-            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
-        processed_sequence = dna_input
-        boundary_output = ""
-        if boundary_model:
-            result = boundary_model.predict_sequence(dna_input)
-            regions = result['gene_regions']
-            if regions:
-                processed_sequence = regions[0]["sequence"]
-                boundary_output = processed_sequence
-            else:
-                boundary_output = "No F gene regions found."
-                processed_sequence = dna_input
-        else:
-            boundary_output = f"Boundary model not available. Using input: {len(dna_input)} bp"
-        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "Sequence too short."
-        aligned_file = None
-        phy_file = None
-        ml_tree_output = ""
-        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
-            ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
-            ml_tree_output = ml_message
-            aligned_file = ml_aligned
-            phy_file = ml_tree
-        elif build_ml_tree:
-            ml_tree_output = "Sequence too short for placement (<100 bp)."
-        else:
-            ml_tree_output = "Phylogenetic placement skipped."
-        tree_html_file = None
-        report_html_file = None
-        tree_html_content = "No tree generated."
-        report_html_content = "No report generated."
-        simplified_ml_output = ""
-        if analyzer and processed_sequence and len(processed_sequence) >= 10:
-            tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
-            if tree_html_path and os.path.exists(tree_html_path):
-                output_dir = os.path.join("/tmp", "output")
-                os.makedirs(output_dir, exist_ok=True)
-                safe_seq_name = re.sub(r'[^a-zA-Z0-9_-]', '', processed_sequence[:20])
-                timestamp = str(int(time.time()))
-                tree_html_filename = f"tree_{safe_seq_name}_{timestamp}.html"
-                tree_html_final_path = os.path.join(output_dir, tree_html_filename)
-                shutil.copy2(tree_html_path, tree_html_final_path)
-                tree_html_file = tree_html_final_path
-                with open(tree_html_path, 'r', encoding='utf-8') as f:
-                    tree_html_content = f.read()
-                os.unlink(tree_html_path)
-            if report_html_path and os.path.exists(report_html_path):
-                report_html_filename = f"report_{safe_seq_name}_{timestamp}.html"
-                report_html_final_path = os.path.join(output_dir, report_html_filename)
-                shutil.copy2(report_html_path, report_html_final_path)
-                report_html_file = report_html_final_path
-                with open(report_html_path, 'r', encoding='utf-8') as f:
-                    report_html_content = f.read()
-                os.unlink(report_html_path)
-            simplified_ml_output = tree_result
-            if not tree_html_file:
-                tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
-            if not report_html_file:
-                report_html_content = f"<div style='color: red;'>{tree_result}</div>"
-        else:
-            simplified_ml_output = "Tree analyzer not available." if not analyzer else "Sequence too short (<10 bp)."
-            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
-            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
-        summary_output = f"""
-ANALYSIS SUMMARY:
-Input: {len(dna_input)} bp
-F Gene: {len(processed_sequence)} bp
-Validation: {keras_output}
-Placement: {'OK' if 'successfully' in ml_tree_output else 'Skipped' if 'skipped' in ml_tree_output else 'Failed'}
-Tree Analysis: {'OK' if 'Found' in simplified_ml_output else 'Failed'}
-Report: {'OK' if report_html_file else 'Failed'}
-"""
-        return (
-            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
-            aligned_file, phy_file, tree_html_file, report_html_file, tree_html_content, report_html_content
-        )
-    except Exception as e:
-        logging.error(f"Pipeline error: {e}", exc_info=True)
-        return f"Error: {str(e)}", "", "", "", "", None, None, None, None, f"Error: {str(e)}", f"Error: {str(e)}"
 # --- FastAPI Endpoints ---
 @app.get("/")
 async def root():
-    return {"message": "Gene Analysis Pipeline API", "docs": "/docs"}
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_sequence(request: AnalysisRequest):
@@ -406,21 +529,28 @@ async def analyze_sequence(request: AnalysisRequest):
             success=True
         )
     except Exception as e:
-        logging.error(f"Analyze error: {e}", exc_info=True)
         return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="", tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
 @app.post("/analyze-file")
-async def analyze_file(file: UploadFile = File(...), similarity_score: float = Form(95.0), build_ml_tree: bool = Form(False)):
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
-        os.unlink(temp_file_path)
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
@@ -430,111 +560,246 @@ async def analyze_file(file: UploadFile = File(...), similarity_score: float = F
             success=True
         )
     except Exception as e:
-        logging.error(f"Analyze-file error: {e}", exc_info=True)
-        if 'temp_file_path' in locals():
-            os.unlink(temp_file_path)
         return AnalysisResponse(
-            boundary_output="", keras_output="", ml_tree_output="", tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
-@app.get("/health")
-async def health_check():
     try:
-        mafft_available, iqtree_available, _, _ = check_tool_availability()
-        return {
-            "status": "healthy",
-            "boundary_model": boundary_model is not None,
-            "keras_model": keras_model is not None,
-            "tree_analyzer": analyzer is not None,
-            "mafft_available": mafft_available,
-            "iqtree_available": iqtree_available
-        }
     except Exception as e:
-        logging.error(f"Health check error: {e}", exc_info=True)
-        return {"status": "unhealthy", "error": str(e)}
-# --- Gradio Interface ---
-def create_gradio_interface():
-    with gr.Blocks(title="Gene Analysis Pipeline") as iface:
-        gr.Markdown("## Gene Analysis Pipeline")
-        with gr.Row():
-            dna_input = gr.Textbox(label="DNA Sequence", placeholder="Enter DNA sequence...", lines=4)
-            fasta_file = gr.File(label="Upload FASTA File", file_types=[".fasta", ".fa"])
-            similarity_score = gr.Slider(minimum=70.0, maximum=99.0, value=95.0, label="Similarity (%)")
-            build_ml_tree = gr.Checkbox(label="Phylogenetic Placement")
-        with gr.Row():
-            analyze_text_btn = gr.Button("Analyze Text")
-            analyze_file_btn = gr.Button("Analyze File")
-        with gr.Tabs():
-            with gr.TabItem("F Gene"):
-                f_gene_output = gr.Textbox(label="F Gene Sequence")
-            with gr.TabItem("Validation"):
-                keras_output = gr.Textbox(label="Validation Result")
-            with gr.TabItem("Placement"):
-                ml_tree_output = gr.Textbox(label="Phylogenetic Placement")
-            with gr.TabItem("Tree"):
-                tree_analysis_output = gr.Textbox(label="Tree Analysis")
-                tree_html_display = gr.HTML(label="Interactive Tree")
-            with gr.TabItem("Report"):
-                report_html_display = gr.HTML(label="Report")
-            with gr.TabItem("Summary"):
-                summary_output = gr.Textbox(label="Summary")
-        with gr.Row():
-            alignment_file = gr.File(label="Alignment")
-            tree_file = gr.File(label="Tree")
-            html_tree_file = gr.File(label="Interactive Tree (HTML)")
-            report_file = gr.File(label="Report (HTML)")
-        analyze_text_btn.click(
-            fn=run_pipeline,
-            inputs=[dna_input, similarity_score, build_ml_tree],
-            outputs=[f_gene_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
-                     alignment_file, tree_file, html_tree_file, report_file, tree_html_display, report_html_display]
-        )
-        analyze_file_btn.click(
-            fn=run_pipeline_from_file,
-            inputs=[fasta_file, similarity_score, build_ml_tree],
-            outputs=[f_gene_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
-                     alignment_file, tree_file, html_tree_file, report_file, tree_html_display, report_html_display]
         )
-    return iface
-# --- Mount Gradio ---
 try:
     gradio_app = create_gradio_interface()
     app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
-    logging.info("Gradio mounted at /gradio")
 except Exception as e:
-    logging.error(f"Gradio mounting failed: {e}", exc_info=True)
-    @app.get("/gradio")
-    async def gradio_fallback():
-        health = await health_check()
-        return HTMLResponse(f"""
-            <h1>Gradio UI Failed</h1>
-            <p>Error: Check /tmp/app.log for details.</p>
-            <p>Health: {health}</p>
-            <p>Try: <a href="/docs">API Docs</a> | <a href="/health">Health Check</a></p>
-            """, status_code=503)
-# --- Main Execution ---
 if __name__ == "__main__":
     try:
-        logging.info("Starting Gene Analysis Pipeline")
-        logging.info(f"Boundary Model: {'OK' if boundary_model else 'Missing'}")
-        logging.info(f"Keras Model: {'OK' if keras_model else 'Missing'}")
-        logging.info(f"Tree Analyzer: {'OK' if analyzer else 'Missing'}")
-        mafft_available, iqtree_available, _, _ = check_tool_availability()
-        logging.info(f"MAFFT: {'OK' if mafft_available else 'Missing'}")
-        logging.info(f"IQ-TREE: {'OK' if iqtree_available else 'Missing'}")
-        logging.info("Starting server...")
-        logging.info("API Docs: http://localhost:8000/docs")
-        logging.info("Gradio UI: http://localhost:8000/gradio")
         uvicorn.run(
-            app,
-            host="0.0.0.0",
-            port=8000,
-            reload=False
         )
     except Exception as e:
-        logging.error(f"Server startup failed: {e}", exc_info=True)
-        sys.exit(1)

 import stat
 import time
 import asyncio
+# FastAPI imports
+from fastapi import FastAPI, File, UploadFile, Form, HTTPException
 from fastapi.responses import HTMLResponse
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
 # Set event loop policy for Spaces
+try:
+    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
+except Exception:
+    pass
 # --- FastAPI App Setup ---
 app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
+# --- Enhanced Logging ---
# Console logging is always enabled; the log file is best-effort because the
# target directory may be read-only or absent in some deployments.
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)

_log_handlers = [log_handler]
_file_handler_error = None
try:
    file_handler = logging.FileHandler('/tmp/app.log')
except OSError as e:
    # Keep the name defined so later code can test it, and remember why the
    # file handler is missing so it can be reported once logging is configured.
    file_handler = None
    _file_handler_error = e
else:
    file_handler.setFormatter(log_formatter)
    _log_handlers.append(file_handler)

# Configure the root logger exactly once with whichever handlers are usable.
logging.basicConfig(level=logging.INFO, handlers=_log_handlers)
logger = logging.getLogger(__name__)
if _file_handler_error is not None:
    logger.warning(f"File logging disabled: {_file_handler_error}")
 # --- Global Variables ---
 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 csv_path = os.path.join(BASE_DIR, "f_cleaned.csv")
 hf_token = os.getenv("HF_TOKEN")
+# Initialize models as None
 boundary_model = None
 keras_model = None
 kmer_to_index = None
+analyzer = None
+# --- Enhanced Model Loading with Better Error Handling ---
def load_models_safely():
    """Populate the module-level model globals without ever raising.

    Three independent steps run in order, each inside its own ``try`` so a
    failure in one does not prevent the others from running:

    1. Download ``best_boundary_aware_model.pth`` from ``model_repo`` and wrap
       it in ``EnhancedGenePredictor`` (stored in ``boundary_model``).
    2. Download ``best_model.keras`` and ``kmer_to_index.pkl`` and load them
       into ``keras_model`` and ``kmer_to_index``.
    3. Create a ``PhylogeneticTreeAnalyzer`` and load the first candidate CSV
       that exists and is accepted by ``load_data`` (stored in ``analyzer``).

    Steps 1 and 2 are skipped with a warning when ``hf_token`` is empty.
    When a step raises, the globals it owns are reset to ``None`` and the
    error is logged together with its traceback. Returns nothing.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer
    hf_cache_dir = "/tmp/hf_cache"

    # Load Boundary Model
    try:
        if hf_token:
            logger.info("Attempting to load boundary model from Hugging Face...")
            boundary_path = hf_hub_download(
                repo_id=model_repo,
                filename="best_boundary_aware_model.pth",
                token=hf_token,
                cache_dir=hf_cache_dir,
            )
            if os.path.exists(boundary_path):
                boundary_model = EnhancedGenePredictor(boundary_path)
                logger.info("✅ Boundary model loaded successfully")
            else:
                logger.warning("❌ Boundary model file not found after download")
        else:
            logger.warning("❌ HF_TOKEN not found, skipping boundary model")
    except Exception as e:
        # logger.exception records the traceback; the message alone is rarely
        # enough to diagnose a download or deserialization failure.
        logger.exception(f"❌ Failed to load boundary model: {e}")
        boundary_model = None

    # Load Keras Model
    try:
        if hf_token:
            logger.info("Attempting to load Keras model from Hugging Face...")
            keras_path = hf_hub_download(
                repo_id=model_repo,
                filename="best_model.keras",
                token=hf_token,
                cache_dir=hf_cache_dir,
            )
            kmer_path = hf_hub_download(
                repo_id=model_repo,
                filename="kmer_to_index.pkl",
                token=hf_token,
                cache_dir=hf_cache_dir,
            )
            if os.path.exists(keras_path) and os.path.exists(kmer_path):
                keras_model = load_model(keras_path)
                # NOTE(review): pickle.load can execute arbitrary code from the
                # file; this is only acceptable while model_repo is trusted.
                with open(kmer_path, "rb") as f:
                    kmer_to_index = pickle.load(f)
                logger.info("✅ Keras model loaded successfully")
            else:
                logger.warning("❌ Keras model files not found after download")
        else:
            logger.warning("❌ HF_TOKEN not found, skipping Keras model")
    except Exception as e:
        logger.exception(f"❌ Failed to load Keras model: {e}")
        # Reset both so callers never see a model without its k-mer index.
        keras_model = None
        kmer_to_index = None

    # Initialize Tree Analyzer
    try:
        logger.info("Initializing tree analyzer...")
        analyzer = PhylogeneticTreeAnalyzer()
        csv_candidates = [
            csv_path,
            os.path.join(BASE_DIR, "f cleaned.csv"),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "data", "f_cleaned.csv"),
        ]
        csv_loaded = False
        for csv_candidate in csv_candidates:
            if not os.path.exists(csv_candidate):
                continue
            try:
                if analyzer.load_data(csv_candidate):
                    logger.info(f"✅ Tree analyzer loaded from: {csv_candidate}")
                    csv_loaded = True
                    break
            except Exception as e:
                # A broken file at one location must not stop the search.
                logger.warning(f"Failed to load CSV from {csv_candidate}: {e}")
        if not csv_loaded:
            logger.error("❌ Failed to load CSV data from any location")
            analyzer = None
    except Exception as e:
        logger.exception(f"❌ Failed to initialize tree analyzer: {e}")
        analyzer = None
+# Load models at startup
+load_models_safely()
 # --- Tool Detection ---
 def setup_binary_permissions():
         if os.path.exists(binary):
             try:
                 os.chmod(binary, os.stat(binary).st_mode | stat.S_IEXEC)
+                logger.info(f"Set executable permission on {binary}")
             except Exception as e:
+                logger.warning(f"Failed to set permission on {binary}: {e}")
 def _find_working_tool(candidates, marker, label):
     """Return the first candidate command that answers ``--help``, else None.

     A candidate is tried only if it resolves on PATH or exists as a file
     path. It is accepted when ``--help`` exits with status 0 or when
     ``marker`` appears in its lower-cased stderr. ``label`` is the
     human-readable tool name used in log messages.
     """
     for candidate in candidates:
         # A path constant that is unset (None) would make shutil.which raise
         # outside the try below; treat it as "not installed" instead.
         if not candidate:
             continue
         if not (shutil.which(candidate) or os.path.exists(candidate)):
             continue
         try:
             result = subprocess.run(
                 [candidate, "--help"],
                 capture_output=True,
                 text=True,
                 timeout=5,
             )
         except Exception as e:
             logger.debug(f"{label} test failed for {candidate}: {e}")
             continue
         if result.returncode == 0 or marker in result.stderr.lower():
             logger.info(f"✅ {label} found at: {candidate}")
             return candidate
     return None


 def check_tool_availability():
     """Detect usable MAFFT and IQ-TREE executables.

     Calls ``setup_binary_permissions()`` first, then probes a fixed list of
     command names and paths for each tool, ending with the configured
     ``MAFFT_PATH`` / ``IQTREE_PATH``.

     Returns:
         ``(mafft_available, iqtree_available, mafft_cmd, iqtree_cmd)`` where
         each ``*_cmd`` is the first working candidate or ``None``.
     """
     setup_binary_permissions()
     mafft_cmd = _find_working_tool(
         ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
         "mafft",
         "MAFFT",
     )
     iqtree_cmd = _find_working_tool(
         ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
         "iqtree",
         "IQ-TREE",
     )
     return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd
+# --- Pipeline Functions ---
 def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
     """Add a query sequence to the reference alignment and place it on the tree.

     The query is written to a temporary FASTA file in ``QUERY_OUTPUT_DIR``,
     added to ``ALIGNMENT_PATH`` with ``mafft --add``, and the combined
     alignment is passed to IQ-TREE with ``TREE_PATH`` as the ``-g`` tree.

     Args:
         sequence: Query nucleotide sequence; surrounding whitespace is ignored
             and at least 100 characters must remain.
         mafft_cmd: MAFFT executable to run.
         iqtree_cmd: IQ-TREE executable to run.

     Returns:
         ``(success, message, aligned_path, treefile_path)``. On failure
         ``success`` is False and the paths are ``None``, except that the
         alignment path is still returned when only the IQ-TREE tree file is
         missing. Exceptions are caught and reported through ``message``.
     """
     # Defined before the try so the cleanup below can test it directly.
     query_fasta = None
     try:
         cleaned = sequence.strip()
         if len(cleaned) < 100:
             return False, "Sequence too short (<100 bp).", None, None
         query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
         query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
         aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
         output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
         if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
             return False, "Reference alignment or tree not found.", None, None
         # Write the same stripped text that was length-checked above, so
         # stray whitespace never ends up inside the FASTA record.
         query_record = SeqRecord(Seq(cleaned.upper()), id=query_id, description="")
         SeqIO.write([query_record], query_fasta, "fasta")
         with open(aligned_with_query, "w") as output_file:
             subprocess.run(
                 [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                 stdout=output_file,
                 stderr=subprocess.PIPE,
                 text=True,
                 timeout=600,
                 check=True,
             )
         if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
             return False, "MAFFT alignment failed.", None, None
         subprocess.run(
             [
                 iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
                 "-m", "GTR+G", "-pre", output_prefix, "-redo",
             ],
             capture_output=True,
             text=True,
             timeout=1200,
             check=True,
         )
         treefile = f"{output_prefix}.treefile"
         if not os.path.exists(treefile):
             return False, "IQ-TREE placement failed.", aligned_with_query, None
         success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
         return True, success_msg, aligned_with_query, treefile
     except Exception as e:
         logger.exception(f"Phylogenetic placement failed: {e}")
         # Subprocess errors carry the tool's stderr, which usually names the
         # real cause; str(e) only reports the exit status.
         tool_stderr = getattr(e, "stderr", None)
         if tool_stderr:
             logger.error(f"Tool stderr: {str(tool_stderr).strip()}")
         return False, f"Error: {str(e)}", None, None
     finally:
         # Best-effort removal of the temporary query file; a failure here
         # must not mask the result being returned.
         if query_fasta and os.path.exists(query_fasta):
             try:
                 os.unlink(query_fasta)
             except OSError as cleanup_err:
                 logger.debug(f"Could not remove {query_fasta}: {cleanup_err}")
+def predict_with_keras(sequence):
+    """Score a sequence with the Keras model and format the result as text.
+
+    The sequence is cut into overlapping 6-mers, each looked up in
+    ``kmer_to_index`` (unknown k-mers map to 0), and the resulting index row
+    is passed to ``keras_model.predict``.
+
+    Returns:
+        A status string: a confidence percentage on success, or a message
+        starting with "❌" when the model or k-mer index is unavailable, the
+        sequence is shorter than 6 characters, or prediction raises.
+    """
     try:
+        if not (keras_model and kmer_to_index):
+            return "❌ Keras model not available."
+        if len(sequence) < 6:
+            return "❌ Sequence too short (<6 bp)."
+        window_count = len(sequence) - 5
+        kmer_ids = [
+            kmer_to_index.get(sequence[start:start + 6], 0)
+            for start in range(window_count)
+        ]
+        batch = np.array([kmer_ids])
+        scores = keras_model.predict(batch, verbose=0)[0]
+        # The last element of the first output row is taken as the F gene
+        # score: scaled to percent, offset by 5, truncated, clamped to 0-100.
+        raw_percent = int(scores[-1] * 100 + 5)
+        percentage = min(100, max(0, raw_percent))
+        return f"✅ {percentage}% F gene confidence"
     except Exception as e:
+        logger.error(f"Keras prediction failed: {e}")
+        return f"❌ Error: {str(e)}"
+def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
+    """Run the full analysis pipeline on a raw DNA string.
+
+    Steps: normalise the input, extract the F gene region with the boundary
+    model (when loaded), score it with the Keras model, optionally run
+    phylogenetic placement, then build the similarity tree and report.
+
+    Args:
+        dna_input: Raw nucleotide text. It is upper-cased, all whitespace is
+            removed, and any character outside ``ACTGN`` becomes ``N``.
+        similarity_score: Similarity threshold (percent) passed to
+            ``analyze_sequence_for_tree``.
+        build_ml_tree: When true, run MAFFT/IQ-TREE placement for sequences
+            of at least 100 bp.
+
+    Returns:
+        An 11-tuple ``(boundary_output, keras_output, ml_tree_output,
+        tree_analysis_output, summary_output, aligned_file, tree_file, None,
+        None, tree_html, report_html)``. Problems are reported in the text
+        fields; nothing is raised.
+    """
+    try:
+        # Drop ALL whitespace, not just the ends: a sequence pasted across
+        # several lines must not get an 'N' inserted at every line break.
+        dna_input = ''.join(dna_input.upper().split())
+        if not dna_input:
+            return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input"
+        # Clean sequence: anything that is not a recognised base becomes 'N'.
+        dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
+        processed_sequence = dna_input
+        # Boundary prediction
+        boundary_output = ""
+        if boundary_model:
+            try:
+                result = boundary_model.predict_sequence(dna_input)
+                regions = result['gene_regions']
+                if regions:
+                    # Only the first reported region is analysed further.
+                    processed_sequence = regions[0]["sequence"]
+                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
+                else:
+                    boundary_output = "⚠️ No F gene regions found."
+                    processed_sequence = dna_input
+            except Exception as e:
+                boundary_output = f"❌ Boundary prediction error: {str(e)}"
+                processed_sequence = dna_input
+        else:
+            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
+        # Keras prediction
+        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
+        # Phylogenetic placement
+        aligned_file = None
+        phy_file = None
+        ml_tree_output = ""
+        # Tracked explicitly: the placement success message carries no "✅",
+        # so deriving the status from the message text reported every
+        # successful placement as failed.
+        placement_status = "⚠️ Skipped"
+        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
+            placement_status = "❌ Failed"
+            try:
+                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
+                if mafft_available and iqtree_available:
+                    ml_success, ml_message, ml_aligned, ml_tree = phylogenetic_placement(processed_sequence, mafft_cmd, iqtree_cmd)
+                    ml_tree_output = ml_message
+                    aligned_file = ml_aligned
+                    phy_file = ml_tree
+                    if ml_success:
+                        placement_status = "✅ OK"
+                else:
+                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
+            except Exception as e:
+                ml_tree_output = f"❌ ML tree error: {str(e)}"
+        elif build_ml_tree:
+            placement_status = "❌ Failed"
+            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
+        else:
+            ml_tree_output = "⚠️ Phylogenetic placement skipped."
+        # Tree analysis
+        tree_html_content = "No tree generated."
+        report_html_content = "No report generated."
+        simplified_ml_output = ""
+        if analyzer and processed_sequence and len(processed_sequence) >= 10:
+            try:
+                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
+                simplified_ml_output = tree_result
+                if tree_html_path and os.path.exists(tree_html_path):
+                    with open(tree_html_path, 'r', encoding='utf-8') as f:
+                        tree_html_content = f.read()
+                else:
+                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
+                if report_html_path and os.path.exists(report_html_path):
+                    with open(report_html_path, 'r', encoding='utf-8') as f:
+                        report_html_content = f.read()
+                else:
+                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
+            except Exception as e:
+                simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
+                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
+                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
+        else:
+            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
+            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
+            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
+        # Summary
+        summary_output = f"""
+📊 ANALYSIS SUMMARY:
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+Input: {len(dna_input)} bp
+F Gene: {len(processed_sequence)} bp
+Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
+Placement: {placement_status}
+Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+"""
+        return (
+            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
+            aligned_file, phy_file, None, None, tree_html_content, report_html_content
+        )
+    except Exception as e:
+        logger.error(f"Pipeline error: {e}")
+        error_msg = f"❌ Pipeline Error: {str(e)}"
+        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
+# --- Tree Analysis and FASTA Helpers ---
 def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
+    """Build the similarity tree and report for a query sequence.
+
+    Validates the inputs, asks the module-level ``analyzer`` for sequences
+    similar to the query, then writes the interactive tree HTML to ``/tmp``
+    and requests the detailed report.
+
+    Args:
+        sequence: Query sequence; must be at least 10 characters once stripped.
+        matching_percentage: Similarity threshold; must lie in 1-99.
+
+    Returns:
+        ``(message, tree_html_path, report_html_path)``. Both paths are
+        ``None`` on any failure; the report path is also ``None`` when
+        report generation reports failure.
+    """
+    def rejected(message):
+        # Every failure shares the same shape: a message and no output files.
+        return message, None, None
+
     try:
         if not analyzer:
+            return rejected("❌ Tree analyzer not initialized.")
         if not sequence or len(sequence.strip()) < 10:
+            return rejected("❌ Invalid sequence.")
         if not (1 <= matching_percentage <= 99):
+            return rejected("❌ Matching percentage must be 1-99.")
         if not analyzer.find_query_sequence(sequence):
+            return rejected("❌ Sequence not accepted.")
+        similar_ids, achieved_percentage = analyzer.find_similar_sequences(matching_percentage)
+        if not similar_ids:
+            return rejected(f"❌ No similar sequences at {matching_percentage}% threshold.")
+        analyzer.build_tree_structure_with_ml_safe(similar_ids)
+        figure = analyzer.create_interactive_tree(similar_ids, achieved_percentage)
+        # Fall back to a timestamp-based id when the analyzer did not set one.
         query_id = analyzer.query_id or f"query_{int(time.time())}"
         tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
+        figure.write_html(tree_html_path)
         analyzer.matching_percentage = matching_percentage
+        if analyzer.generate_detailed_report(similar_ids, achieved_percentage):
+            # NOTE(review): presumably the analyzer writes its report to this path - confirm.
+            report_html_path = os.path.join("/tmp", f"detailed_report_{query_id}.html")
+        else:
+            report_html_path = None
+        return f"✅ Found {len(similar_ids)} sequences at {achieved_percentage:.2f}% similarity.", tree_html_path, report_html_path
     except Exception as e:
+        logger.error(f"Tree analysis failed: {e}")
+        return rejected(f"❌ Error: {str(e)}")
 def read_fasta_file(file_obj):
+    """Return the concatenated sequence text of a FASTA file.
+
+    Args:
+        file_obj: ``None``, a filesystem path (``str`` or ``os.PathLike``),
+            or an object whose ``read()`` returns ``bytes`` or ``str``.
+
+    Returns:
+        All non-header lines, stripped and joined into one string. Returns
+        ``""`` for ``None`` or when reading fails (the error is logged).
+        Sequences from multiple records are concatenated.
+    """
     try:
         if file_obj is None:
             return ""
+        if isinstance(file_obj, (str, os.PathLike)):
+            # Explicit encoding, matching the UTF-8 decode used for byte streams.
+            with open(file_obj, "r", encoding="utf-8") as f:
                 content = f.read()
         else:
+            content = file_obj.read()
+            # Text-mode handles already return str; only bytes need decoding.
+            if isinstance(content, bytes):
+                content = content.decode("utf-8")
+        # splitlines() copes with \n, \r\n and \r; lstrip() so an indented
+        # ">" header line is still recognised as a header.
+        seq_lines = [
+            line.strip()
+            for line in content.splitlines()
+            if not line.lstrip().startswith(">")
+        ]
         return ''.join(seq_lines)
     except Exception as e:
+        logger.error(f"Failed to read FASTA file: {e}")
         return ""
+async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
+    """Read a FASTA source and run the pipeline on its sequence text.
+
+    Returns the same 11-tuple as ``run_pipeline``. When the file yields no
+    sequence, or anything raises, an error tuple of the same length is
+    returned instead.
+    """
+    try:
+        sequence_text = read_fasta_file(fasta_file_obj)
+        if sequence_text:
+            return run_pipeline(sequence_text, similarity_score, build_ml_tree)
+        return (
+            "❌ Failed to read FASTA file", "", "", "", "",
+            None, None, None, None, "No input", "No input"
+        )
+    except Exception as e:
+        logger.error(f"Pipeline from file error: {e}")
+        error_msg = f"❌ Error: {str(e)}"
+        return (
+            error_msg, "", "", "", "",
+            None, None, None, None, error_msg, error_msg
+        )
 # --- Pydantic Models ---
 class AnalysisRequest(BaseModel):
     sequence: str
     success: bool
     error_message: Optional[str] = None
 # --- FastAPI Endpoints ---
 @app.get("/")
 async def root():
+    """Return a small JSON index: service name, status, and endpoint paths."""
+    endpoints = {
+        "docs": "/docs",
+        "health": "/health",
+        "gradio": "/gradio",
+        "analyze": "/analyze",
+        "analyze_file": "/analyze-file",
+    }
+    payload = {"message": "🧬 Gene Analysis Pipeline API", "status": "running"}
+    payload["endpoints"] = endpoints
+    return payload
+@app.get("/health")
+async def health_check():
+    """Report which models and external tools are available.
+
+    Returns a dict with ``status``, a ``components`` map of booleans, and
+    ``recommendations``. If the check itself raises, returns
+    ``{"status": "unhealthy", "error": ...}`` instead.
+    """
+    try:
+        mafft_ok, iqtree_ok, _mafft_cmd, _iqtree_cmd = check_tool_availability()
+        components = {
+            "boundary_model": boundary_model is not None,
+            "keras_model": keras_model is not None,
+            "tree_analyzer": analyzer is not None,
+            "mafft_available": mafft_ok,
+            "iqtree_available": iqtree_ok,
+        }
+        if hf_token:
+            token_advice = "OK"
+        else:
+            token_advice = "Set HF_TOKEN environment variable"
+        if mafft_ok and iqtree_ok:
+            tools_advice = "OK"
+        else:
+            tools_advice = "Install MAFFT and IQ-TREE"
+        return {
+            "status": "healthy",
+            "components": components,
+            "recommendations": {
+                "hf_token": token_advice,
+                "bioinformatics_tools": tools_advice,
+            },
+        }
+    except Exception as e:
+        logger.error(f"Health check error: {e}")
+        return {"status": "unhealthy", "error": str(e)}
 @app.post("/analyze", response_model=AnalysisResponse)
 async def analyze_sequence(request: AnalysisRequest):
             success=True
         )
     except Exception as e:
+        logger.error(f"Analyze error: {e}")
         return AnalysisResponse(
+            boundary_output="", keras_output="", ml_tree_output="",
+            tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
 @app.post("/analyze-file")
+async def analyze_file(
+    file: UploadFile = File(...),
+    similarity_score: float = Form(95.0),
+    build_ml_tree: bool = Form(False)
+):
+    """Run the pipeline on an uploaded FASTA file.
+
+    The upload is copied to a temporary ``.fasta`` file under ``/tmp``,
+    processed with ``run_pipeline_from_file``, and the temporary file is
+    removed afterwards. Exceptions are converted into an
+    ``AnalysisResponse`` with ``success=False``.
+    """
+    temp_file_path = None
     try:
         with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
             content = await file.read()
             temp_file.write(content)
             temp_file_path = temp_file.name
         result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
+        # NOTE(review): only two result fields are passed here while the error
+        # branch passes five - confirm the other AnalysisResponse fields have defaults.
         return AnalysisResponse(
             boundary_output=result[0] or "",
             keras_output=result[1] or "",
             success=True
         )
     except Exception as e:
+        logger.error(f"Analyze-file error: {e}")
         return AnalysisResponse(
+            boundary_output="", keras_output="", ml_tree_output="",
+            tree_analysis_output="", summary_output="",
             success=False, error_message=str(e)
         )
+    finally:
+        # Best-effort cleanup. Catch only OSError: a bare ``except`` would also
+        # swallow task cancellation and interpreter-exit signals.
+        if temp_file_path and os.path.exists(temp_file_path):
+            try:
+                os.unlink(temp_file_path)
+            except OSError as cleanup_error:
+                logger.debug(f"Could not remove {temp_file_path}: {cleanup_error}")
+# --- Enhanced Gradio Interface ---
+def create_gradio_interface():
+    """Build the Gradio Blocks UI for the pipeline.
+
+    Returns:
+        The assembled ``gr.Blocks`` app, or a minimal ``gr.Interface`` that
+        only shows the error text if building the full UI raises.
+    """
     try:
+        # Probe the external tools once: each probe launches subprocesses, and
+        # the status panel used to repeat the whole probe for every tool row.
+        mafft_available, iqtree_available, _, _ = check_tool_availability()
+        with gr.Blocks(
+            title="🧬 Gene Analysis Pipeline",
+            theme=gr.themes.Soft(),
+            css="""
+            .gradio-container { max-width: 1200px !important; }
+            .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
+            .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
+            .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
+            .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
+            """
+        ) as iface:
+            gr.Markdown("# 🧬 Gene Analysis Pipeline")
+            # Status display (rendered once, when the interface is built)
+            with gr.Row():
+                with gr.Column():
+                    status_display = gr.HTML(value=f"""
+                    <div class="status-box">
+                        <h3>🔧 System Status</h3>
+                        <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
+                        <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
+                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
+                        <p>🧬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'}</p>
+                        <p>🌲 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'}</p>
+                    </div>
+                    """)
+            # Input section
+            with gr.Row():
+                with gr.Column(scale=2):
+                    dna_input = gr.Textbox(
+                        label="🧬 DNA Sequence",
+                        placeholder="Enter DNA sequence (ATCG)...",
+                        lines=4,
+                        info="Paste your DNA sequence here"
+                    )
+                with gr.Column(scale=1):
+                    fasta_file = gr.File(
+                        label="📁 Upload FASTA File",
+                        file_types=[".fasta", ".fa", ".txt"],
+                        info="Or upload a FASTA file"
+                    )
+            # Parameters
+            with gr.Row():
+                similarity_score = gr.Slider(
+                    minimum=70.0,
+                    maximum=99.0,
+                    value=95.0,
+                    label="🎯 Similarity Threshold (%)",
+                    info="Minimum similarity for phylogenetic analysis"
+                )
+                build_ml_tree = gr.Checkbox(
+                    label="🌲 Enable Phylogenetic Placement",
+                    value=False,
+                    info="Computationally intensive"
+                )
+            # Action buttons
+            with gr.Row():
+                analyze_text_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
+                analyze_file_btn = gr.Button("📁 Analyze File", variant="secondary")
+                clear_btn = gr.Button("🗑️ Clear", variant="stop")
+            # Results section
+            gr.Markdown("## 📊 Results")
+            with gr.Tabs():
+                with gr.TabItem("📋 Summary"):
+                    summary_output = gr.Textbox(label="Analysis Summary", lines=8)
+                with gr.TabItem("🎯 F Gene Detection"):
+                    f_gene_output = gr.Textbox(label="F Gene Sequence Detection")
+                    keras_output = gr.Textbox(label="Validation Result")
+                with gr.TabItem("🌲 Phylogenetic Analysis"):
+                    ml_tree_output = gr.Textbox(label="Phylogenetic Placement")
+                    tree_analysis_output = gr.Textbox(label="Tree Analysis")
+                with gr.TabItem("🌳 Interactive Tree"):
+                    tree_html_display = gr.HTML(label="Interactive Tree Visualization")
+                with gr.TabItem("📄 Detailed Report"):
+                    report_html_display = gr.HTML(label="Analysis Report")
+                with gr.TabItem("📁 Download Files"):
+                    with gr.Row():
+                        aligned_file_output = gr.File(label="Aligned Sequences", visible=False)
+                        tree_file_output = gr.File(label="Phylogenetic Tree", visible=False)
+                        custom_file_1 = gr.File(label="Additional Output 1", visible=False)
+                        custom_file_2 = gr.File(label="Additional Output 2", visible=False)
+            # Event handlers. Each analysis handler returns the 11 values that
+            # the ``outputs`` lists below expect, in that order.
+            def run_analysis_text(dna_input_val, similarity_val, build_ml_val):
+                # Guard against a None value before calling .strip().
+                if not dna_input_val or not dna_input_val.strip():
+                    return "❌ Please enter a DNA sequence", "", "", "", "", None, None, None, None, "", ""
+                return run_pipeline(dna_input_val, similarity_val, build_ml_val)
+            def run_analysis_file(file_obj, similarity_val, build_ml_val):
+                if file_obj is None:
+                    return "❌ Please upload a file", "", "", "", "", None, None, None, None, "", ""
+                try:
+                    # run_pipeline_from_file is a coroutine function. asyncio.run
+                    # drives it on a fresh event loop and closes that loop, instead
+                    # of leaving a closed loop installed as the thread's current one.
+                    import asyncio
+                    return asyncio.run(run_pipeline_from_file(file_obj, similarity_val, build_ml_val))
+                except Exception as e:
+                    error_msg = f"❌ Error processing file: {str(e)}"
+                    return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg
+            def clear_all():
+                # One value per component in clear_btn's outputs list (15 in total).
+                return ("", None, 95.0, False, "", "", "", "", "", None, None, None, None, "", "")
+            # Wire up the interface
+            analyze_text_btn.click(
+                fn=run_analysis_text,
+                inputs=[dna_input, similarity_score, build_ml_tree],
+                outputs=[
+                    f_gene_output, keras_output, ml_tree_output, tree_analysis_output,
+                    summary_output, aligned_file_output, tree_file_output,
+                    custom_file_1, custom_file_2, tree_html_display, report_html_display
+                ]
+            )
+            analyze_file_btn.click(
+                fn=run_analysis_file,
+                inputs=[fasta_file, similarity_score, build_ml_tree],
+                outputs=[
+                    f_gene_output, keras_output, ml_tree_output, tree_analysis_output,
+                    summary_output, aligned_file_output, tree_file_output,
+                    custom_file_1, custom_file_2, tree_html_display, report_html_display
+                ]
+            )
+            clear_btn.click(
+                fn=clear_all,
+                outputs=[
+                    dna_input, fasta_file, similarity_score, build_ml_tree,
+                    f_gene_output, keras_output, ml_tree_output, tree_analysis_output,
+                    summary_output, aligned_file_output, tree_file_output,
+                    custom_file_1, custom_file_2, tree_html_display, report_html_display
+                ]
+            )
+            # Example section
+            gr.Markdown("""
+            ## 💡 Examples
+            Try these sample sequences:
+            **Short F Gene Sequence:**
+            ```
+            ATGGAGTTGCCACACCATCACAGAGGCCTCGAGATGCCAAGTCGTTAACC
+            ```
+            **Medium Length Sequence:**
+            ```
+            ATGGAGTTGCCACACCATCACAGAGGCCTCGAGATGCCAAGTCGTTAACCCTACTAAGCTCCCTGTCTGACATACTTGATGTGGAGGCTATAGATATTATCAATCAAGCAGTGACCATTCTGAAGATGAATGGACCCAACACCACCTACATATACCCTGACAAACTGGAAAATCTGGCAATGCTGACATTGGATGAACAACTTGAGAGGGTGATGATTATCAATGCCACCATCCAAGAGACAGATAATAATTACAACAACATTATTAGAAAATACACAAGCAATGATGACCTTGAACAAGATGAAGAGATGAAACGGAAAATACCAGAGGAAAAGACTAAGGGATCCGGATTGATCCACAACATGAAGAGGAAGAAGCACTACGACCTGACCATGACCATGAAAAAGCACGAGACACTAACCATGAACACCTTGACAATGATCATGACTTTGGACATGCAAGAGGCCAAATTGAAGGACTTGATGACTACAACCAACACCACATCCGTGGCCACCTCAAGGAAGTCTTTGACACACAAGCGCAACGCCAAGCTGACCATGACCTACATCCAAGCCAACACGGTGAACACCGTGGACATGATGAAGAACACAACATCCAAGGACACAGACAAGATGATGAAGAACACAATGACCTCCTACAACACCATGACCACAATGATGAACACCGTGACAATGATGAAGAACACCATCTCCAAGAACACAAGGAAGATGAAAAACACAACGATCCACAATGCCATGAACATGATGAACCCTCTGACAAACCTGAACAATATTATCAAGAACACAAACATGAACAACCTGGACAAGCTGATGAACACCATCTCCAAGAACACAAGGAAGATGAAAAACACAACGATCCACAATGCCATGAACATGATGAACCCTCTGACAAACCTGAACAATATTATCAAGAACACAAACATGAACAACCTGGACAAGCTGATGAACACCATCTCCAAGAACACAAGGAAGATG
+            ```
+            **Tips:**
+            - Use sequences at least 100 bp for phylogenetic placement
+            - Higher similarity thresholds (95-99%) provide more specific results
+            - Phylogenetic placement is computationally intensive
+            """)
+        return iface
     except Exception as e:
+        logger.error(f"Failed to create Gradio interface: {e}")
+        # Python unbinds `e` when the except clause ends, so a lambda closing
+        # over it would raise NameError when Gradio calls it later. Capture
+        # the message text now instead.
+        fallback_message = f"Error creating interface: {e}"
+        # Fallback minimal interface
+        return gr.Interface(
+            fn=lambda x: fallback_message,
+            inputs=gr.Textbox(label="Input"),
+            outputs=gr.Textbox(label="Error"),
+            title="Gene Analysis Pipeline - Error"
         )
+# --- Mount Gradio App ---
+# Build the Gradio UI and attach it to the FastAPI app under /gradio.
+# The value returned by gr.mount_gradio_app replaces the module-level `app`.
+# Any failure is logged and not re-raised, so `app` keeps its previous value
+# and the module still imports.
 try:
     gradio_app = create_gradio_interface()
     app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
+    logger.info("✅ Gradio interface mounted successfully")
 except Exception as e:
+    logger.error(f"❌ Failed to mount Gradio interface: {e}")
+# --- Main Function ---
 if __name__ == "__main__":
+    # Script entry point: print a component status banner, then serve `app`
+    # with uvicorn on port 7860.
     try:
+        # Print startup information
+        print("🧬 Gene Analysis Pipeline Starting...")
+        print(f"📍 Working Directory: {BASE_DIR}")
+        print(f"🔑 HF Token: {'✅ Set' if hf_token else '❌ Missing'}")
+        print(f"🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}")
+        print(f"🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}")
+        print(f"🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}")
+        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
+        mafft_location = mafft_cmd if mafft_available else 'Not found'
+        print(f"🧬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'} - {mafft_location}")
+        iqtree_location = iqtree_cmd if iqtree_available else 'Not found'
+        print(f"🌲 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'} - {iqtree_location}")
+        print("\n🚀 Starting server...")
+        print("📱 FastAPI docs: http://localhost:7860/docs")
+        print("🎨 Gradio interface: http://localhost:7860/gradio")
+        # Start the server
+        server_options = {
+            "host": "0.0.0.0",
+            "port": 7860,
+            "log_level": "info",
+            "access_log": True,
+        }
+        uvicorn.run(app, **server_options)
+    except KeyboardInterrupt:
+        # KeyboardInterrupt is not an Exception subclass, so clause order is free.
+        print("\n👋 Server stopped by user")
+        sys.exit(0)
+    except Exception as e:
+        logger.error(f"❌ Failed to start server: {e}")
+        print(f"❌ Server startup failed: {e}")
+        sys.exit(1)