re-type committed on
Commit
42bde38
·
verified ·
1 Parent(s): 2c6a591

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -634
app.py DELETED
@@ -1,634 +0,0 @@
1
- import os
2
- import logging
3
- import pickle
4
- import subprocess
5
- import pandas as pd
6
- import re
7
- import numpy as np
8
- import torch
9
- from flask import Flask, request, jsonify, send_file
10
- from werkzeug.utils import secure_filename
11
- from predictor import EnhancedGenePredictor
12
- from tensorflow.keras.models import load_model
13
- from analyzer import PhylogeneticTreeAnalyzer
14
- import tempfile
15
- import shutil
16
- import sys
17
- import uuid
18
- from pathlib import Path
19
- from huggingface_hub import hf_hub_download
20
- from Bio import SeqIO
21
- from Bio.Seq import Seq
22
- from Bio.SeqRecord import SeqRecord
23
- import stat
24
- import time
25
-
26
# --- Logging Setup ---
# Log to both stdout (container logs) and a file under /tmp so a persisted
# copy survives for debugging.
os.makedirs('/tmp', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/flask_app.log')
    ]
)
logger = logging.getLogger(__name__)
37
-
38
# Disable GPU to avoid CUDA errors
# Force TensorFlow/PyTorch onto CPU and silence TF's C++ log spam; these must
# be set before TF initializes (the import above already happened, so this is
# presumably relied on by lazy initialization — TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
42
-
43
# --- Global Variables ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # directory of this file
# Bundled alignment / tree-building binaries shipped with the app.
MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
# Reference alignment and tree used for phylogenetic placement of queries.
ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
# Per-query scratch/output files are written here.
QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
MODEL_REPO = "GGproject10/best_boundary_aware_model"  # Hugging Face repo for model weights
# NOTE: literal space in the name; alternative spellings are tried at load time.
CSV_PATH = "f cleaned.csv"
HF_TOKEN = os.getenv("HF_TOKEN")  # optional token for private HF downloads

# Initialize models.
# Populated by load_models_safely(); None means the component is unavailable
# and dependent endpoints degrade gracefully.
boundary_model = None
keras_model = None
kmer_to_index = None
analyzer = None
60
-
61
# --- Load Models ---
def load_models_safely():
    """Load the boundary model, the Keras classifier and the tree analyzer.

    Each component is loaded independently: a failure is logged and the
    corresponding module-level global is left as None rather than raising,
    so the app can still serve the features that did load.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer
    logger.info("🔍 Loading models...")

    # Boundary model: local file preferred, otherwise downloaded from HF Hub.
    try:
        boundary_path = os.path.join(BASE_DIR, "models", "best_boundary_aware_model.pth")
        if not os.path.exists(boundary_path):
            logger.info(f"Downloading boundary model from {MODEL_REPO}...")
            boundary_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="best_boundary_aware_model.pth",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        boundary_model = EnhancedGenePredictor(boundary_path)
        logger.info("✅ Boundary model loaded")
    except Exception as e:
        logger.error(f"❌ Failed to load boundary model: {e}")
        boundary_model = None

    # Keras model: the classifier and its k-mer vocabulary must both load;
    # if either fails, both globals are reset so they stay consistent.
    try:
        keras_path = os.path.join(BASE_DIR, "models", "best_model.keras")
        kmer_path = os.path.join(BASE_DIR, "models", "kmer_to_index.pkl")
        if not os.path.exists(keras_path):
            logger.info(f"Downloading Keras model from {MODEL_REPO}...")
            keras_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="best_model.keras",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        if not os.path.exists(kmer_path):
            logger.info(f"Downloading k-mer index from {MODEL_REPO}...")
            kmer_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="kmer_to_index.pkl",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        keras_model = load_model(keras_path)
        # NOTE(review): pickle.loads on downloaded data executes arbitrary
        # code if the repo is compromised — acceptable only for a trusted repo.
        with open(kmer_path, "rb") as f:
            kmer_to_index = pickle.load(f)
        logger.info("✅ Keras model and k-mer index loaded")
    except Exception as e:
        logger.error(f"❌ Failed to load Keras model: {e}")
        keras_model = None
        kmer_to_index = None

    # Tree analyzer: probe several candidate CSV locations (the configured
    # name contains a space; an underscore variant is also tried).
    try:
        analyzer = PhylogeneticTreeAnalyzer()
        csv_candidates = [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ]
        csv_loaded = False
        for csv_candidate in csv_candidates:
            if os.path.exists(csv_candidate):
                if analyzer.load_data(csv_candidate):
                    logger.info(f"✅ CSV loaded: {csv_candidate}")
                    csv_loaded = True
                    break
        if not csv_loaded:
            logger.error("❌ Failed to load CSV")
            analyzer = None
        else:
            if analyzer.train_ai_model():
                logger.info("✅ AI model trained")
    except Exception as e:
        logger.error(f"❌ Tree analyzer failed: {e}")
        analyzer = None
139
-
140
# Load all models at import time; an unexpected hard failure aborts the
# process since the service cannot serve anything useful without them.
# (load_models_safely() itself swallows per-component errors.)
try:
    load_models_safely()
except Exception as e:
    logger.critical(f"Model loading failed: {e}")
    sys.exit(1)
145
-
146
# --- Tool Detection ---
def setup_binary_permissions():
    """Mark the bundled MAFFT and IQ-TREE binaries as executable (chmod +x)."""
    for tool_path in (MAFFT_PATH, IQTREE_PATH):
        if not os.path.exists(tool_path):
            logger.warning(f"⚠️ Binary not found: {tool_path}")
            continue
        current_mode = os.stat(tool_path).st_mode
        os.chmod(tool_path, current_mode | stat.S_IEXEC)
        logger.info(f"✅ Set permission: {tool_path}")
154
-
155
def check_tool_availability():
    """Locate working MAFFT and IQ-TREE executables.

    Tries the bundled binaries first, then common system and conda install
    locations, verifying each candidate by running `--help`.

    Returns:
        (mafft_available, iqtree_available, mafft_cmd, iqtree_cmd) — the
        booleans flag availability, the cmd values are the first working
        executable paths (or None).
    """
    setup_binary_permissions()
    mafft_available = False
    mafft_cmd = None
    # Candidates are probed in order; the bundled binary wins if it works.
    mafft_candidates = [
        MAFFT_PATH,
        os.path.join(BASE_DIR, "binaries", "mafft", "mafft"),
        os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat"),
        'mafft',
        '/usr/bin/mafft',
        '/usr/local/bin/mafft',
        os.path.join(BASE_DIR, "binaries", "mafft", "mafftdir", "bin", "mafft"),
        os.path.expanduser("~/anaconda3/bin/mafft"),
        os.path.expanduser("~/miniconda3/bin/mafft"),
        "/opt/conda/bin/mafft",
        "/usr/local/miniconda3/bin/mafft"
    ]
    for candidate in mafft_candidates:
        if os.path.exists(candidate) or shutil.which(candidate):
            try:
                # `--help` confirms the binary actually runs; MAFFT prints
                # usage to stderr, hence the stderr check as fallback.
                result = subprocess.run(
                    [candidate, "--help"],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0 or "mafft" in result.stderr.lower():
                    mafft_available = True
                    mafft_cmd = candidate
                    logger.info(f"✅ MAFFT: {candidate}")
                    break
            except Exception as e:
                logger.debug(f"MAFFT test failed: {candidate}: {e}")
    iqtree_available = False
    iqtree_cmd = None
    # Same probing strategy for IQ-TREE, covering v1/v2/v3 binary names.
    iqtree_candidates = [
        IQTREE_PATH,
        'iqtree',
        'iqtree2',
        'iqtree3',
        '/usr/bin/iqtree',
        '/usr/local/bin/iqtree',
        'iqtree.exe',
        'iqtree2.exe',
        'iqtree3.exe',
        os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree2"),
        os.path.expanduser("~/anaconda3/bin/iqtree2"),
        os.path.expanduser("~/miniconda3/bin/iqtree2"),
        "/opt/conda/bin/iqtree2",
        "/usr/local/miniconda3/bin/iqtree2"
    ]
    for candidate in iqtree_candidates:
        if os.path.exists(candidate) or shutil.which(candidate):
            try:
                result = subprocess.run(
                    [candidate, "--help"],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0 or "iqtree" in result.stderr.lower():
                    iqtree_available = True
                    iqtree_cmd = candidate
                    logger.info(f"✅ IQ-TREE: {candidate}")
                    break
            except Exception as e:
                logger.debug(f"IQ-TREE test failed: {candidate}: {e}")
    return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
223
-
224
def install_dependencies_guide():
    """Return a human-readable installation guide for MAFFT and IQ-TREE,
    shown to the user when one of the tools cannot be located."""
    return """
🔧 INSTALLATION GUIDE FOR MISSING DEPENDENCIES:
For MAFFT:
- Ubuntu/Debian: sudo apt-get install mafft
- CentOS/RHEL: sudo yum install mafft
- macOS: brew install mafft
- Windows: Download from https://mafft.cbrc.jp/alignment/software/
- Conda: conda install -c bioconda mafft
For IQ-TREE:
- Ubuntu/Debian: sudo apt-get install iqtree
- CentOS/RHEL: sudo yum install iqtree
- macOS: brew install iqtree
- Windows: Download from http://www.iqtree.org/
- Conda: conda install -c bioconda iqtree
"""
240
-
241
# --- Pipeline Functions ---
def cleanup_file(file_path: str):
    """Best-effort removal of *file_path*; missing paths and errors are non-fatal."""
    if not file_path or not os.path.exists(file_path):
        return
    try:
        os.unlink(file_path)
        logger.debug(f"Cleaned up {file_path}")
    except Exception as e:
        logger.warning(f"Failed to clean up {file_path}: {e}")
249
-
250
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence into the reference phylogenetic tree.

    Writes the query to a temporary FASTA, adds it to the reference alignment
    with MAFFT (`--add`), then runs an IQ-TREE search constrained by the
    reference tree topology (`-g`).

    Returns:
        (success, message, aligned_file_path, tree_file_path); path elements
        are None when the corresponding artifact was not produced.
    """
    query_fasta = None
    try:
        if len(sequence.strip()) < 100:
            return False, "Sequence too short (<100 bp).", None, None
        # Unique per-request ID so concurrent queries don't collide on disk.
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, f"Reference files missing: {ALIGNMENT_PATH}, {TREE_PATH}", None, None
        query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")
        # MAFFT writes the combined alignment to stdout; redirect into a file.
        with open(aligned_with_query, "w") as output_file:
            result = subprocess.run(
                [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                stdout=output_file,
                stderr=subprocess.PIPE,
                text=True,
                timeout=600,
                check=True
            )
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            cleanup_file(query_fasta)
            return False, "MAFFT alignment failed.", None, None
        # Constrained ML search: -g keeps the reference topology as a guide,
        # -redo overwrites any stale output with the same prefix.
        result = subprocess.run(
            [iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH, "-m", "GTR+G", "-pre", output_prefix, "-redo"],
            capture_output=True,
            text=True,
            timeout=1200,
            check=True
        )
        treefile = f"{output_prefix}.treefile"
        if not os.path.exists(treefile):
            cleanup_file(query_fasta)
            return False, "IQ-TREE placement failed.", aligned_with_query, None
        success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
        cleanup_file(query_fasta)
        return True, success_msg, aligned_with_query, treefile
    except Exception as e:
        # check=True above turns tool failures into CalledProcessError, caught here.
        logger.error(f"Phylogenetic placement failed: {e}")
        cleanup_file(query_fasta)
        return False, f"Error: {str(e)}", None, None
293
-
294
def build_maximum_likelihood_tree(f_gene_sequence):
    """Run dependency checks and, if possible, phylogenetic placement.

    Builds a human-readable status report covering tool and reference-file
    availability, then delegates to phylogenetic_placement(). Successful
    outputs are also copied to stable, predictable filenames so downloads
    don't depend on the per-query IDs.

    Returns:
        (success, message, aligned_file_path, tree_file_path).
    """
    try:
        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
        status_msg = "🔍 Checking dependencies...\n"
        if not mafft_available:
            status_msg += "❌ MAFFT not found\n"
        else:
            status_msg += f"✅ MAFFT found: {mafft_cmd}\n"
        if not iqtree_available:
            status_msg += "❌ IQ-TREE not found\n"
        else:
            status_msg += f"✅ IQ-TREE found: {iqtree_cmd}\n"
        if not os.path.exists(ALIGNMENT_PATH):
            status_msg += f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
        else:
            status_msg += f"✅ Reference alignment found\n"
        if not os.path.exists(TREE_PATH):
            status_msg += f"❌ Reference tree not found: {TREE_PATH}\n"
        else:
            status_msg += f"✅ Reference tree found\n"
        # Missing tools get the install guide appended to the report.
        if not mafft_available or not iqtree_available:
            guide = install_dependencies_guide()
            return False, f"{status_msg}\n{guide}", None, None
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            status_msg += "\n❌ Missing reference files.\n"
            return False, status_msg, None, None
        placement_success, placement_message, aligned_file, tree_file = phylogenetic_placement(
            f_gene_sequence, mafft_cmd, iqtree_cmd
        )
        if placement_success:
            final_message = f"{status_msg}\n{placement_message}"
            # Copy artifacts to fixed names; note concurrent requests will
            # overwrite each other's copies here (per-query files remain).
            if aligned_file and os.path.exists(aligned_file):
                standard_aligned = os.path.join(QUERY_OUTPUT_DIR, "query_with_references_aligned.fasta")
                shutil.copy2(aligned_file, standard_aligned)
                aligned_file = standard_aligned
            if tree_file and os.path.exists(tree_file):
                standard_tree = os.path.join(QUERY_OUTPUT_DIR, "query_placement_tree.treefile")
                shutil.copy2(tree_file, standard_tree)
                tree_file = standard_tree
            return True, final_message, aligned_file, tree_file
        else:
            return False, f"{status_msg}\n{placement_message}", aligned_file, tree_file
    except Exception as e:
        logger.error(f"ML tree construction failed: {e}")
        return False, f"Error: {str(e)}", None, None
339
-
340
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Find reference sequences similar to *sequence* and render a tree.

    Uses the global PhylogeneticTreeAnalyzer to select sequences above the
    similarity threshold, builds an interactive Plotly tree, and writes it
    to an HTML file under BASE_DIR/output.

    Returns:
        (message, html_path); html_path is None on failure.
    """
    try:
        if not analyzer:
            return "❌ Tree analyzer not initialized.", None
        if not sequence or len(sequence.strip()) < 10:
            return "❌ Invalid sequence.", None
        if not (1 <= matching_percentage <= 99):
            return "❌ Matching percentage must be 1-99.", None
        if not analyzer.find_query_sequence(sequence):
            return "❌ Sequence not accepted.", None
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return f"❌ No similar sequences at {matching_percentage}% threshold.", None
        analyzer.build_tree_structure_with_ml_safe(matched_ids)
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
        # Fall back to a timestamp-based ID if the analyzer didn't set one.
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        output_dir = os.path.join(BASE_DIR, "output")
        os.makedirs(output_dir, exist_ok=True)
        html_filename = f"tree_{query_id}.html"
        html_path = os.path.join(output_dir, html_filename)
        fig.write_html(html_path)
        success_msg = f"✅ Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity."
        return success_msg, html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}")
        return f"❌ Error: {str(e)}", None
366
-
367
def predict_with_keras(sequence):
    """Score *sequence* with the Keras F-gene classifier.

    Encodes the sequence as overlapping 6-mers looked up in the global
    k-mer vocabulary and returns a human-readable confidence string.
    Errors are reported in the returned message rather than raised.
    """
    try:
        # Compare against None explicitly: Keras models don't define reliable
        # truthiness, and an empty-but-loaded k-mer dict must not be treated
        # as "model unavailable".
        if keras_model is None or kmer_to_index is None:
            return "❌ Keras model not available."
        if len(sequence) < 6:
            return "❌ Sequence too short (<6 bp)."
        # Sliding 6-mer window; unknown k-mers map to index 0.
        kmers = [sequence[i:i + 6] for i in range(len(sequence) - 5)]
        indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
        input_arr = np.array([indices])
        prediction = keras_model.predict(input_arr, verbose=0)[0]
        # Last output unit is taken as the F-gene probability.
        f_gene_prob = prediction[-1]
        # +5 bias then clamp to [0, 100] — keeps the reported confidence
        # slightly optimistic by design.
        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
        return f"✅ {percentage}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}")
        return f"❌ Error: {str(e)}"
383
-
384
def read_fasta_file(file_path):
    """Read a FASTA file and return the concatenated sequence text.

    Header lines (starting with '>') are dropped and the sequences of all
    records are joined into one string. Returns "" for an empty path or on
    any read failure.
    """
    try:
        if not file_path:
            return ""
        with open(file_path, "r") as handle:
            raw = handle.read()
        pieces = []
        for line in raw.strip().split("\n"):
            if line.startswith(">"):
                continue
            pieces.append(line.strip())
        return ''.join(pieces)
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}")
        return ""
396
-
397
def run_pipeline_from_file(fasta_file_path, similarity_score, build_ml_tree):
    """Run the full analysis pipeline on a FASTA file.

    Reads the sequence from *fasta_file_path* and delegates to run_pipeline().
    Returns the same 9-tuple as run_pipeline(); on failure the first and last
    elements carry the error message.
    """
    try:
        sequence = read_fasta_file(fasta_file_path)
        if sequence:
            return run_pipeline(sequence, similarity_score, build_ml_tree)
        return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, "No input"
    except Exception as e:
        logger.error(f"Pipeline from file error: {e}")
        return f"❌ Error: {str(e)}", "", "", "", "", None, None, None, f"❌ Error: {str(e)}"
406
-
407
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run the complete analysis pipeline on a raw DNA string.

    Stages: sanitize input → boundary-model F-gene extraction → Keras
    confidence scoring → optional phylogenetic placement → similarity-tree
    analysis → summary. Each stage degrades gracefully when its model/tool
    is unavailable.

    Returns a 9-tuple:
        (boundary_output, keras_output, ml_tree_output, simplified_ml_output,
         summary_output, aligned_file, phy_file, html_file, tree_html_content)
    """
    try:
        dna_input = dna_input.upper().strip()
        if not dna_input:
            return "❌ Empty input", "", "", "", "", None, None, None, "No input"
        # Sanitize rather than reject: any non-ACTGN character becomes N.
        if not re.match('^[ACTGN]+$', dna_input):
            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
        processed_sequence = dna_input
        boundary_output = ""
        # Stage 1: extract the F-gene region; fall back to the full input.
        if boundary_model:
            try:
                predictions, probs, confidence = boundary_model.predict(dna_input)
                regions = boundary_model.extract_gene_regions(predictions, dna_input)
                if regions:
                    # Only the first detected region is used downstream.
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = processed_sequence
                    logger.info(f"F gene extracted: {len(processed_sequence)} bp")
                else:
                    boundary_output = "⚠️ No F gene regions found."
                    processed_sequence = dna_input
            except Exception as e:
                boundary_output = f"❌ Boundary error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
        # Stage 2: Keras confidence (needs at least one 6-mer).
        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
        aligned_file = None
        phy_file = None
        ml_tree_output = ""
        # Stage 3: optional phylogenetic placement (needs >= 100 bp).
        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
            ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
            ml_tree_output = ml_message
            aligned_file = ml_aligned
            phy_file = ml_tree
        elif build_ml_tree:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."
        html_file = None
        tree_html_content = "No tree generated."
        simplified_ml_output = ""
        # Stage 4: similarity-tree analysis; embed the HTML inline on success.
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            tree_result, html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
            simplified_ml_output = tree_result
            html_file = html_path
            if html_path and os.path.exists(html_path):
                with open(html_path, 'r', encoding='utf-8') as f:
                    tree_html_content = f.read()
            else:
                tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
        else:
            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
        # Stage 5: human-readable summary assembled from stage messages.
        summary_output = f"""
📊 ANALYSIS SUMMARY:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Input: {len(dna_input)} bp
F Gene: {len(processed_sequence)} bp
Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skipped' in ml_tree_output else '❌ Failed'}
Tree Analysis: {'✅ OK' if '✅' in simplified_ml_output else '❌ Failed'}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, html_file, tree_html_content
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}")
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, error_msg
478
-
479
# --- Flask App ---
# WSGI application object; routes are registered on it below.
app = Flask(__name__)
481
-
482
@app.route("/health", methods=["GET"])
def health_check():
    """Report component and reference-file availability.

    Returns 200 with "healthy" only when every model, external tool and
    reference file is present; otherwise "unhealthy" (still 200) or 500 if
    the check itself fails.
    """
    try:
        mafft_available, iqtree_available, _, _ = check_tool_availability()
        files_exist = {
            "alignment": os.path.exists(ALIGNMENT_PATH),
            "tree": os.path.exists(TREE_PATH),
            # Same CSV candidate list as load_models_safely().
            "csv": any(os.path.exists(c) for c in [
                CSV_PATH,
                os.path.join(BASE_DIR, CSV_PATH),
                os.path.join(BASE_DIR, "app", CSV_PATH),
                os.path.join(os.path.dirname(__file__), CSV_PATH),
                "f_cleaned.csv",
                os.path.join(BASE_DIR, "f_cleaned.csv")
            ])
        }
        return jsonify({
            "status": "healthy" if all([boundary_model, keras_model, analyzer, mafft_available, iqtree_available, files_exist["alignment"], files_exist["tree"], files_exist["csv"]]) else "unhealthy",
            "components": {
                "boundary_model": boundary_model is not None,
                "keras_model": keras_model is not None,
                "kmer_index": kmer_to_index is not None,
                "tree_analyzer": analyzer is not None,
                "mafft_available": mafft_available,
                "iqtree_available": iqtree_available,
                "files": files_exist
            },
            "paths": {
                "base_dir": BASE_DIR,
                "query_output_dir": QUERY_OUTPUT_DIR,
                "alignment_path": ALIGNMENT_PATH,
                "tree_path": TREE_PATH
            }
        }), 200
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return jsonify({"status": "unhealthy", "error": str(e)}), 500
519
-
520
@app.route("/analyze", methods=["POST"])
def analyze_sequence():
    """Analyze a DNA sequence posted as JSON.

    Body: {"sequence": str, "similarity_score": float (30-99, default 95),
           "build_ml_tree": bool (default false)}.
    Returns the pipeline outputs as JSON, or 400 on validation failure.
    """
    try:
        data = request.get_json()
        if not data or "sequence" not in data:
            return jsonify({"error": "Missing 'sequence' in JSON body"}), 400
        sequence = data["sequence"].upper().strip()
        similarity_score = float(data.get("similarity_score", 95.0))
        build_ml_tree = data.get("build_ml_tree", False)
        if not sequence:
            return jsonify({"error": "Empty sequence"}), 400
        # Unlike run_pipeline() (which sanitizes), the API rejects bad chars.
        if not re.match('^[ACTGN]+$', sequence):
            return jsonify({"error": "Invalid sequence (use A, T, C, G, N)"}), 400
        if not 30.0 <= similarity_score <= 99.0:
            return jsonify({"error": "Similarity score must be between 30 and 99"}), 400
        result = run_pipeline(sequence, similarity_score, build_ml_tree)
        # result is the 9-tuple documented on run_pipeline().
        return jsonify({
            "status": "success",
            "boundary_output": result[0],
            "keras_output": result[1],
            "ml_tree_output": result[2],
            "tree_analysis_output": result[3],
            "summary_output": result[4],
            "aligned_file": os.path.basename(result[5]) if result[5] else None,
            "tree_file": os.path.basename(result[6]) if result[6] else None,
            "html_tree_file": os.path.basename(result[7]) if result[7] else None,
            "tree_html_content": result[8]
        }), 200
    except Exception as e:
        logger.error(f"Analyze error: {e}")
        return jsonify({"error": str(e)}), 500
551
-
552
@app.route("/analyze-file", methods=["POST"])
def analyze_file():
    """Analyze a DNA sequence uploaded as a FASTA file (multipart form).

    Form fields: file (.fasta/.fa/.fas/.txt), similarity_score (30-99,
    default 95), build_ml_tree ("true"/"false"). Returns the same JSON
    payload as /analyze.
    """
    temp_file_path = None
    try:
        if 'file' not in request.files:
            return jsonify({"error": "No file provided"}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "Empty filename"}), 400
        if not file.filename.endswith(('.fasta', '.fa', '.fas', '.txt')):
            return jsonify({"error": "Invalid file type (use .fasta, .fa, .fas, .txt)"}), 400
        similarity_score = float(request.form.get("similarity_score", 95.0))
        build_ml_tree = request.form.get("build_ml_tree", "false").lower() == "true"
        if not 30.0 <= similarity_score <= 99.0:
            return jsonify({"error": "Similarity score must be between 30 and 99"}), 400
        # Persist the upload to a temp file so the pipeline can re-read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            file.save(temp_file.name)
            temp_file_path = temp_file.name
        result = run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        return jsonify({
            "status": "success",
            "boundary_output": result[0],
            "keras_output": result[1],
            "ml_tree_output": result[2],
            "tree_analysis_output": result[3],
            "summary_output": result[4],
            "aligned_file": os.path.basename(result[5]) if result[5] else None,
            "tree_file": os.path.basename(result[6]) if result[6] else None,
            "html_tree_file": os.path.basename(result[7]) if result[7] else None,
            "tree_html_content": result[8]
        }), 200
    except Exception as e:
        logger.error(f"Analyze-file error: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Always remove the temp upload — previously a `'temp_file_path' in
        # locals()` hack only ran on the error path and leaked the file when
        # serialization failed after a successful pipeline run.
        cleanup_file(temp_file_path)
587
-
588
@app.route("/download/<file_type>/<filename>", methods=["GET"])
def download_file(file_type, filename):
    """Serve a generated artifact (alignment, tree, or HTML) for download.

    file_type selects the directory and the allowed filename pattern;
    the filename is sanitized before being joined onto any path.
    """
    try:
        if file_type not in ["alignment", "tree", "html"]:
            return jsonify({"error": "Invalid file type (use alignment, tree, html)"}), 400
        # Defence-in-depth against path traversal: never join a raw,
        # client-supplied name onto a directory. secure_filename() was
        # imported but previously unused.
        filename = secure_filename(filename)
        if not filename:
            return jsonify({"error": "Invalid filename"}), 400
        if file_type == "html":
            if not filename.startswith("tree_") or not filename.endswith(".html"):
                return jsonify({"error": "Invalid HTML filename"}), 400
            file_path = os.path.join(BASE_DIR, "output", filename)
        else:
            if file_type == "alignment" and not filename.endswith((".fasta", ".fa")):
                return jsonify({"error": "Invalid alignment filename"}), 400
            if file_type == "tree" and not filename.endswith(".treefile"):
                return jsonify({"error": "Invalid tree filename"}), 400
            file_path = os.path.join(QUERY_OUTPUT_DIR, filename)
        if not os.path.exists(file_path):
            return jsonify({"error": "File not found"}), 404
        return send_file(file_path, as_attachment=True, download_name=filename)
    except Exception as e:
        logger.error(f"Download error: {e}")
        return jsonify({"error": str(e)}), 500
609
-
610
if __name__ == "__main__":
    # Startup banner: log component/tool status, verify reference files,
    # then serve on all interfaces (container entry point).
    logger.info("🧬 Starting Flask Gene Analysis API...")
    mafft_available, iqtree_available, _, _ = check_tool_availability()
    logger.info(f"🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}")
    logger.info(f"🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}")
    logger.info(f"🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}")
    logger.info(f"🧬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'}")
    logger.info(f"🌲 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'}")
    files_exist = {
        "alignment": os.path.exists(ALIGNMENT_PATH),
        "tree": os.path.exists(TREE_PATH),
        # Same CSV candidate list as load_models_safely()/health_check().
        "csv": any(os.path.exists(c) for c in [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ])
    }
    logger.info(f"📂 Files: Alignment={'✅' if files_exist['alignment'] else '❌'}, Tree={'✅' if files_exist['tree'] else '❌'}, CSV={'✅' if files_exist['csv'] else '❌'}")
    # Reference files are mandatory; abort rather than serve broken endpoints.
    if not all(files_exist.values()):
        logger.critical("Missing required reference files")
        sys.exit(1)
    app.run(host="0.0.0.0", port=8080, debug=False)