Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Files Community

re-type committed on Jun 8, 2025

Commit

a20d1ea

verified ·

1 Parent(s): c59941c

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -51

app.py CHANGED Viewed

@@ -1,84 +1,175 @@
 import gradio as gr
 import torch
 import pickle
 import subprocess
 import pandas as pd
-from predictor import Predictor
 from tensorflow.keras.models import load_model
 from ml_simplified_tree import maximum_likelihood
 # --------- Load Models ---------
-boundary_model = Predictor("best_boundary_aware_model.pth")
-keras_model = load_model("best_model.keras")
-with open("kmer_to_index.pkl", "rb") as f:
-    kmer_to_index = pickle.load(f)
 # --------- Utilities ---------
 def predict_with_keras(sequence):
-    kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
-    indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
-    input_arr = torch.tensor([indices])
-    prediction = keras_model.predict(input_arr)[0]
-    return "".join(str(round(p, 3)) for p in prediction)
 def save_to_fasta(name, sequence, path):
-    with open(path, "w") as f:
-        f.write(f">{name}\n{sequence}\n")
 def save_to_csv(sequence, path):
-    df = pd.DataFrame({"Sequence": [sequence]})
-    df.to_csv(path, index=False)
 def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
     try:
         subprocess.run(["mafft", "--auto", fasta_file], check=True)
-        subprocess.run(["iqtree", "-s", "f_gene_sequences.phy.treefile", "-m", "GTR"], check=True)
-        return "MAFFT and IQTree executed successfully."
-    except Exception as e:
-        return f"Error running alignment/tree: {e}"
 def run_full_pipeline(dna_input):
-    # 1. Boundary-Aware Prediction
-    step1_out = boundary_model.predict(dna_input)
-    # 2. Keras Prediction
-    step2_out = predict_with_keras(step1_out)
-    # 3. Save intermediate files
-    save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
-    save_to_csv(step2_out, "f gene clean dataset.csv")
-    # 4. Run MAFFT + IQTree
-    mafft_status = run_mafft_and_iqtree()
-    # 5. Run ML tree
-    try:
-        ml_output = maximum_likelihood("f gene clean dataset.csv")
-    except Exception as e:
-        ml_output = f"ML Tree Error: {e}"
-    return {
-        "Boundary Model Output": step1_out,
-        "Keras Model Output": step2_out,
-        "MAFFT + IQTree Status": mafft_status,
-        "Maximum Likelihood Tree Output": ml_output
-    }
 # --------- Gradio Interface ---------
-gr_interface = gr.Interface(
-    fn=run_full_pipeline,
-    inputs=gr.Textbox(label="Input DNA Sequence"),
-    outputs=[
-        gr.Textbox(label="Boundary Model Output"),
-        gr.Textbox(label="Keras Model Output"),
-        gr.Textbox(label="MAFFT + IQTree Status"),
-        gr.Textbox(label="ML Tree Output")
-    ],
-    title="Sequential Phylogenetic Inference Pipeline",
-    description="This pipeline runs sequentially: Boundary-Aware Model → Keras Model → Tree Building"
-)
 # --------- Launch ---------
 if __name__ == "__main__":
-    gr_interface.launch()

+# -*- coding: utf-8 -*-
 import gradio as gr
 import torch
 import pickle
 import subprocess
 import pandas as pd
+from predictor import GenePredictor
 from tensorflow.keras.models import load_model
 from ml_simplified_tree import maximum_likelihood
+import logging
+import os
+# Configure logging
+# Root logger at INFO with a timestamp/level prefix; every logging.* call in
+# this module goes through this configuration.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 # --------- Load Models ---------
+# Both models are loaded once at import time. Any failure is logged and then
+# re-raised, so the module does not finish importing without its models.
+try:
+    boundary_model = GenePredictor("best_boundary_aware_model.pth")
+    keras_model = load_model("best_model.keras")
+    logging.info("Models loaded successfully")
+except FileNotFoundError as e:
+    logging.error(f"Model file not found: {e}")
+    raise
+except Exception as e:
+    logging.error(f"Error loading models: {e}")
+    raise
+# k-mer -> index lookup table consumed by predict_with_keras().
+# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
+# kmer_to_index.pkl must only ever come from a trusted source.
+try:
+    with open("kmer_to_index.pkl", "rb") as f:
+        kmer_to_index = pickle.load(f)
+    logging.info("kmer_to_index.pkl loaded successfully")
+except FileNotFoundError:
+    logging.error("kmer_to_index.pkl not found")
+    raise
+except Exception as e:
+    logging.error(f"Error loading kmer_to_index.pkl: {e}")
+    raise
 # --------- Utilities ---------
 def predict_with_keras(sequence):
+    """Run the Keras model on the overlapping 6-mer encoding of ``sequence``.
+
+    Each 6-character window is looked up in ``kmer_to_index`` (windows that
+    are not in the table map to 0) and the resulting index list is passed to
+    ``keras_model.predict`` as a batch of one.
+
+    Returns the first prediction row with every value rounded to 3 places
+    and concatenated into one string, or an "Error in Keras prediction: ..."
+    string if any step raises (the exception is logged, not propagated).
+    """
+    try:
+        # Sliding window of width 6; produces no k-mers when len(sequence) < 6.
+        kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
+        indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
+        # NOTE(review): a torch tensor is handed to a Keras model here --
+        # confirm the Keras backend accepts it, or convert to a NumPy array.
+        input_arr = torch.tensor([indices])
+        prediction = keras_model.predict(input_arr)[0]
+        # NOTE(review): the rounded values are joined with an empty separator,
+        # so individual numbers cannot be recovered from the result -- confirm
+        # this is the intended output format.
+        return "".join(str(round(p, 3)) for p in prediction)
+    except Exception as e:
+        logging.error(f"Error in Keras prediction: {e}")
+        return f"Error in Keras prediction: {e}"
 def save_to_fasta(name, sequence, path):
+    """Write a single-record FASTA file to ``path``.
+
+    The file is overwritten and contains a ``>name`` header line followed by
+    ``sequence`` on one line.
+
+    Returns a status string: "FASTA file saved successfully" on success, or
+    "Error saving FASTA: ..." if writing raises (the exception is logged and
+    not propagated).
+    """
+    try:
+        with open(path, "w") as f:
+            f.write(f">{name}\n{sequence}\n")
+        logging.info(f"FASTA file saved to {path}")
+        return "FASTA file saved successfully"
+    except Exception as e:
+        logging.error(f"Error saving FASTA: {e}")
+        return f"Error saving FASTA: {e}"
 def save_to_csv(sequence, path):
+    """Write ``sequence`` to ``path`` as a one-row CSV.
+
+    The CSV has a single column named "Sequence" and no index column.
+
+    Returns a status string: "CSV file saved successfully" on success, or
+    "Error saving CSV: ..." if building or writing the frame raises (the
+    exception is logged and not propagated).
+    """
+    try:
+        df = pd.DataFrame({"Sequence": [sequence]})
+        df.to_csv(path, index=False)
+        logging.info(f"CSV file saved to {path}")
+        return "CSV file saved successfully"
+    except Exception as e:
+        logging.error(f"Error saving CSV: {e}")
+        return f"Error saving CSV: {e}"
 def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
+    """Run ``mafft --auto`` and then ``iqtree -m GTR`` on ``fasta_file``.
+
+    Both tools are invoked as external commands with ``check=True``, so a
+    non-zero exit status is reported as an error.
+
+    Returns a status string: success text, "Error running MAFFT/IQTree: ..."
+    when a command exits non-zero, or a "not found" message when an
+    executable is missing. Other exception types are not caught here.
+    """
      try:
+        # NOTE(review): MAFFT is expected to print its alignment to standard
+        # output, and nothing here captures or redirects it; the IQ-TREE call
+        # below is given the same file MAFFT read. Confirm whether the aligned
+        # output should be saved and passed to IQ-TREE instead.
          subprocess.run(["mafft", "--auto", fasta_file], check=True)
+        subprocess.run(["iqtree", "-s", fasta_file, "-m", "GTR"], check=True)
+        logging.info("MAFFT and IQTree executed successfully")
+        return "MAFFT and IQTree executed successfully"
+    except subprocess.CalledProcessError as e:
+        logging.error(f"Error running MAFFT/IQTree: {e}")
+        return f"Error running MAFFT/IQTree: {e}"
+    except FileNotFoundError:
+        logging.error("MAFFT or IQTree not found. Ensure they are installed.")
+        return "MAFFT or IQTree not found. Ensure they are installed and in PATH."
 def run_full_pipeline(dna_input):
+    """Run the whole pipeline on one DNA string and collect per-stage results.
+
+    Stages, in order: boundary model prediction and gene-region extraction,
+    Keras prediction, writing the FASTA and CSV intermediates, MAFFT +
+    IQ-TREE, and the maximum-likelihood tree step.
+
+    Returns a dict with the keys "Boundary Model Output", "Keras Model
+    Output", "FASTA Save Status", "CSV Save Status", "MAFFT + IQTree Status",
+    "Maximum Likelihood Tree Output" and "Tree HTML File", in that order
+    (the same order as the output components of the Gradio interface).
+    "Tree HTML File" is a path or None. If anything outside the inner ML
+    ``try`` raises, the error text is placed under "Boundary Model Output"
+    and the remaining entries are "N/A" / None.
+    """
+    try:
+        # 1. Boundary-Aware Prediction
+        predictions, probs, confidence = boundary_model.predict(dna_input)
+        gene_regions = boundary_model.extract_gene_regions(predictions, dna_input)
+        # Use the first extracted region; fall back to the raw input when the
+        # model reports no regions.
+        step1_out = gene_regions[0]["sequence"] if gene_regions else dna_input
+        logging.info(f"Boundary model output: {step1_out}")
+        # 2. Keras Prediction
+        # NOTE(review): predict_with_keras returns an error-message string on
+        # failure instead of raising, and that string is then written to the
+        # FASTA/CSV files below like any other result.
+        step2_out = predict_with_keras(step1_out)
+        logging.info(f"Keras model output: {step2_out}")
+        # 3. Save intermediate files
+        fasta_status = save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
+        csv_status = save_to_csv(step2_out, "f gene clean dataset.csv")
+        # 4. Run MAFFT + IQTree
+        mafft_status = run_mafft_and_iqtree()
+        # 5. Run ML tree and ensure HTML output
+        html_file = "tree.html"  # Expected output file from maximum_likelihood
+        try:
+            ml_output = maximum_likelihood("f gene clean dataset.csv")
+            # NOTE(review): this only checks that tree.html exists in the
+            # working directory; a file left over from an earlier run would
+            # also pass this check.
+            if os.path.exists(html_file):
+                logging.info(f"HTML tree file generated: {html_file}")
+            else:
+                logging.warning(f"HTML tree file {html_file} not found")
+                html_file = None  # Set to None if file doesn't exist
+        except Exception as e:
+            logging.error(f"ML Tree Error: {e}")
+            ml_output = f"ML Tree Error: {e}"
+            html_file = None
+        return {
+            "Boundary Model Output": step1_out,
+            "Keras Model Output": step2_out,
+            "FASTA Save Status": fasta_status,
+            "CSV Save Status": csv_status,
+            "MAFFT + IQTree Status": mafft_status,
+            "Maximum Likelihood Tree Output": ml_output,
+            "Tree HTML File": html_file  # Return file path for download
+        }
+    except Exception as e:
+        logging.error(f"Pipeline failed: {e}")
+        return {
+            "Boundary Model Output": f"Error: {e}",
+            "Keras Model Output": "N/A",
+            "FASTA Save Status": "N/A",
+            "CSV Save Status": "N/A",
+            "MAFFT + IQTree Status": "N/A",
+            "Maximum Likelihood Tree Output": "N/A",
+            "Tree HTML File": None
+        }
 # --------- Gradio Interface ---------
+# Keys of the dict returned by run_full_pipeline, listed in the order of the
+# output components wired to the button below.
+PIPELINE_OUTPUT_KEYS = (
+    "Boundary Model Output",
+    "Keras Model Output",
+    "FASTA Save Status",
+    "CSV Save Status",
+    "MAFFT + IQTree Status",
+    "Maximum Likelihood Tree Output",
+    "Tree HTML File",
+)
+def _pipeline_outputs(dna_sequence):
+    """Adapt run_full_pipeline's dict result to Gradio's positional outputs.
+
+    A Blocks event with several output components expects one value per
+    component, in order (or a dict keyed by the components themselves). A
+    dict keyed by plain strings is treated as a single value and fails the
+    output-count check, so the values are unpacked into a list here.
+    """
+    result = run_full_pipeline(dna_sequence)
+    return [result[key] for key in PIPELINE_OUTPUT_KEYS]
+with gr.Blocks() as gr_interface:
+    gr.Markdown("# Sequential Phylogenetic Inference Pipeline")
+    gr.Markdown("This pipeline runs sequentially: Boundary-Aware Model → Keras Model → Tree Building")
+    dna_input = gr.Textbox(label="Input DNA Sequence")
+    submit_button = gr.Button("Run Pipeline")
+    boundary_output = gr.Textbox(label="Boundary Model Output")
+    keras_output = gr.Textbox(label="Keras Model Output")
+    fasta_status = gr.Textbox(label="FASTA Save Status")
+    csv_status = gr.Textbox(label="CSV Save Status")
+    mafft_status = gr.Textbox(label="MAFFT + IQTree Status")
+    ml_output = gr.Textbox(label="Maximum Likelihood Tree Output")
+    tree_download = gr.File(label="Download Tree (HTML)")
+    # The order of this list must match PIPELINE_OUTPUT_KEYS.
+    submit_button.click(
+        fn=_pipeline_outputs,
+        inputs=dna_input,
+        outputs=[
+            boundary_output,
+            keras_output,
+            fasta_status,
+            csv_status,
+            mafft_status,
+            ml_output,
+            tree_download
+        ]
+    )
 # --------- Launch ---------
 if __name__ == "__main__":
+    try:
+        # launch() blocks until the server stops when run as a script, so a
+        # log line placed after it would only appear at shutdown. Announce
+        # the start-up before handing control to Gradio.
+        logging.info("Launching Gradio interface on 0.0.0.0:7860")
+        # Bind to all interfaces on port 7860.
+        gr_interface.launch(server_name="0.0.0.0", server_port=7860)
+    except Exception as e:
+        logging.error(f"Error launching Gradio interface: {e}")
+        raise