Spaces:

GGproject10
/

simplified_tree_AI

No application file

App Files Community

re-type committed on Jun 8, 2025

Commit

7703f2a

verified ·

1 Parent(s): a20d1ea

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -28

app.py CHANGED Viewed

@@ -17,23 +17,14 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
 try:
     boundary_model = GenePredictor("best_boundary_aware_model.pth")
     keras_model = load_model("best_model.keras")
-    logging.info("Models loaded successfully")
-except FileNotFoundError as e:
-    logging.error(f"Model file not found: {e}")
-    raise
-except Exception as e:
-    logging.error(f"Error loading models: {e}")
-    raise
-try:
     with open("kmer_to_index.pkl", "rb") as f:
         kmer_to_index = pickle.load(f)
-    logging.info("kmer_to_index.pkl loaded successfully")
-except FileNotFoundError:
-    logging.error("kmer_to_index.pkl not found")
     raise
 except Exception as e:
-    logging.error(f"Error loading kmer_to_index.pkl: {e}")
     raise
 # --------- Utilities ---------
@@ -42,7 +33,7 @@ def predict_with_keras(sequence):
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = torch.tensor([indices])
-        prediction = keras_model.predict(input_arr)[0]
         return "".join(str(round(p, 3)) for p in prediction)
     except Exception as e:
         logging.error(f"Error in Keras prediction: {e}")
@@ -68,8 +59,10 @@ def save_to_csv(sequence, path):
         logging.error(f"Error saving CSV: {e}")
         return f"Error saving CSV: {e}"
-def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
     try:
         subprocess.run(["mafft", "--auto", fasta_file], check=True)
         subprocess.run(["iqtree", "-s", fasta_file, "-m", "GTR"], check=True)
         logging.info("MAFFT and IQTree executed successfully")
@@ -83,32 +76,32 @@ def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
 def run_full_pipeline(dna_input):
     try:
-        # 1. Boundary-Aware Prediction
         predictions, probs, confidence = boundary_model.predict(dna_input)
         gene_regions = boundary_model.extract_gene_regions(predictions, dna_input)
         step1_out = gene_regions[0]["sequence"] if gene_regions else dna_input
         logging.info(f"Boundary model output: {step1_out}")
-        # 2. Keras Prediction
         step2_out = predict_with_keras(step1_out)
         logging.info(f"Keras model output: {step2_out}")
-        # 3. Save intermediate files
         fasta_status = save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
-        csv_status = save_to_csv(step2_out, "f gene clean dataset.csv")
-        # 4. Run MAFFT + IQTree
-        mafft_status = run_mafft_and_iqtree()
-        # 5. Run ML tree and ensure HTML output
-        html_file = "tree.html"  # Expected output file from maximum_likelihood
         try:
-            ml_output = maximum_likelihood("f gene clean dataset.csv")
             if os.path.exists(html_file):
                 logging.info(f"HTML tree file generated: {html_file}")
             else:
                 logging.warning(f"HTML tree file {html_file} not found")
-                html_file = None  # Set to None if file doesn't exist
         except Exception as e:
             logging.error(f"ML Tree Error: {e}")
             ml_output = f"ML Tree Error: {e}"
@@ -121,7 +114,7 @@ def run_full_pipeline(dna_input):
             "CSV Save Status": csv_status,
             "MAFFT + IQTree Status": mafft_status,
             "Maximum Likelihood Tree Output": ml_output,
-            "Tree HTML File": html_file  # Return file path for download
         }
     except Exception as e:
         logging.error(f"Pipeline failed: {e}")
@@ -135,10 +128,10 @@ def run_full_pipeline(dna_input):
             "Tree HTML File": None
         }
-# --------- Gradio Interface ---------
 with gr.Blocks() as gr_interface:
     gr.Markdown("# Sequential Phylogenetic Inference Pipeline")
-    gr.Markdown("This pipeline runs sequentially: Boundary-Aware Model → Keras Model → Tree Building")
     dna_input = gr.Textbox(label="Input DNA Sequence")
     submit_button = gr.Button("Run Pipeline")

 try:
     boundary_model = GenePredictor("best_boundary_aware_model.pth")
     keras_model = load_model("best_model.keras")
     with open("kmer_to_index.pkl", "rb") as f:
         kmer_to_index = pickle.load(f)
+    logging.info("Models and kmer_to_index loaded successfully")
+except FileNotFoundError as e:
+    logging.error(f"Model or file not found: {e}")
     raise
 except Exception as e:
+    logging.error(f"Error loading models or files: {e}")
     raise
 # --------- Utilities ---------
         kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
         indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
         input_arr = torch.tensor([indices])
+        prediction = keras_model.predict(input_arr, verbose=0)[0]
         return "".join(str(round(p, 3)) for p in prediction)
     except Exception as e:
         logging.error(f"Error in Keras prediction: {e}")
         logging.error(f"Error saving CSV: {e}")
         return f"Error saving CSV: {e}"
+def run_mafft_and_iqtree(sequence):
+    fasta_file = "f_gene_sequences_aligned.fasta"
     try:
+        save_to_fasta("Predicted_Seq", sequence, fasta_file)
         subprocess.run(["mafft", "--auto", fasta_file], check=True)
         subprocess.run(["iqtree", "-s", fasta_file, "-m", "GTR"], check=True)
         logging.info("MAFFT and IQTree executed successfully")
 def run_full_pipeline(dna_input):
     try:
+        # Step 1: Boundary-Aware Prediction
         predictions, probs, confidence = boundary_model.predict(dna_input)
         gene_regions = boundary_model.extract_gene_regions(predictions, dna_input)
         step1_out = gene_regions[0]["sequence"] if gene_regions else dna_input
         logging.info(f"Boundary model output: {step1_out}")
+        # Step 2: Keras Prediction
         step2_out = predict_with_keras(step1_out)
         logging.info(f"Keras model output: {step2_out}")
+        # Step 3a: Save for MAFFT/IQTree and ML Simplified Tree
         fasta_status = save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
+        csv_status = save_to_csv(step2_out, "f_gene_clean_dataset.csv")
+        # Step 3b: Run MAFFT and IQTree
+        mafft_status = run_mafft_and_iqtree(step2_out)
+        # Step 3c: Run ML Simplified Tree
+        html_file = "tree.html"
         try:
+            ml_output = maximum_likelihood("f_gene_clean_dataset.csv")
             if os.path.exists(html_file):
                 logging.info(f"HTML tree file generated: {html_file}")
             else:
                 logging.warning(f"HTML tree file {html_file} not found")
+                html_file = None
         except Exception as e:
             logging.error(f"ML Tree Error: {e}")
             ml_output = f"ML Tree Error: {e}"
             "CSV Save Status": csv_status,
             "MAFFT + IQTree Status": mafft_status,
             "Maximum Likelihood Tree Output": ml_output,
+            "Tree HTML File": html_file
         }
     except Exception as e:
         logging.error(f"Pipeline failed: {e}")
             "Tree HTML File": None
         }
+# --------- Gradio Interface and API ---------
 with gr.Blocks() as gr_interface:
     gr.Markdown("# Sequential Phylogenetic Inference Pipeline")
+    gr.Markdown("This pipeline runs sequentially: Boundary-Aware Model → Keras Model → MAFFT/IQTree & ML Tree")
     dna_input = gr.Textbox(label="Input DNA Sequence")
     submit_button = gr.Button("Run Pipeline")