re-type committed on
Commit
7703f2a
·
verified ·
1 Parent(s): a20d1ea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -28
app.py CHANGED
@@ -17,23 +17,14 @@ logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(
17
  try:
18
  boundary_model = GenePredictor("best_boundary_aware_model.pth")
19
  keras_model = load_model("best_model.keras")
20
- logging.info("Models loaded successfully")
21
- except FileNotFoundError as e:
22
- logging.error(f"Model file not found: {e}")
23
- raise
24
- except Exception as e:
25
- logging.error(f"Error loading models: {e}")
26
- raise
27
-
28
- try:
29
  with open("kmer_to_index.pkl", "rb") as f:
30
  kmer_to_index = pickle.load(f)
31
- logging.info("kmer_to_index.pkl loaded successfully")
32
- except FileNotFoundError:
33
- logging.error("kmer_to_index.pkl not found")
34
  raise
35
  except Exception as e:
36
- logging.error(f"Error loading kmer_to_index.pkl: {e}")
37
  raise
38
 
39
  # --------- Utilities ---------
@@ -42,7 +33,7 @@ def predict_with_keras(sequence):
42
  kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
43
  indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
44
  input_arr = torch.tensor([indices])
45
- prediction = keras_model.predict(input_arr)[0]
46
  return "".join(str(round(p, 3)) for p in prediction)
47
  except Exception as e:
48
  logging.error(f"Error in Keras prediction: {e}")
@@ -68,8 +59,10 @@ def save_to_csv(sequence, path):
68
  logging.error(f"Error saving CSV: {e}")
69
  return f"Error saving CSV: {e}"
70
 
71
- def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
 
72
  try:
 
73
  subprocess.run(["mafft", "--auto", fasta_file], check=True)
74
  subprocess.run(["iqtree", "-s", fasta_file, "-m", "GTR"], check=True)
75
  logging.info("MAFFT and IQTree executed successfully")
@@ -83,32 +76,32 @@ def run_mafft_and_iqtree(fasta_file="f_gene_sequences_aligned.fasta"):
83
 
84
  def run_full_pipeline(dna_input):
85
  try:
86
- # 1. Boundary-Aware Prediction
87
  predictions, probs, confidence = boundary_model.predict(dna_input)
88
  gene_regions = boundary_model.extract_gene_regions(predictions, dna_input)
89
  step1_out = gene_regions[0]["sequence"] if gene_regions else dna_input
90
  logging.info(f"Boundary model output: {step1_out}")
91
 
92
- # 2. Keras Prediction
93
  step2_out = predict_with_keras(step1_out)
94
  logging.info(f"Keras model output: {step2_out}")
95
 
96
- # 3. Save intermediate files
97
  fasta_status = save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
98
- csv_status = save_to_csv(step2_out, "f gene clean dataset.csv")
99
 
100
- # 4. Run MAFFT + IQTree
101
- mafft_status = run_mafft_and_iqtree()
102
 
103
- # 5. Run ML tree and ensure HTML output
104
- html_file = "tree.html" # Expected output file from maximum_likelihood
105
  try:
106
- ml_output = maximum_likelihood("f gene clean dataset.csv")
107
  if os.path.exists(html_file):
108
  logging.info(f"HTML tree file generated: {html_file}")
109
  else:
110
  logging.warning(f"HTML tree file {html_file} not found")
111
- html_file = None # Set to None if file doesn't exist
112
  except Exception as e:
113
  logging.error(f"ML Tree Error: {e}")
114
  ml_output = f"ML Tree Error: {e}"
@@ -121,7 +114,7 @@ def run_full_pipeline(dna_input):
121
  "CSV Save Status": csv_status,
122
  "MAFFT + IQTree Status": mafft_status,
123
  "Maximum Likelihood Tree Output": ml_output,
124
- "Tree HTML File": html_file # Return file path for download
125
  }
126
  except Exception as e:
127
  logging.error(f"Pipeline failed: {e}")
@@ -135,10 +128,10 @@ def run_full_pipeline(dna_input):
135
  "Tree HTML File": None
136
  }
137
 
138
- # --------- Gradio Interface ---------
139
  with gr.Blocks() as gr_interface:
140
  gr.Markdown("# Sequential Phylogenetic Inference Pipeline")
141
- gr.Markdown("This pipeline runs sequentially: Boundary-Aware Model → Keras Model → Tree Building")
142
 
143
  dna_input = gr.Textbox(label="Input DNA Sequence")
144
  submit_button = gr.Button("Run Pipeline")
 
17
  try:
18
  boundary_model = GenePredictor("best_boundary_aware_model.pth")
19
  keras_model = load_model("best_model.keras")
 
 
 
 
 
 
 
 
 
20
  with open("kmer_to_index.pkl", "rb") as f:
21
  kmer_to_index = pickle.load(f)
22
+ logging.info("Models and kmer_to_index loaded successfully")
23
+ except FileNotFoundError as e:
24
+ logging.error(f"Model or file not found: {e}")
25
  raise
26
  except Exception as e:
27
+ logging.error(f"Error loading models or files: {e}")
28
  raise
29
 
30
  # --------- Utilities ---------
 
33
  kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
34
  indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
35
  input_arr = torch.tensor([indices])
36
+ prediction = keras_model.predict(input_arr, verbose=0)[0]
37
  return "".join(str(round(p, 3)) for p in prediction)
38
  except Exception as e:
39
  logging.error(f"Error in Keras prediction: {e}")
 
59
  logging.error(f"Error saving CSV: {e}")
60
  return f"Error saving CSV: {e}"
61
 
62
+ def run_mafft_and_iqtree(sequence):
63
+ fasta_file = "f_gene_sequences_aligned.fasta"
64
  try:
65
+ save_to_fasta("Predicted_Seq", sequence, fasta_file)
66
  subprocess.run(["mafft", "--auto", fasta_file], check=True)
67
  subprocess.run(["iqtree", "-s", fasta_file, "-m", "GTR"], check=True)
68
  logging.info("MAFFT and IQTree executed successfully")
 
76
 
77
  def run_full_pipeline(dna_input):
78
  try:
79
+ # Step 1: Boundary-Aware Prediction
80
  predictions, probs, confidence = boundary_model.predict(dna_input)
81
  gene_regions = boundary_model.extract_gene_regions(predictions, dna_input)
82
  step1_out = gene_regions[0]["sequence"] if gene_regions else dna_input
83
  logging.info(f"Boundary model output: {step1_out}")
84
 
85
+ # Step 2: Keras Prediction
86
  step2_out = predict_with_keras(step1_out)
87
  logging.info(f"Keras model output: {step2_out}")
88
 
89
+ # Step 3a: Save for MAFFT/IQTree and ML Simplified Tree
90
  fasta_status = save_to_fasta("Predicted_Seq", step2_out, "f_gene_sequences_aligned.fasta")
91
+ csv_status = save_to_csv(step2_out, "f_gene_clean_dataset.csv")
92
 
93
+ # Step 3b: Run MAFFT and IQTree
94
+ mafft_status = run_mafft_and_iqtree(step2_out)
95
 
96
+ # Step 3c: Run ML Simplified Tree
97
+ html_file = "tree.html"
98
  try:
99
+ ml_output = maximum_likelihood("f_gene_clean_dataset.csv")
100
  if os.path.exists(html_file):
101
  logging.info(f"HTML tree file generated: {html_file}")
102
  else:
103
  logging.warning(f"HTML tree file {html_file} not found")
104
+ html_file = None
105
  except Exception as e:
106
  logging.error(f"ML Tree Error: {e}")
107
  ml_output = f"ML Tree Error: {e}"
 
114
  "CSV Save Status": csv_status,
115
  "MAFFT + IQTree Status": mafft_status,
116
  "Maximum Likelihood Tree Output": ml_output,
117
+ "Tree HTML File": html_file
118
  }
119
  except Exception as e:
120
  logging.error(f"Pipeline failed: {e}")
 
128
  "Tree HTML File": None
129
  }
130
 
131
+ # --------- Gradio Interface and API ---------
132
  with gr.Blocks() as gr_interface:
133
  gr.Markdown("# Sequential Phylogenetic Inference Pipeline")
134
+ gr.Markdown("This pipeline runs sequentially: Boundary-Aware Model → Keras Model → MAFFT/IQTree & ML Tree")
135
 
136
  dna_input = gr.Textbox(label="Input DNA Sequence")
137
  submit_button = gr.Button("Run Pipeline")