re-type committed on
Commit
9ed00db
·
verified ·
1 Parent(s): 1740e0a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -37
app.py CHANGED
@@ -204,51 +204,91 @@ def run_pipeline(dna_input):
204
  logging.info("F gene sequence found in dataset")
205
  matched_ids, perc = analyzer.find_similar_sequences(analyzer.matching_percentage)
206
 
207
- # Try to create tree with error handling
208
  try:
209
- analyzer.create_interactive_tree(matched_ids, perc)
210
 
211
- # Check for multiple possible HTML file names
 
 
 
212
  possible_html_files = [
213
  "phylogenetic_tree_normalized_horizontal.html",
 
214
  "phylogenetic_tree.html",
215
- "tree.html"
 
216
  ]
217
 
218
- for html_filename in possible_html_files:
219
- if os.path.exists(html_filename):
220
- html_file = html_filename
221
- with open(html_filename, "r", encoding='utf-8') as f:
222
- tree_html_content = f.read()
223
- ml_output = f"Phylogenetic tree generated successfully with {len(matched_ids)} sequences (similarity: {perc:.1f}%)"
224
- logging.info(f"Tree HTML file found: {html_filename}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  break
226
 
227
  if not html_file:
228
- # List all HTML files in directory for debugging
229
- html_files = [f for f in os.listdir('.') if f.endswith('.html')]
230
- ml_output = f"Tree analysis completed but HTML file not found. Available HTML files: {html_files}"
231
- logging.warning(f"HTML files in directory: {html_files}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
 
233
  except Exception as tree_error:
234
- ml_output = f"Tree creation failed: {str(tree_error)}"
 
 
235
  logging.error(f"Tree creation error: {tree_error}")
 
 
236
 
237
  else:
238
- ml_output = f"F gene sequence not found in dataset (length: {len(processed_sequence)} bp)"
239
  logging.warning(f"F gene sequence not found. Length: {len(processed_sequence)}")
240
  else:
241
- ml_output = "Failed to load CSV dataset"
242
  logging.error("Failed to load CSV dataset")
243
  except Exception as e:
244
- ml_output = f"ML Tree analysis failed: {str(e)}"
245
  logging.error(f"ML Tree failed: {e}")
 
 
246
  elif not os.path.exists(csv_path):
247
- ml_output = f"CSV dataset not found at {csv_path}"
248
  elif not processed_sequence or len(processed_sequence) < 10:
249
- ml_output = f"F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
250
  else:
251
- ml_output = "Skipped due to previous step errors"
252
 
253
  return (
254
  boundary_output,
@@ -264,45 +304,48 @@ def run_pipeline(dna_input):
264
  except Exception as e:
265
  error_msg = f"Pipeline failed: {str(e)}"
266
  logging.error(error_msg)
 
 
267
  return error_msg, "", "", "", None, None, None, error_msg
268
 
269
  # --- Gradio UI ---
270
- with gr.Blocks(title="Viral Gene Phylogenetic Pipeline") as demo:
271
- gr.Markdown("# Viral Gene Phylogenetic Inference Pipeline")
272
  gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction.")
273
 
274
- with gr.Tab("Paste DNA Sequence"):
275
  inp = gr.Textbox(
276
  label="DNA Input",
277
  placeholder="Paste your DNA sequence here (ACTG format)",
278
  lines=5
279
  )
280
- btn1 = gr.Button("Run Pipeline", variant="primary")
281
 
282
- with gr.Tab("Upload FASTA File"):
283
  file_input = gr.File(
284
  label="FASTA File",
285
  file_types=['.fasta', '.fa', '.txt']
286
  )
287
- btn2 = gr.Button("Run on FASTA", variant="primary")
288
 
289
  # Outputs
290
- gr.Markdown("## Pipeline Results")
291
 
292
  with gr.Row():
293
  with gr.Column():
294
- out1 = gr.Textbox(label="Step 1: Extracted F Gene Sequence", lines=8)
295
- out2 = gr.Textbox(label="Step 2: F Gene Validation (Keras)", lines=3)
296
  with gr.Column():
297
- out3 = gr.Textbox(label="Dataset Used")
298
- out4 = gr.Textbox(label="Step 3: Phylogenetic Tree Status", lines=3)
299
 
300
  with gr.Row():
301
- html = gr.File(label="Download Tree (HTML)")
302
- fasta = gr.File(label="Download Aligned FASTA")
303
- phy = gr.File(label="Download IQ-TREE .phy File")
304
 
305
- tree_html = gr.HTML(label="Interactive Tree Preview")
 
306
 
307
  # Event handlers
308
  btn1.click(
 
204
  logging.info("F gene sequence found in dataset")
205
  matched_ids, perc = analyzer.find_similar_sequences(analyzer.matching_percentage)
206
 
207
+ # Enhanced tree creation with better error handling
208
  try:
209
+ logging.info(f"Creating tree with {len(matched_ids)} sequences at {perc:.1f}% similarity")
210
 
211
+ # Call the tree creation method
212
+ tree_result = analyzer.create_interactive_tree(matched_ids, perc)
213
+
214
+ # Check for multiple possible HTML file names and locations
215
  possible_html_files = [
216
  "phylogenetic_tree_normalized_horizontal.html",
217
+ "phylogenetic_tree_horizontal.html",
218
  "phylogenetic_tree.html",
219
+ "tree.html",
220
+ "interactive_tree.html"
221
  ]
222
 
223
+ # Also check in common subdirectories
224
+ search_dirs = [".", "output", "results", "trees"]
225
+
226
+ for search_dir in search_dirs:
227
+ if not os.path.exists(search_dir):
228
+ continue
229
+
230
+ for html_filename in possible_html_files:
231
+ full_path = os.path.join(search_dir, html_filename)
232
+ if os.path.exists(full_path):
233
+ html_file = full_path
234
+ try:
235
+ with open(full_path, "r", encoding='utf-8') as f:
236
+ tree_html_content = f.read()
237
+ ml_output = f"✅ Phylogenetic tree generated successfully!\n- {len(matched_ids)} sequences analyzed\n- Similarity threshold: {perc:.1f}%\n- Tree file: {html_filename}"
238
+ logging.info(f"Tree HTML file found: {full_path}")
239
+ break
240
+ except Exception as read_error:
241
+ logging.error(f"Error reading HTML file {full_path}: {read_error}")
242
+ continue
243
+
244
+ if html_file:
245
  break
246
 
247
  if not html_file:
248
+ # Enhanced debugging - list all HTML files found anywhere under the current directory tree
249
+ all_files = []
250
+ for root, dirs, files in os.walk("."):
251
+ for file in files:
252
+ if file.endswith(('.html', '.htm')):
253
+ all_files.append(os.path.join(root, file))
254
+
255
+ ml_output = f"⚠️ Tree analysis completed but HTML file not found.\n"
256
+ ml_output += f"- Analyzed {len(matched_ids)} sequences at {perc:.1f}% similarity\n"
257
+ ml_output += f"- Available HTML files in directory: {all_files}\n"
258
+ ml_output += f"- Tree creation result: {tree_result if 'tree_result' in locals() else 'Unknown'}"
259
+
260
+ logging.warning(f"HTML files found in directory tree: {all_files}")
261
+
262
+ # Try to get any HTML content that might have been generated
263
+ if hasattr(analyzer, 'last_generated_html') and analyzer.last_generated_html:
264
+ tree_html_content = analyzer.last_generated_html
265
+ ml_output += "\n- Using HTML content from analyzer object"
266
 
267
  except Exception as tree_error:
268
+ ml_output = f"Tree creation failed: {str(tree_error)}\n"
269
+ ml_output += f"- Sequences available: {len(matched_ids)}\n"
270
+ ml_output += f"- Similarity threshold: {perc:.1f}%"
271
  logging.error(f"Tree creation error: {tree_error}")
272
+ import traceback
273
+ logging.error(f"Full traceback: {traceback.format_exc()}")
274
 
275
  else:
276
+ ml_output = f"F gene sequence not found in dataset\n- Query length: {len(processed_sequence)} bp\n- Dataset contains {len(analyzer.data) if hasattr(analyzer, 'data') else 'unknown'} sequences"
277
  logging.warning(f"F gene sequence not found. Length: {len(processed_sequence)}")
278
  else:
279
+ ml_output = "Failed to load CSV dataset"
280
  logging.error("Failed to load CSV dataset")
281
  except Exception as e:
282
+ ml_output = f"ML Tree analysis failed: {str(e)}"
283
  logging.error(f"ML Tree failed: {e}")
284
+ import traceback
285
+ logging.error(f"Full traceback: {traceback.format_exc()}")
286
  elif not os.path.exists(csv_path):
287
+ ml_output = f"CSV dataset not found at {csv_path}"
288
  elif not processed_sequence or len(processed_sequence) < 10:
289
+ ml_output = f"F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
290
  else:
291
+ ml_output = "Skipped due to previous step errors"
292
 
293
  return (
294
  boundary_output,
 
304
  except Exception as e:
305
  error_msg = f"Pipeline failed: {str(e)}"
306
  logging.error(error_msg)
307
+ import traceback
308
+ logging.error(f"Full traceback: {traceback.format_exc()}")
309
  return error_msg, "", "", "", None, None, None, error_msg
310
 
311
  # --- Gradio UI ---
312
+ with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft()) as demo:
313
+ gr.Markdown("# 🧬 Viral Gene Phylogenetic Inference Pipeline")
314
  gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction.")
315
 
316
+ with gr.Tab("📝 Paste DNA Sequence"):
317
  inp = gr.Textbox(
318
  label="DNA Input",
319
  placeholder="Paste your DNA sequence here (ACTG format)",
320
  lines=5
321
  )
322
+ btn1 = gr.Button("🚀 Run Pipeline", variant="primary", size="lg")
323
 
324
+ with gr.Tab("📁 Upload FASTA File"):
325
  file_input = gr.File(
326
  label="FASTA File",
327
  file_types=['.fasta', '.fa', '.txt']
328
  )
329
+ btn2 = gr.Button("🚀 Run on FASTA", variant="primary", size="lg")
330
 
331
  # Outputs
332
+ gr.Markdown("## 📊 Pipeline Results")
333
 
334
  with gr.Row():
335
  with gr.Column():
336
+ out1 = gr.Textbox(label="🎯 Step 1: Extracted F Gene Sequence", lines=8)
337
+ out2 = gr.Textbox(label="🔍 Step 2: F Gene Validation (Keras)", lines=3)
338
  with gr.Column():
339
+ out3 = gr.Textbox(label="📋 Dataset Used")
340
+ out4 = gr.Textbox(label="🌳 Step 3: Phylogenetic Tree Status", lines=5)
341
 
342
  with gr.Row():
343
+ html = gr.File(label="📥 Download Tree (HTML)")
344
+ fasta = gr.File(label="📥 Download Aligned FASTA")
345
+ phy = gr.File(label="📥 Download IQ-TREE .phy File")
346
 
347
+ with gr.Row():
348
+ tree_html = gr.HTML(label="🌳 Interactive Tree Preview", height=600)
349
 
350
  # Event handlers
351
  btn1.click(