re-type committed on
Commit
f7b5b44
·
verified ·
1 Parent(s): f70096f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +370 -489
app.py CHANGED
@@ -13,12 +13,21 @@ import ml_simplified_tree
13
  import tempfile
14
  import shutil
15
  import sys
 
16
  from pathlib import Path
17
  from huggingface_hub import hf_hub_download
 
 
 
18
 
19
  # --- Global Variables ---
20
- MAFFT_PATH = "mafft/mafftdir/bin/mafft" # Update this path as needed
21
- IQTREE_PATH = "iqtree/bin/iqtree2" # Update this path as needed
 
 
 
 
 
22
 
23
  # --- Logging ---
24
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -112,6 +121,7 @@ def check_tool_availability():
112
  '/usr/bin/mafft',
113
  '/usr/local/bin/mafft',
114
  'mafft.bat', # Windows
 
115
  ]
116
 
117
  for candidate in mafft_candidates:
@@ -130,12 +140,15 @@ def check_tool_availability():
130
  IQTREE_PATH,
131
  'iqtree2',
132
  'iqtree',
 
133
  '/usr/bin/iqtree2',
134
  '/usr/local/bin/iqtree2',
135
  '/usr/bin/iqtree',
136
  '/usr/local/bin/iqtree',
137
  'iqtree2.exe', # Windows
138
  'iqtree.exe', # Windows
 
 
139
  ]
140
 
141
  for candidate in iqtree_candidates:
@@ -173,170 +186,148 @@ Docker option:
173
  """
174
  return guide
175
 
176
- def run_mafft_alignment(input_fasta, output_fasta, mafft_cmd):
177
- """Run MAFFT alignment with enhanced error handling"""
 
 
 
178
  try:
179
- # MAFFT command with more robust options
180
- cmd = [
181
- mafft_cmd,
182
- '--auto', # Automatic strategy selection
183
- '--quiet', # Reduce output verbosity
184
- input_fasta
185
- ]
186
-
187
- logging.info(f"Running MAFFT: {' '.join(cmd)}")
188
-
189
- # Run MAFFT with enhanced error handling
190
- result = subprocess.run(
191
- cmd,
192
- capture_output=True,
193
- text=True,
194
- timeout=600, # Increased timeout to 10 minutes
195
- cwd=os.getcwd() # Ensure working directory is set
196
- )
197
-
198
- if result.returncode == 0:
199
- # Write aligned sequences to output file
200
- with open(output_fasta, 'w') as f:
201
- f.write(result.stdout)
202
- logging.info(f"MAFFT alignment completed: {output_fasta}")
203
-
204
- # Verify output file
205
- if os.path.exists(output_fasta) and os.path.getsize(output_fasta) > 0:
206
- return True, output_fasta
207
- else:
208
- return False, "MAFFT completed but output file is empty"
209
- else:
210
- error_msg = result.stderr.strip() if result.stderr else "Unknown MAFFT error"
211
- logging.error(f"MAFFT failed: {error_msg}")
212
- return False, f"MAFFT error: {error_msg}"
213
-
214
- except subprocess.TimeoutExpired:
215
- logging.error("MAFFT timeout")
216
- return False, "MAFFT timeout (>10 minutes). Try with fewer sequences."
217
- except FileNotFoundError:
218
- return False, f"MAFFT executable not found: {mafft_cmd}"
219
- except Exception as e:
220
- logging.error(f"MAFFT execution failed: {e}")
221
- return False, f"MAFFT execution failed: {str(e)}"
222
 
223
- def run_iqtree_analysis(aligned_fasta, output_prefix, iqtree_cmd):
224
- """Run IQ-TREE with enhanced options and error handling"""
225
- try:
226
- # Enhanced IQ-TREE command
227
- cmd = [
228
- iqtree_cmd,
229
- '-s', aligned_fasta,
230
- '-m', 'MFP', # ModelFinder Plus for automatic model selection
231
- '-bb', '1000', # Bootstrap replicates
232
- '-alrt', '1000', # SH-aLRT test
233
- '-nt', 'AUTO', # Auto detect threads
234
- '--prefix', output_prefix,
235
- '-redo', # Overwrite existing files
236
- '--quiet' # Reduce verbosity
237
- ]
238
-
239
- logging.info(f"Running IQ-TREE: {' '.join(cmd)}")
240
-
241
- # Run IQ-TREE with enhanced error handling
242
- result = subprocess.run(
243
- cmd,
244
- capture_output=True,
245
- text=True,
246
- timeout=1200, # 20 minute timeout for larger datasets
247
- cwd=os.getcwd()
248
- )
249
-
250
- if result.returncode == 0:
251
- tree_file = f"{output_prefix}.treefile"
252
- if os.path.exists(tree_file) and os.path.getsize(tree_file) > 0:
253
- logging.info(f"IQ-TREE analysis completed: {tree_file}")
254
- return True, tree_file
255
- else:
256
- logging.error("IQ-TREE completed but tree file not found or empty")
257
- return False, "Tree file not generated or empty"
258
- else:
259
- error_msg = result.stderr.strip() if result.stderr else "Unknown IQ-TREE error"
260
- logging.error(f"IQ-TREE failed: {error_msg}")
261
- return False, f"IQ-TREE error: {error_msg}"
262
-
263
- except subprocess.TimeoutExpired:
264
- logging.error("IQ-TREE timeout")
265
- return False, "IQ-TREE timeout (>20 minutes). Try with fewer sequences or simpler model."
266
- except FileNotFoundError:
267
- return False, f"IQ-TREE executable not found: {iqtree_cmd}"
268
- except Exception as e:
269
- logging.error(f"IQ-TREE execution failed: {e}")
270
- return False, f"IQ-TREE execution failed: {str(e)}"
271
 
272
- def create_simple_neighbor_joining_tree(sequences_dict):
273
- """Create a simple distance-based tree when ML tools are not available"""
274
- try:
275
- # This is a simplified implementation
276
- # In a real scenario, you'd want to use a proper NJ implementation
277
- import random
278
-
279
- seq_names = list(sequences_dict.keys())
280
- n_seqs = len(seq_names)
281
-
282
- if n_seqs < 2:
283
- return None, "Need at least 2 sequences for tree construction"
284
-
285
- # Create a simple Newick tree structure
286
- if n_seqs == 2:
287
- tree_str = f"({seq_names[0]}:0.1,{seq_names[1]}:0.1);"
288
- else:
289
- # Simple clustering approach
290
- tree_str = "(" + ",".join([f"{name}:0.1" for name in seq_names[:5]]) + ");"
291
 
292
- # Save to temporary file
293
- tree_file = "simple_tree.nwk"
294
- with open(tree_file, 'w') as f:
295
- f.write(tree_str)
296
-
297
- return tree_file, "Simple distance-based tree created"
298
-
299
- except Exception as e:
300
- return None, f"Simple tree creation failed: {str(e)}"
301
 
302
- def create_multi_fasta_with_query(query_sequence, query_id="Query_F_Gene"):
303
- """Create a multi-FASTA file with query sequence and reference sequences"""
304
- try:
305
- # Create temporary FASTA file
306
- temp_fasta = tempfile.NamedTemporaryFile(mode='w', suffix='.fasta', delete=False)
307
-
308
- # Add query sequence
309
- temp_fasta.write(f">{query_id}\n{query_sequence}\n")
310
-
311
- # Add reference sequences from existing aligned FASTA if available
312
- ref_fasta_path = "f_gene_sequences_aligned.fasta"
313
- if os.path.exists(ref_fasta_path):
314
- with open(ref_fasta_path, 'r') as ref_file:
315
- temp_fasta.write(ref_file.read())
316
- logging.info(f"Added reference sequences from {ref_fasta_path}")
317
- else:
318
- # If no reference file, try to create from CSV data
319
- if analyzer and hasattr(analyzer, 'data'):
320
- count = 0
321
- for idx, row in analyzer.data.iterrows():
322
- if 'sequence' in row and len(str(row['sequence'])) > 50:
323
- seq_id = row.get('id', f"Ref_{count}")
324
- sequence = str(row['sequence']).upper()
325
- temp_fasta.write(f">{seq_id}\n{sequence}\n")
326
- count += 1
327
- if count >= 20: # Limit to prevent too large datasets
328
- break
329
- logging.info(f"Added {count} reference sequences from CSV")
330
-
331
- temp_fasta.close()
332
- return temp_fasta.name
333
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  except Exception as e:
335
- logging.error(f"Failed to create multi-FASTA: {e}")
336
- return None
 
 
 
 
 
 
 
337
 
338
  def build_maximum_likelihood_tree(f_gene_sequence):
339
- """Build maximum likelihood phylogenetic tree with comprehensive fallback options"""
 
 
 
340
  try:
341
  # Check tool availability with enhanced detection
342
  mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
@@ -354,110 +345,51 @@ def build_maximum_likelihood_tree(f_gene_sequence):
354
  else:
355
  status_msg += f"✅ IQ-TREE found: {iqtree_cmd}\n"
356
 
357
- # If neither tool is available, provide installation guide
358
- if not mafft_available and not iqtree_available:
 
 
 
 
 
 
 
 
 
 
 
359
  guide = install_dependencies_guide()
360
  return False, f"{status_msg}\n{guide}", None, None
361
 
362
- # If only one tool is missing, provide specific guidance
363
- if not mafft_available:
364
- return False, f"{status_msg}\n❌ MAFFT is required for sequence alignment. Please install MAFFT first.", None, None
 
 
 
 
 
 
 
365
 
366
- if not iqtree_available:
367
- status_msg += "\n⚠️ IQ-TREE not available. Attempting simple tree construction...\n"
368
 
369
- # Try to create a simple tree as fallback
370
- multi_fasta = create_multi_fasta_with_query(f_gene_sequence)
371
- if multi_fasta:
372
- # Read sequences
373
- sequences = {}
374
- current_seq = ""
375
- current_name = ""
376
-
377
- with open(multi_fasta, 'r') as f:
378
- for line in f:
379
- line = line.strip()
380
- if line.startswith('>'):
381
- if current_name and current_seq:
382
- sequences[current_name] = current_seq
383
- current_name = line[1:]
384
- current_seq = ""
385
- else:
386
- current_seq += line
387
- if current_name and current_seq:
388
- sequences[current_name] = current_seq
389
 
390
- simple_tree, simple_msg = create_simple_neighbor_joining_tree(sequences)
391
- os.unlink(multi_fasta)
392
-
393
- if simple_tree:
394
- return True, f"{status_msg}✅ {simple_msg}", None, simple_tree
395
- else:
396
- return False, f"{status_msg}❌ {simple_msg}", None, None
397
- else:
398
- return False, f"{status_msg}❌ Failed to create input sequences", None, None
399
-
400
- # Both tools available - proceed with full ML analysis
401
- # Create output directory
402
- output_dir = "ml_tree_output"
403
- os.makedirs(output_dir, exist_ok=True)
404
-
405
- # Step 1: Create multi-FASTA file with query and reference sequences
406
- logging.info("Creating multi-FASTA file...")
407
- multi_fasta = create_multi_fasta_with_query(f_gene_sequence)
408
- if not multi_fasta:
409
- return False, f"{status_msg}❌ Failed to create input FASTA", None, None
410
-
411
- # Step 2: Run MAFFT alignment
412
- logging.info("Running MAFFT alignment...")
413
- aligned_fasta = os.path.join(output_dir, "aligned_sequences.fasta")
414
- mafft_success, mafft_result = run_mafft_alignment(multi_fasta, aligned_fasta, mafft_cmd)
415
-
416
- # Clean up temporary file
417
- os.unlink(multi_fasta)
418
-
419
- if not mafft_success:
420
- return False, f"{status_msg}❌ MAFFT failed: {mafft_result}", None, None
421
-
422
- # Step 3: Run IQ-TREE analysis
423
- logging.info("Running IQ-TREE analysis...")
424
- tree_prefix = os.path.join(output_dir, "ml_tree")
425
- iqtree_success, iqtree_result = run_iqtree_analysis(aligned_fasta, tree_prefix, iqtree_cmd)
426
-
427
- if not iqtree_success:
428
- return False, f"{status_msg}❌ IQ-TREE failed: {iqtree_result}", aligned_fasta, None
429
-
430
- # Step 4: Prepare output files
431
- tree_file = iqtree_result
432
- log_file = f"{tree_prefix}.log"
433
-
434
- # Copy to standard names for compatibility
435
- standard_aligned = "f_gene_sequences_aligned.fasta"
436
- standard_tree = "f_gene_sequences.phy.treefile"
437
-
438
- if os.path.exists(aligned_fasta):
439
- shutil.copy2(aligned_fasta, standard_aligned)
440
- if os.path.exists(tree_file):
441
- shutil.copy2(tree_file, standard_tree)
442
-
443
- success_msg = f"{status_msg}✅ Maximum likelihood tree built successfully!\n"
444
- success_msg += f"- Alignment: {os.path.basename(aligned_fasta)}\n"
445
- success_msg += f"- Tree: {os.path.basename(tree_file)}\n"
446
-
447
- if os.path.exists(log_file):
448
- try:
449
- with open(log_file, 'r') as f:
450
- log_content = f.read()
451
- # Extract model information
452
- if "Best-fit model:" in log_content:
453
- model_lines = [line for line in log_content.split('\n') if "Best-fit model:" in line]
454
- if model_lines:
455
- success_msg += f"- {model_lines[0].strip()}\n"
456
- except Exception as e:
457
- logging.warning(f"Could not read log file: {e}")
458
-
459
- logging.info("Maximum likelihood tree construction completed")
460
- return True, success_msg, aligned_fasta, tree_file
461
 
462
  except Exception as e:
463
  logging.error(f"ML tree construction failed: {e}")
@@ -629,19 +561,19 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
629
  keras_output = ""
630
  if processed_sequence and len(processed_sequence) >= 6:
631
  keras_prediction = predict_with_keras(processed_sequence)
632
- # Use the prediction directly as its now a percentage
633
  keras_output = keras_prediction
634
  else:
635
  keras_output = "Skipped: sequence too short for F gene validation"
636
 
637
- # Step 3: Maximum Likelihood Tree (MAFFT + IQ-TREE)
638
  aligned_file = None
639
  phy_file = None
640
  ml_tree_output = ""
641
 
642
- if build_ml_tree and processed_sequence and len(processed_sequence) >= 50:
643
  try:
644
- logging.info("Starting maximum likelihood tree construction...")
645
  ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
646
 
647
  if ml_success:
@@ -652,12 +584,12 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
652
  ml_tree_output = ml_message # This now includes detailed error information
653
 
654
  except Exception as e:
655
- ml_tree_output = f"❌ ML Tree construction failed: {str(e)}"
656
- logging.error(f"ML Tree failed: {e}")
657
  elif build_ml_tree:
658
- ml_tree_output = "❌ F gene sequence too short for ML tree construction (minimum 50 bp)"
659
  else:
660
- ml_tree_output = "ML tree construction skipped (not requested)"
661
 
662
  # Step 4: ML Simplified Tree (using the existing approach)
663
  html_file = None
@@ -709,7 +641,7 @@ def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
709
  return (
710
  boundary_output, # F gene extraction result
711
  keras_output, # F gene validation result
712
- ml_tree_output, # ML tree construction status
713
  simplified_ml_output, # Simplified tree analysis status
714
  tree_html_content, # HTML content from file for tree display
715
  aligned_file, # Path to aligned FASTA file
@@ -752,290 +684,239 @@ def create_interface():
752
  This tool provides comprehensive analysis of F genes including:
753
  - **Gene Boundary Detection**: Extract F gene sequences from larger genomic sequences
754
  - **Gene Validation**: Validate extracted sequences using machine learning
755
- - **Phylogenetic Analysis**: Build maximum likelihood trees and simplified phylogenetic trees
756
-
757
- **Instructions:**
758
- 1. Enter your sequence directly or upload a FASTA file
759
- 2. Adjust similarity threshold for phylogenetic analysis (1-99%)
760
- 3. Choose whether to build maximum likelihood trees (requires MAFFT & IQ-TREE)
761
- 4. Click "Run Analysis" to start the pipeline
762
  """)
763
 
764
- with gr.Tab("🔬 Analysis Pipeline"):
765
  with gr.Row():
766
  with gr.Column(scale=2):
767
- # Input section
768
- gr.Markdown("### Input Sequence")
 
 
 
 
 
769
  dna_input = gr.Textbox(
770
- label="DNA Sequence",
771
  placeholder="Enter your DNA sequence here (ATCG format)...",
772
- lines=5,
773
- max_lines=10
774
  )
775
 
776
  fasta_file = gr.File(
777
- label="Or Upload FASTA File",
778
- file_types=[".fasta", ".fa", ".fas", ".txt"]
 
779
  )
780
 
781
- with gr.Row():
782
- similarity_score = gr.Slider(
783
- minimum=1,
784
- maximum=99,
785
- value=95.0,
786
- step=1.0,
787
- label="Similarity Threshold (%)",
788
- info="Minimum similarity for phylogenetic analysis"
789
- )
790
-
791
- build_ml_tree = gr.Checkbox(
792
- label="Build ML Tree",
793
- value=False,
794
- info="Build maximum likelihood tree (requires MAFFT & IQ-TREE)"
795
- )
796
 
797
- # Action buttons
798
- with gr.Row():
799
- run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
800
- clear_btn = gr.Button("🗑️ Clear", variant="secondary")
 
801
 
802
  with gr.Column(scale=1):
803
- # Status and info
804
- gr.Markdown("### Analysis Status")
805
- status_display = gr.Textbox(
806
- label="Status",
807
- value="Ready to analyze",
808
- interactive=False,
809
- lines=3
 
810
  )
811
 
812
- # Model status
813
- gr.Markdown("### Available Models")
814
- model_status = []
815
- if boundary_model:
816
- model_status.append("✅ Boundary Detection Model")
817
- else:
818
- model_status.append("❌ Boundary Detection Model")
819
-
820
- if keras_model:
821
- model_status.append("✅ Gene Validation Model")
822
- else:
823
- model_status.append("❌ Gene Validation Model")
824
-
825
- if analyzer:
826
- model_status.append("✅ Tree Analysis Module")
827
- else:
828
- model_status.append("❌ Tree Analysis Module")
829
 
830
- gr.Markdown("\n".join(model_status))
831
 
832
  with gr.Tab("📊 Results"):
833
  with gr.Row():
834
  with gr.Column():
835
- # Text outputs
836
- boundary_output = gr.Textbox(
837
- label="🎯 F Gene Extraction",
838
- lines=5,
839
- interactive=False
840
- )
841
 
842
- keras_output = gr.Textbox(
843
- label="🔍 Gene Validation",
844
- lines=3,
845
- interactive=False
846
- )
 
847
 
848
- with gr.Column():
849
- ml_tree_output = gr.Textbox(
850
- label="🌳 Maximum Likelihood Tree",
851
- lines=5,
852
- interactive=False
853
- )
854
 
855
- simplified_ml_output = gr.Textbox(
856
- label="📈 Simplified Phylogenetic Analysis",
857
- lines=3,
858
- interactive=False
859
- )
860
-
861
- # Tree visualization
862
- gr.Markdown("### 🌲 Phylogenetic Tree Visualization (ML Simplified Tree)")
 
 
 
 
 
 
 
 
863
  tree_html = gr.HTML(
864
- label="Interactive Tree",
865
- value="<p>Click the link below to view the ML Simplified Tree in a new tab.</p>"
866
  )
 
 
 
867
  with gr.Row():
868
- show_tree_link = gr.HTML()
869
-
870
- # File downloads
871
- gr.Markdown("### 📁 Download Results")
872
- with gr.Row():
873
- aligned_file = gr.File(
874
  label="Aligned Sequences (FASTA)",
875
  interactive=False
876
  )
877
-
878
- phy_file = gr.File(
879
  label="Phylogenetic Tree File",
880
  interactive=False
881
  )
882
-
883
- html_file = gr.File(
884
- label="Interactive Tree (HTML) - ML Simplified Tree",
885
  interactive=False
886
  )
887
 
888
- with gr.Tab("ℹ️ Help & Info"):
889
  gr.Markdown("""
890
- ## About This Tool
891
-
892
- ### F Gene Analysis Pipeline
893
- This comprehensive pipeline analyzes F genes through multiple computational approaches:
894
-
895
- #### 🎯 Gene Boundary Detection
896
- - Uses deep learning to identify and extract F gene sequences from larger genomic sequences
897
- - Provides confidence scores for detected boundaries
898
- - Automatically trims sequences to focus on the F gene region
899
-
900
- #### 🔍 Gene Validation
901
- - Employs k-mer based machine learning models to validate extracted sequences
902
- - Provides probability scores indicating likelihood of being a genuine F gene
903
- - Uses 6-mer frequency patterns for classification
904
-
905
- #### 🌳 Phylogenetic Analysis
906
-
907
- **Maximum Likelihood Trees:**
908
- - Requires MAFFT (sequence alignment) and IQ-TREE (phylogenetic reconstruction)
909
- - Performs model selection and bootstrap analysis
910
- - Generates publication-quality phylogenetic trees
911
- - Provides detailed evolutionary analysis
912
-
913
- **Simplified Trees:**
914
- - Uses built-in algorithms for quick phylogenetic analysis
915
- - Interactive visualization with similarity-based clustering
916
- - Faster alternative when external tools are not available
917
-
918
- ### Input Requirements
919
- - **DNA Sequences**: ATCG format, minimum 50 bp for meaningful analysis
920
- - **FASTA Files**: Standard FASTA format with single or multiple sequences
921
- - **Similarity Threshold**: 1-99% for controlling phylogenetic analysis sensitivity
922
 
923
- ### Dependencies
924
 
925
- **Required for ML Trees:**
926
- ```bash
927
- # Ubuntu/Debian
928
- sudo apt-get install mafft iqtree
929
 
930
- # macOS
931
- brew install mafft iqtree
 
932
 
933
- # Conda
934
- conda install -c bioconda mafft iqtree
935
- ```
936
 
937
- ### Output Files
938
- - **Aligned FASTA**: Multiple sequence alignment in FASTA format
939
- - **Tree File**: Newick format phylogenetic tree
940
- - **HTML Tree**: Interactive visualization for web browsers (ML Simplified Tree)
941
 
942
- ### Troubleshooting
 
 
 
943
 
944
- **Common Issues:**
945
- - *"No similar sequences found"*: Lower the similarity threshold
946
- - *"Sequence too short"*: Provide sequences longer than 50 bp
947
- - *"MAFFT/IQ-TREE not found"*: Install required dependencies
948
- - *"Model not available"*: Check model files are properly downloaded
949
 
950
- **Performance Tips:**
951
- - Use sequences between 100-2000 bp for optimal performance
952
- - Limit to <50 sequences for faster tree construction
953
- - Lower similarity thresholds find more distant relatives
954
- - Higher thresholds focus on closely related sequences
955
-
956
- ### Citation
957
- If you use this tool in your research, please cite the appropriate methods and tools used.
958
  """)
959
 
960
- # Event handlers
961
- def run_analysis_text(dna_seq, sim_score, build_tree):
962
- return run_pipeline(dna_seq, sim_score, build_tree)
963
-
964
- def run_analysis_file(file_obj, sim_score, build_tree):
965
- return run_pipeline_from_file(file_obj, sim_score, build_tree)
 
966
 
967
- def run_analysis_combined(dna_seq, file_obj, sim_score, build_tree):
968
- # Priority: file upload over text input
969
- if file_obj is not None:
970
- return run_pipeline_from_file(file_obj, sim_score, build_tree)
 
971
  else:
972
- return run_pipeline(dna_seq, sim_score, build_tree)
973
-
974
- def clear_inputs():
975
- return "", None, 95.0, False, "Ready to analyze"
976
-
977
- def show_tree(html_file):
978
- if html_file and os.path.exists(html_file):
979
- try:
980
- # Convert to relative path for Gradio file serving
981
- relative_path = os.path.relpath(html_file, os.getcwd())
982
- file_url = f"/file={relative_path}"
983
- link_html = f'<a href="{file_url}" target="_blank">View ML Simplified Tree</a>'
984
- return gr.update(value=link_html)
985
- except Exception as e:
986
- return gr.update(value=f"<p>Error generating link: {str(e)}. Please download and open the file manually.</p>")
987
- return gr.update(value="<p>No tree file available. Run analysis to generate one.</p>")
988
 
989
- # Connect events
990
  run_btn.click(
991
- fn=run_analysis_combined,
992
- inputs=[dna_input, fasta_file, similarity_score, build_ml_tree],
 
 
 
 
 
 
993
  outputs=[
994
- boundary_output, keras_output, ml_tree_output,
995
- simplified_ml_output, tree_html, aligned_file,
996
- phy_file, html_file, status_display
 
 
 
 
 
 
997
  ]
998
  )
999
 
1000
- clear_btn.click(
1001
- fn=clear_inputs,
1002
- outputs=[dna_input, fasta_file, similarity_score, build_ml_tree, status_display]
1003
- )
1004
-
1005
- show_tree_link.click(
1006
- fn=show_tree,
1007
- inputs=[html_file],
1008
- outputs=[show_tree_link]
1009
- )
1010
-
1011
- # Example data loading
1012
- gr.Markdown("### 🧪 Example Data")
1013
- example_btn = gr.Button("Load Example F Gene Sequence", variant="secondary")
1014
-
1015
- def load_example():
1016
- example_seq = "ATGAAACTGTCAACACTCACTGAGTACATTAGCCAAGTTCTCAAGACTGAGTGTTTACCTTTGTGAATACACTGAGTCCTTGTCAACGTTCGGCTGCAGTCACACTGATGGTCTTGTCTTCAGGAGCAACTGCAGTCTGTGCTGTGTACTATAGTGCTAAGAGTGATAATGCACTGTTCAGTACCTTTGACAGTGTGTCTCTGTCACCTGGTGCTATGCAGAGCTGCGATGAGATCTACATTGGTCTGATCGATAAGACTGAGTCCAAGGGTGTTGCTGTGTGTACTGTAGAGTGTGATAGTGTTGCCTGCACTGTGTCTATGGCTGATCTTGAGGCTCTGCTTATGTCAACACTGAGTGTGAAATGTTCATTTGCTACTTCAAGACTGATGTGAAGACTGTGTATTGTACTCAGTCATGCAGAGTGAAGTCCTTGAGCCACTTGCTTTGTACAATGTGGGTGATGAGATGTTGTGCTGCAGTGTCAAGGGGCCACAGTCTTGCCTTGATAGTGCGATTGCTGTGATGATGTGCACTTCAATGAGTGGTCGAGATGCTGCTGTGTGTAAGGATGCTGCTGTGTGTAAGAAGGATGCTGCTGTGTGTAAGA"
1017
- return example_seq, "Example F gene sequence loaded"
1018
 
1019
- example_btn.click(
1020
- fn=load_example,
1021
- outputs=[dna_input, status_display]
 
 
 
 
 
 
 
1022
  )
1023
 
1024
  return iface
 
1025
  # --- Main Execution ---
1026
  if __name__ == "__main__":
1027
- # Initialize and launch interface
1028
- interface = create_interface()
1029
 
1030
- # Launch with enhanced configuration
1031
- interface.launch(
1032
- server_name="0.0.0.0", # Allow external connections
1033
- server_port=7860, # Default Gradio port
1034
- share=False, # Set to True for public sharing
1035
- debug=True, # Enable debug mode
1036
- show_error=True, # Show detailed errors
1037
- max_threads=4, # Limit concurrent threads
1038
- auth=None, # Add authentication if needed: ("username", "password")
1039
- ssl_verify=False, # For development environments
1040
- quiet=False # Show startup messages
1041
  )
 
13
  import tempfile
14
  import shutil
15
  import sys
16
+ import uuid
17
  from pathlib import Path
18
  from huggingface_hub import hf_hub_download
19
+ from Bio import SeqIO
20
+ from Bio.Seq import Seq
21
+ from Bio.SeqRecord import SeqRecord
22
 
23
  # --- Global Variables ---
24
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
25
+ MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat") # Windows path
26
+ IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
27
+ ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
28
+ TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
29
+ QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
30
+ os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
31
 
32
  # --- Logging ---
33
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
121
  '/usr/bin/mafft',
122
  '/usr/local/bin/mafft',
123
  'mafft.bat', # Windows
124
+ os.path.join(BASE_DIR, "mafft", "mafftdir", "bin", "mafft"),
125
  ]
126
 
127
  for candidate in mafft_candidates:
 
140
  IQTREE_PATH,
141
  'iqtree2',
142
  'iqtree',
143
+ 'iqtree3',
144
  '/usr/bin/iqtree2',
145
  '/usr/local/bin/iqtree2',
146
  '/usr/bin/iqtree',
147
  '/usr/local/bin/iqtree',
148
  'iqtree2.exe', # Windows
149
  'iqtree.exe', # Windows
150
+ 'iqtree3.exe', # Windows
151
+ os.path.join(BASE_DIR, "iqtree", "bin", "iqtree2"),
152
  ]
153
 
154
  for candidate in iqtree_candidates:
 
186
  """
187
  return guide
188
 
189
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """
    Perform phylogenetic placement using the MAFFT + IQ-TREE approach.

    The query sequence is added to the reference alignment (ALIGNMENT_PATH)
    with ``mafft --add``; IQ-TREE is then run on the combined alignment with
    the reference tree (TREE_PATH) supplied as a constraint tree (``-g``).

    Args:
        sequence: Query nucleotide sequence. Leading/trailing whitespace is
            ignored and the sequence is upper-cased before being written.
        mafft_cmd: MAFFT executable (path or command name) to invoke.
        iqtree_cmd: IQ-TREE executable (path or command name) to invoke.

    Returns:
        A 4-tuple ``(success, message, aligned_fasta, treefile)``.
        ``aligned_fasta`` is the path of the combined alignment once the MAFFT
        step has succeeded (it is also returned when the IQ-TREE step fails);
        ``treefile`` is only set on full success. Failures are reported
        through ``success=False`` and ``message`` rather than by raising.
    """
    # Validate before any path is built so that the early return cannot be
    # clobbered by the cleanup code in ``finally`` below.
    query_seq = sequence.strip().upper()
    if len(query_seq) < 100:
        return False, "Error: Sequence is too short for phylogenetic placement (minimum 100 bp).", None, None

    # Stays None until the temporary query file path is known; the cleanup in
    # ``finally`` relies on this to be safe on every exit path.
    query_fasta = None

    try:
        # Check if reference files exist
        if not os.path.exists(ALIGNMENT_PATH):
            return False, f"Reference alignment not found: {ALIGNMENT_PATH}", None, None

        if not os.path.exists(TREE_PATH):
            return False, f"Reference tree not found: {TREE_PATH}", None, None

        # Generate a unique query ID so concurrent requests do not collide
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")

        # Save query sequence as FASTA
        try:
            query_record = SeqRecord(Seq(query_seq), id=query_id, description="Query sequence")
            SeqIO.write([query_record], query_fasta, "fasta")
            logging.info(f"Query sequence saved: {query_fasta}")
        except Exception as e:
            return False, f"Failed to save query sequence: {str(e)}", None, None

        # Step 1: Add query sequence to reference alignment using MAFFT
        logging.info("Adding query sequence to reference alignment...")
        mafft_cmd_full = [
            mafft_cmd,
            "--add", query_fasta,
            "--reorder",
            ALIGNMENT_PATH,
        ]
        try:
            logging.info(f"Running MAFFT: {' '.join(mafft_cmd_full)}")

            # MAFFT writes the alignment to stdout, so stream it straight
            # into the output file and capture only stderr.
            with open(aligned_with_query, "w") as output_file:
                subprocess.run(
                    mafft_cmd_full,
                    stdout=output_file,
                    stderr=subprocess.PIPE,
                    text=True,
                    timeout=600,  # 10 minute timeout
                    check=True,
                )

            # Verify alignment file was created and is not empty
            if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
                return False, "MAFFT alignment failed: output file is empty", None, None

            logging.info(f"MAFFT alignment completed: {aligned_with_query}")

        except subprocess.CalledProcessError as e:
            # text=True means stderr is already a str; calling .decode() on
            # it would raise AttributeError and hide the real MAFFT error.
            error_msg = e.stderr if e.stderr else "Unknown MAFFT error"
            return False, f"MAFFT alignment failed: {error_msg}", None, None
        except subprocess.TimeoutExpired:
            return False, "MAFFT alignment timeout (>10 minutes)", None, None
        except Exception as e:
            return False, f"MAFFT alignment error: {str(e)}", None, None

        # Step 2: Place sequence in phylogenetic tree using IQ-TREE
        logging.info("Placing sequence in phylogenetic tree...")
        iqtree_cmd_full = [
            iqtree_cmd,
            "-s", aligned_with_query,
            "-g", TREE_PATH,   # Constraint tree (reference tree)
            "-m", "GTR+G",     # Substitution model
            "-pre", output_prefix,
            "-redo",           # Overwrite existing files
            "--quiet",         # Reduce verbosity
        ]
        try:
            logging.info(f"Running IQ-TREE: {' '.join(iqtree_cmd_full)}")

            subprocess.run(
                iqtree_cmd_full,
                capture_output=True,
                text=True,
                timeout=1200,  # 20 minute timeout
                check=True,
            )

            # Check if treefile was generated
            treefile = f"{output_prefix}.treefile"
            if not os.path.exists(treefile) or os.path.getsize(treefile) == 0:
                return False, "IQ-TREE placement failed: treefile not generated", aligned_with_query, None

            logging.info(f"IQ-TREE placement completed: {treefile}")

            # Generate success message with details
            success_msg = "✅ Phylogenetic placement completed successfully!\n"
            success_msg += f"- Query ID: {query_id}\n"
            success_msg += f"- Alignment: {os.path.basename(aligned_with_query)}\n"
            success_msg += f"- Tree: {os.path.basename(treefile)}\n"

            # Best effort: append the first log-likelihood line from the
            # IQ-TREE log; a missing or unreadable log is not an error.
            log_file = f"{output_prefix}.log"
            if os.path.exists(log_file):
                try:
                    with open(log_file, 'r') as f:
                        first_hit = next(
                            (line for line in f.read().split('\n') if "Log-likelihood" in line),
                            None,
                        )
                    if first_hit is not None:
                        success_msg += f"- {first_hit.strip()}\n"
                except Exception as e:
                    logging.warning(f"Could not read log file: {e}")

            return True, success_msg, aligned_with_query, treefile

        except subprocess.CalledProcessError as e:
            error_msg = e.stderr if e.stderr else "Unknown IQ-TREE error"
            return False, f"IQ-TREE placement failed: {error_msg}", aligned_with_query, None
        except subprocess.TimeoutExpired:
            return False, "IQ-TREE placement timeout (>20 minutes)", aligned_with_query, None
        except Exception as e:
            return False, f"IQ-TREE placement error: {str(e)}", aligned_with_query, None

    except Exception as e:
        logging.error(f"Phylogenetic placement failed: {e}")
        return False, f"Phylogenetic placement failed: {str(e)}", None, None
    finally:
        # Clean up the temporary query file. Guard against the path never
        # having been assigned, and never let cleanup mask the real result.
        if query_fasta is not None and os.path.exists(query_fasta):
            try:
                os.unlink(query_fasta)
            except OSError as e:
                logging.warning(f"Could not remove temporary query file {query_fasta}: {e}")
325
 
326
  def build_maximum_likelihood_tree(f_gene_sequence):
327
+ """
328
+ Build maximum likelihood phylogenetic tree using phylogenetic placement approach.
329
+ This replaces the previous de novo tree building with placement-based analysis.
330
+ """
331
  try:
332
  # Check tool availability with enhanced detection
333
  mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
 
345
  else:
346
  status_msg += f"✅ IQ-TREE found: {iqtree_cmd}\n"
347
 
348
+ # Check for reference files
349
+ if not os.path.exists(ALIGNMENT_PATH):
350
+ status_msg += f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
351
+ else:
352
+ status_msg += f"✅ Reference alignment found\n"
353
+
354
+ if not os.path.exists(TREE_PATH):
355
+ status_msg += f"❌ Reference tree not found: {TREE_PATH}\n"
356
+ else:
357
+ status_msg += f"✅ Reference tree found\n"
358
+
359
+ # If any required component is missing, provide installation guide
360
+ if not mafft_available or not iqtree_available:
361
  guide = install_dependencies_guide()
362
  return False, f"{status_msg}\n{guide}", None, None
363
 
364
+ if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
365
+ status_msg += "\n❌ Reference alignment and/or tree files are missing.\n"
366
+ status_msg += "Please ensure f_gene_sequences_aligned.fasta and f_gene_sequences.phy.treefile are available."
367
+ return False, status_msg, None, None
368
+
369
+ # Perform phylogenetic placement
370
+ logging.info("Starting phylogenetic placement...")
371
+ placement_success, placement_message, aligned_file, tree_file = phylogenetic_placement(
372
+ f_gene_sequence, mafft_cmd, iqtree_cmd
373
+ )
374
 
375
+ if placement_success:
376
+ final_message = f"{status_msg}\n{placement_message}"
377
 
378
+ # Copy files to standard locations for compatibility
379
+ if aligned_file and os.path.exists(aligned_file):
380
+ standard_aligned = "query_with_references_aligned.fasta"
381
+ shutil.copy2(aligned_file, standard_aligned)
382
+ aligned_file = standard_aligned
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
 
384
+ if tree_file and os.path.exists(tree_file):
385
+ standard_tree = "query_placement_tree.treefile"
386
+ shutil.copy2(tree_file, standard_tree)
387
+ tree_file = standard_tree
388
+
389
+ logging.info("Phylogenetic placement completed successfully")
390
+ return True, final_message, aligned_file, tree_file
391
+ else:
392
+ return False, f"{status_msg}\n{placement_message}", aligned_file, tree_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
393
 
394
  except Exception as e:
395
  logging.error(f"ML tree construction failed: {e}")
 
561
  keras_output = ""
562
  if processed_sequence and len(processed_sequence) >= 6:
563
  keras_prediction = predict_with_keras(processed_sequence)
564
+ # Use the prediction directly as it's now a percentage
565
  keras_output = keras_prediction
566
  else:
567
  keras_output = "Skipped: sequence too short for F gene validation"
568
 
569
+ # Step 3: Maximum Likelihood Tree (Phylogenetic Placement)
570
  aligned_file = None
571
  phy_file = None
572
  ml_tree_output = ""
573
 
574
+ if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
575
  try:
576
+ logging.info("Starting phylogenetic placement...")
577
  ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
578
 
579
  if ml_success:
 
584
  ml_tree_output = ml_message # This now includes detailed error information
585
 
586
  except Exception as e:
587
+ ml_tree_output = f"❌ Phylogenetic placement failed: {str(e)}"
588
+ logging.error(f"Phylogenetic placement failed: {e}")
589
  elif build_ml_tree:
590
+ ml_tree_output = "❌ F gene sequence too short for phylogenetic placement (minimum 100 bp)"
591
  else:
592
+ ml_tree_output = "Phylogenetic placement skipped (not requested)"
593
 
594
  # Step 4: ML Simplified Tree (using the existing approach)
595
  html_file = None
 
641
  return (
642
  boundary_output, # F gene extraction result
643
  keras_output, # F gene validation result
644
+ ml_tree_output, # Phylogenetic placement status
645
  simplified_ml_output, # Simplified tree analysis status
646
  tree_html_content, # HTML content from file for tree display
647
  aligned_file, # Path to aligned FASTA file
 
684
  This tool provides comprehensive analysis of F genes including:
685
  - **Gene Boundary Detection**: Extract F gene sequences from larger genomic sequences
686
  - **Gene Validation**: Validate extracted sequences using machine learning
687
+ - **Phylogenetic Placement**: Add your sequence to reference phylogenetic trees using MAFFT & IQ-TREE
688
+ - **Simplified Phylogenetic Analysis**: Build quick phylogenetic trees for comparison
 
 
 
 
 
689
  """)
690
 
691
+ with gr.Tab("📁 Input"):
692
  with gr.Row():
693
  with gr.Column(scale=2):
694
+ gr.Markdown("### Input Options")
695
+ input_choice = gr.Radio(
696
+ choices=["Text Input", "FASTA File"],
697
+ value="Text Input",
698
+ label="Choose Input Method"
699
+ )
700
+
701
  dna_input = gr.Textbox(
702
+ label="DNA Sequence",
703
  placeholder="Enter your DNA sequence here (ATCG format)...",
704
+ lines=6,
705
+ visible=True
706
  )
707
 
708
  fasta_file = gr.File(
709
+ label="Upload FASTA File",
710
+ file_types=[".fasta", ".fa", ".fas", ".txt"],
711
+ visible=False
712
  )
713
 
714
def toggle_input(choice):
    """Show the widget for the selected input method and hide the other.

    Returns two Gradio updates, in the order the change handler lists its
    outputs: the DNA text box first, the FASTA upload second.
    """
    # Exactly one of the two widgets is visible at any time.
    use_text = choice == "Text Input"
    return gr.update(visible=use_text), gr.update(visible=not use_text)
 
 
 
 
 
 
 
 
 
 
719
 
720
+ input_choice.change(
721
+ fn=toggle_input,
722
+ inputs=[input_choice],
723
+ outputs=[dna_input, fasta_file]
724
+ )
725
 
726
  with gr.Column(scale=1):
727
+ gr.Markdown("### Analysis Options")
728
+ similarity_score = gr.Slider(
729
+ minimum=50,
730
+ maximum=99,
731
+ value=95,
732
+ step=1,
733
+ label="Similarity Threshold (%)",
734
+ info="Minimum similarity for phylogenetic analysis"
735
  )
736
 
737
+ build_ml_tree = gr.Checkbox(
738
+ label="Build Phylogenetic Tree",
739
+ value=False,
740
+ info="Perform phylogenetic placement (requires MAFFT & IQ-TREE)"
741
+ )
 
 
 
 
 
 
 
 
 
 
 
 
742
 
743
+ run_btn = gr.Button("🚀 Run Analysis", variant="primary", size="lg")
744
 
745
  with gr.Tab("📊 Results"):
746
  with gr.Row():
747
  with gr.Column():
748
+ gr.Markdown("### Analysis Results")
 
 
 
 
 
749
 
750
+ with gr.Accordion("🔍 Gene Boundary Detection", open=True):
751
+ boundary_output = gr.Textbox(
752
+ label="F Gene Extraction",
753
+ lines=4,
754
+ interactive=False
755
+ )
756
 
757
+ with gr.Accordion("✅ Gene Validation", open=True):
758
+ keras_output = gr.Textbox(
759
+ label="F Gene Validation",
760
+ lines=2,
761
+ interactive=False
762
+ )
763
 
764
+ with gr.Accordion("🌳 Phylogenetic Placement", open=False):
765
+ ml_tree_output = gr.Textbox(
766
+ label="ML Tree Status",
767
+ lines=6,
768
+ interactive=False
769
+ )
770
+
771
+ with gr.Accordion("📈 Simplified Tree Analysis", open=False):
772
+ simplified_ml_output = gr.Textbox(
773
+ label="Tree Analysis Status",
774
+ lines=4,
775
+ interactive=False
776
+ )
777
+
778
+ with gr.Tab("🌳 Tree Visualization"):
779
+ gr.Markdown("### Interactive Phylogenetic Tree")
780
  tree_html = gr.HTML(
781
+ label="Phylogenetic Tree",
782
+ value="<p>Run analysis to generate tree visualization</p>"
783
  )
784
+
785
+ with gr.Tab("💾 Downloads"):
786
+ gr.Markdown("### Download Results")
787
  with gr.Row():
788
+ aligned_file_download = gr.File(
 
 
 
 
 
789
  label="Aligned Sequences (FASTA)",
790
  interactive=False
791
  )
792
+ phy_file_download = gr.File(
 
793
  label="Phylogenetic Tree File",
794
  interactive=False
795
  )
796
+ html_file_download = gr.File(
797
+ label="Interactive Tree (HTML)",
 
798
  interactive=False
799
  )
800
 
801
+ with gr.Tab("ℹ️ Information"):
802
  gr.Markdown("""
803
+ ### About This Tool
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804
 
805
+ This F Gene Analysis Pipeline provides comprehensive analysis capabilities:
806
 
807
+ #### 🔍 **Gene Boundary Detection**
808
+ - Uses deep learning models to identify and extract F gene sequences from larger genomic contexts
809
+ - Provides confidence scores for extracted regions
 
810
 
811
+ #### ✅ **Gene Validation**
812
+ - Validates extracted sequences using k-mer based machine learning
813
+ - Provides percentage confidence that the sequence is indeed an F gene
814
 
815
+ #### 🌳 **Phylogenetic Analysis**
816
+ - **Phylogenetic Placement**: Places your sequence in a reference phylogenetic tree using MAFFT and IQ-TREE
817
+ - **Simplified Analysis**: Quick phylogenetic comparison with similar sequences from the database
818
 
819
+ #### 📊 **Output Files**
820
+ - Aligned FASTA sequences
821
+ - Phylogenetic tree files (Newick format)
822
+ - Interactive HTML tree visualizations
823
 
824
+ #### ⚙️ **Requirements**
825
+ - For phylogenetic placement: MAFFT and IQ-TREE must be installed
826
+ - Reference alignment and tree files must be available
827
+ - CSV database for simplified tree analysis
828
 
829
+ #### 📝 **Input Formats**
830
+ - Plain text DNA sequences (ATCG format)
831
+ - FASTA files (.fasta, .fa, .fas, .txt)
832
+ - Sequences should be at least 100 bp for phylogenetic analysis
 
833
 
834
+ #### 🎯 **Tips for Best Results**
835
+ - Use sequences longer than 100 bp for phylogenetic analysis
836
+ - Start with high similarity thresholds (95%) and adjust if needed
837
+ - For large sequences, the boundary detection will extract the F gene portion automatically
 
 
 
 
838
  """)
839
 
840
+ # Status and summary
841
+ with gr.Row():
842
+ status_output = gr.Textbox(
843
+ label="Analysis Summary",
844
+ lines=2,
845
+ interactive=False
846
+ )
847
 
848
+ # Event handlers
849
def run_analysis_wrapper(input_choice, dna_text, fasta_file, similarity, build_tree):
    """Route the request to the pipeline that matches the chosen input method.

    "Text Input" sends the pasted sequence to ``run_pipeline``; any other
    choice hands the uploaded file to ``run_pipeline_from_file``. Whatever the
    selected pipeline returns is passed back unchanged.
    """
    if input_choice != "Text Input":
        return run_pipeline_from_file(fasta_file, similarity, build_tree)
    return run_pipeline(dna_text, similarity, build_tree)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
855
 
 
856
  run_btn.click(
857
+ fn=run_analysis_wrapper,
858
+ inputs=[
859
+ input_choice,
860
+ dna_input,
861
+ fasta_file,
862
+ similarity_score,
863
+ build_ml_tree
864
+ ],
865
  outputs=[
866
+ boundary_output,
867
+ keras_output,
868
+ ml_tree_output,
869
+ simplified_ml_output,
870
+ tree_html,
871
+ aligned_file_download,
872
+ phy_file_download,
873
+ html_file_download,
874
+ status_output
875
  ]
876
  )
877
 
878
+ # Example sequences for demonstration
879
+ examples = [
880
+ [
881
+ "Text Input",
882
+ "ATGAAACTCCTAGGATTCCTTGGAACCGTCAAGTCCTGCACATCCGATGCAGTCTTCCTGAGCACAGGCCCAATCAGCAGGGACAACCAGCTGGACAGCGTAAGCAAACCTGCACCCGACCCGTCGGTAACAGATGGAGACAGTGAGTCTACTCGGACCATCCGAAGCAGGAAACATGTCAACCTGCGACTTCCCGTTCGCCAGATGGCCAGTCCGACCTTCGCCGCCAACTACCTCGATGTCAACGCCGCCAATGATGGCTCCTGCACGTCCTACTACGGCTTCACCCCGACCAACATCCGAGACAACGAGATCTCGTCGGTGGATGTCAGATCGGGCGCCAACGCC",
883
+ None,
884
+ 85.0,
885
+ False
886
+ ],
887
+ [
888
+ "Text Input",
889
+ "ATGAAACTCCTGGGATTCCTTGGAACCGTCAAGTCCTGCACATCCGATGCAGTCTTCCTGAGCACAGGCCCAATCAGCAGGGACAACCAGCTGGACAGCGTAAGCAAACCTGCACCCGACCCGTCGGTAACAGATGGAGACAGTGAGTCTACTCGGACCATCCGAAGCAGGAAACATGTCAACCTGCGACTTCCCGTTCGCCAGATGGCCAGTCCGACCTTCGCCGCCAACTACCTCGATGTCAACGCCGCCAATGATGGCTCCTGCACGTCCTACTACGGCTTCACCCCGACCAACATCCGAGACAACGAGATCTCGTCGGTGGATGTCAGATCGGGCGCCAACGCCGAGATCTGA",
890
+ None,
891
+ 90.0,
892
+ True
893
+ ]
894
+ ]
 
895
 
896
+ gr.Examples(
897
+ examples=examples,
898
+ inputs=[
899
+ input_choice,
900
+ dna_input,
901
+ fasta_file,
902
+ similarity_score,
903
+ build_ml_tree
904
+ ],
905
+ label="Example Sequences"
906
  )
907
 
908
  return iface
909
+
910
  # --- Main Execution ---
911
if __name__ == "__main__":
    # Launch settings, gathered in one place so they are easy to adjust.
    launch_options = {
        "share": False,            # set to True to create a public link
        "server_name": "0.0.0.0",  # allow external access
        "server_port": 7860,       # default Gradio port
        "show_error": True,        # show detailed error messages
        "debug": True,             # enable debug mode
    }

    # Build the interface, then start serving it.
    demo = create_interface()
    demo.launch(**launch_options)