re-type commited on
Commit
c3d2d01
·
verified ·
1 Parent(s): 3490fb3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +608 -727
app.py CHANGED
@@ -12,75 +12,47 @@ import pandas as pd
12
  import re
13
  import logging
14
  import numpy as np
 
 
 
15
  import tempfile
16
  import shutil
17
  import sys
18
  import uuid
19
  from pathlib import Path
 
 
 
 
20
  import stat
21
  import time
 
 
 
 
 
 
22
 
23
- # Import with error handling
24
- try:
25
- from predictor import EnhancedGenePredictor
26
- except ImportError as e:
27
- logging.warning(f"Failed to import EnhancedGenePredictor: {e}")
28
- EnhancedGenePredictor = None
29
-
30
- try:
31
- from tensorflow.keras.models import load_model
32
- except ImportError as e:
33
- logging.warning(f"Failed to import TensorFlow: {e}")
34
- load_model = None
35
-
36
  try:
37
- from analyzer import PhylogeneticTreeAnalyzer
38
- except ImportError as e:
39
- logging.warning(f"Failed to import PhylogeneticTreeAnalyzer: {e}")
40
- PhylogeneticTreeAnalyzer = None
 
 
41
 
42
- try:
43
- from huggingface_hub import hf_hub_download
44
- except ImportError as e:
45
- logging.warning(f"Failed to import huggingface_hub: {e}")
46
- hf_hub_download = None
47
 
 
48
  try:
49
- from Bio import SeqIO
50
- from Bio.Seq import Seq
51
- from Bio.SeqRecord import SeqRecord
52
- except ImportError as e:
53
- logging.warning(f"Failed to import BioPython: {e}")
54
- SeqIO = None
55
-
56
- # --- Logging Setup ---
57
- def setup_logging():
58
- """Setup logging configuration"""
59
- try:
60
- log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
61
- log_handler = logging.StreamHandler()
62
- log_handler.setFormatter(log_formatter)
63
-
64
- # Try to setup file logging, fallback if it fails
65
- handlers = [log_handler]
66
- try:
67
- file_handler = logging.FileHandler('/tmp/app.log')
68
- file_handler.setFormatter(log_formatter)
69
- handlers.append(file_handler)
70
- except Exception as e:
71
- print(f"Warning: Failed to set up file logging: {e}")
72
-
73
- logging.basicConfig(level=logging.INFO, handlers=handlers, force=True)
74
- logger = logging.getLogger(__name__)
75
- logger.info(f"Gradio version: {gr.__version__}")
76
- return logger
77
- except Exception as e:
78
- print(f"Critical: Failed to setup logging: {e}")
79
- # Create basic logger
80
- logging.basicConfig(level=logging.INFO)
81
- return logging.getLogger(__name__)
82
-
83
- logger = setup_logging()
84
 
85
  # --- Global Variables ---
86
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -89,13 +61,7 @@ IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
89
  ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
90
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
91
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
92
-
93
- # Ensure output directory exists
94
- try:
95
- os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
96
- os.makedirs("/tmp", exist_ok=True)
97
- except Exception as e:
98
- logger.warning(f"Failed to create directories: {e}")
99
 
100
  # Model repository and file paths
101
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
@@ -107,815 +73,730 @@ keras_model = None
107
  kmer_to_index = None
108
  analyzer = None
109
 
110
- # --- Safe Model Loading ---
111
  def load_models_safely():
112
- """Load models with comprehensive error handling"""
113
  global boundary_model, keras_model, kmer_to_index, analyzer
114
-
115
- logger.info("🔍 Starting model loading...")
116
-
117
- # Load boundary model
118
- if EnhancedGenePredictor and hf_hub_download:
119
- try:
120
- logger.info("Loading boundary model...")
121
- boundary_path = hf_hub_download(
122
- repo_id=MODEL_REPO,
123
- filename="best_boundary_aware_model.pth",
124
- token=None
125
- )
126
- if os.path.exists(boundary_path):
127
- boundary_model = EnhancedGenePredictor(boundary_path)
128
- logger.info("✅ Boundary model loaded successfully.")
129
- else:
130
- logger.error("❌ Boundary model file not found after download.")
131
- except Exception as e:
132
- logger.error(f"❌ Failed to load boundary model: {e}")
133
- boundary_model = None
134
- else:
135
- logger.warning("⚠️ EnhancedGenePredictor or hf_hub_download not available")
136
-
137
- # Load Keras model
138
- if load_model and hf_hub_download:
139
- try:
140
- logger.info("Loading Keras model...")
141
- keras_path = hf_hub_download(
142
- repo_id=MODEL_REPO,
143
- filename="best_model.keras",
144
- token=None
145
- )
146
- kmer_path = hf_hub_download(
147
- repo_id=MODEL_REPO,
148
- filename="kmer_to_index.pkl",
149
- token=None
150
- )
151
-
152
- if os.path.exists(keras_path) and os.path.exists(kmer_path):
153
- keras_model = load_model(keras_path)
154
- with open(kmer_path, "rb") as f:
155
- kmer_to_index = pickle.load(f)
156
- logger.info("✅ Keras model and k-mer index loaded successfully.")
157
- else:
158
- logger.error("❌ Keras model or k-mer files not found.")
159
- except Exception as e:
160
- logger.error(f"❌ Failed to load Keras model: {e}")
161
- keras_model = None
162
- kmer_to_index = None
163
- else:
164
- logger.warning("⚠️ TensorFlow load_model or hf_hub_download not available")
165
-
166
- # Load tree analyzer
167
- if PhylogeneticTreeAnalyzer:
168
- try:
169
- logger.info("🌳 Initializing tree analyzer...")
170
- analyzer = PhylogeneticTreeAnalyzer()
171
-
172
- # Try to find CSV file
173
- csv_candidates = [
174
- CSV_PATH,
175
- os.path.join(BASE_DIR, CSV_PATH),
176
- os.path.join(BASE_DIR, "app", CSV_PATH),
177
- os.path.join(os.path.dirname(__file__), CSV_PATH),
178
- "f_cleaned.csv",
179
- os.path.join(BASE_DIR, "f_cleaned.csv")
180
- ]
181
-
182
- csv_loaded = False
183
- for csv_candidate in csv_candidates:
184
- if os.path.exists(csv_candidate):
185
- logger.info(f"📊 Trying CSV: {csv_candidate}")
186
- try:
187
- if analyzer.load_data(csv_candidate):
188
- logger.info(f"✅ CSV loaded from: {csv_candidate}")
189
- csv_loaded = True
190
- break
191
- except Exception as e:
192
- logger.warning(f"CSV load failed for {csv_candidate}: {e}")
193
- continue
194
-
195
- if not csv_loaded:
196
- logger.error("❌ Failed to load CSV data from any candidate location.")
197
- analyzer = None
198
- else:
199
  try:
200
- if hasattr(analyzer, 'train_ai_model') and analyzer.train_ai_model():
201
- logger.info("✅ AI model training completed successfully")
202
- else:
203
- logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
204
  except Exception as e:
205
- logger.warning(f"⚠️ AI model training failed: {e}")
206
-
207
- except Exception as e:
208
- logger.error(f"❌ Tree analyzer initialization failed: {e}")
209
  analyzer = None
210
- else:
211
- logger.warning("⚠️ PhylogeneticTreeAnalyzer not available")
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  # --- Tool Detection ---
214
  def setup_binary_permissions():
215
- """Set executable permissions on binary files"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  try:
217
- for binary in [MAFFT_PATH, IQTREE_PATH]:
218
- if os.path.exists(binary):
219
- try:
220
- os.chmod(binary, os.stat(binary).st_mode | stat.S_IEXEC)
221
- logger.info(f"Set executable permission on {binary}")
222
- except Exception as e:
223
- logger.warning(f"Failed to set permission on {binary}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  except Exception as e:
225
- logger.warning(f"Binary permission setup failed: {e}")
 
 
 
 
 
 
 
226
 
227
- def check_tool_availability():
228
- """Check if required tools are available"""
229
  try:
230
- setup_binary_permissions()
231
-
232
- # Check MAFFT
233
- mafft_available = False
234
- mafft_cmd = None
235
- mafft_candidates = ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH]
236
-
237
- for candidate in mafft_candidates:
238
- if shutil.which(candidate) or os.path.exists(candidate):
239
- try:
240
- result = subprocess.run(
241
- [candidate, "--help"],
242
- capture_output=True,
243
- text=True,
244
- timeout=5
245
- )
246
- if result.returncode == 0 or "mafft" in result.stderr.lower():
247
- mafft_available = True
248
- mafft_cmd = candidate
249
- logger.info(f" MAFFT found at: {candidate}")
250
- break
251
- except Exception as e:
252
- logger.debug(f"MAFFT test failed for {candidate}: {e}")
253
-
254
- # Check IQ-TREE
255
- iqtree_available = False
256
- iqtree_cmd = None
257
- iqtree_candidates = ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH]
258
-
259
- for candidate in iqtree_candidates:
260
- if shutil.which(candidate) or os.path.exists(candidate):
261
- try:
262
- result = subprocess.run(
263
- [candidate, "--help"],
264
- capture_output=True,
265
- text=True,
266
- timeout=5
267
- )
268
- if result.returncode == 0 or "iqtree" in result.stderr.lower():
269
- iqtree_available = True
270
- iqtree_cmd = candidate
271
- logger.info(f"✅ IQ-TREE found at: {candidate}")
272
- break
273
- except Exception as e:
274
- logger.debug(f"IQ-TREE test failed for {candidate}: {e}")
275
-
276
- return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
277
-
278
  except Exception as e:
279
- logger.error(f"Tool availability check failed: {e}")
280
- return False, False, None, None
281
 
282
- # --- Core Functions ---
283
  def predict_with_keras(sequence):
284
- """Predict using Keras model with error handling"""
285
  try:
286
  if not keras_model or not kmer_to_index:
287
  return "❌ Keras model not available."
288
-
289
  if len(sequence) < 6:
290
  return "❌ Sequence too short (<6 bp)."
291
-
292
- # Generate k-mers
293
  kmers = [sequence[i:i+6] for i in range(len(sequence)-5)]
294
  indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
295
-
296
- # Make prediction
297
  input_arr = np.array([indices])
298
  prediction = keras_model.predict(input_arr, verbose=0)[0]
299
  f_gene_prob = prediction[-1]
300
-
301
- # Convert to percentage
302
  percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
303
  return f"✅ {percentage}% F gene confidence"
304
-
305
  except Exception as e:
306
- logger.error(f"Keras prediction failed: {e}")
307
  return f"❌ Error: {str(e)}"
308
 
309
  def read_fasta_file(file_obj):
310
- """Read FASTA file with error handling"""
311
  try:
312
  if file_obj is None:
313
  return ""
314
-
315
  if isinstance(file_obj, str):
316
  with open(file_obj, "r") as f:
317
  content = f.read()
318
  else:
319
  content = file_obj.read().decode("utf-8")
320
-
321
- # Extract sequence lines (non-header lines)
322
  lines = content.strip().split("\n")
323
  seq_lines = [line.strip() for line in lines if not line.startswith(">")]
324
  return ''.join(seq_lines)
325
-
326
  except Exception as e:
327
- logger.error(f"Failed to read FASTA file: {e}")
328
  return ""
 
329
 
330
- def analyze_sequence_basic(sequence, similarity_score=95.0):
331
- """Basic sequence analysis without external tools"""
332
- try:
333
- if not sequence or len(sequence.strip()) < 10:
334
- return "❌ Invalid sequence.", None, None
335
-
336
- # Clean sequence
337
- clean_seq = re.sub(r'[^ATCGN]', 'N', sequence.upper())
338
-
339
- # Basic analysis
340
- length = len(clean_seq)
341
- gc_content = (clean_seq.count('G') + clean_seq.count('C')) / length * 100
342
- n_content = clean_seq.count('N') / length * 100
343
-
344
- analysis_result = f"""
345
- ✅ Basic Analysis Complete
346
- • Length: {length} bp
347
- • GC Content: {gc_content:.1f}%
348
- • N Content: {n_content:.1f}%
349
- • Similarity Threshold: {similarity_score}%
350
- """
351
-
352
- return analysis_result, None, None
353
-
354
- except Exception as e:
355
- logger.error(f"Basic analysis failed: {e}")
356
- return f"❌ Analysis error: {str(e)}", None, None
357
-
358
- def run_pipeline_safe(dna_input, similarity_score=95.0, build_ml_tree=False):
359
- """Safe pipeline execution with comprehensive error handling"""
360
  try:
361
- # Input validation
362
- if not dna_input or not dna_input.strip():
363
- return "❌ Empty input", "", "", "", "No input provided", None, None, None, None, "No input", "No input", None, None
364
-
365
- # Clean and validate sequence
366
  dna_input = dna_input.upper().strip()
367
- if not re.match('^[ACTGN\s\n\r]+$', dna_input):
368
- # Remove invalid characters
369
- dna_input = re.sub(r'[^ACTGN]', 'N', dna_input)
370
-
371
- # Remove whitespace
372
- processed_sequence = re.sub(r'\s+', '', dna_input)
373
-
374
- logger.info(f"Processing sequence of length: {len(processed_sequence)}")
375
-
376
- # Boundary detection
377
  boundary_output = ""
378
  if boundary_model:
379
  try:
380
- result = boundary_model.predict_sequence(processed_sequence)
381
- if hasattr(result, 'get') and result.get('gene_regions'):
382
- regions = result['gene_regions']
383
- if regions:
384
- processed_sequence = regions[0]["sequence"]
385
- boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
386
- else:
387
- boundary_output = "⚠️ No F gene regions found."
388
  else:
389
- boundary_output = "⚠️ Boundary detection completed (no regions found)."
 
390
  except Exception as e:
391
- logger.error(f"Boundary prediction error: {e}")
392
  boundary_output = f"❌ Boundary prediction error: {str(e)}"
 
393
  else:
394
- boundary_output = f"⚠️ Boundary model not available. Using full input: {len(processed_sequence)} bp"
395
-
396
- # Keras prediction
397
- keras_output = ""
398
- if len(processed_sequence) >= 6:
399
- keras_output = predict_with_keras(processed_sequence)
400
- else:
401
- keras_output = "❌ Sequence too short for classification."
402
-
403
- # ML Tree analysis (simplified for now)
404
  ml_tree_output = ""
405
- if build_ml_tree:
406
- if len(processed_sequence) >= 100:
407
- try:
408
- mafft_available, iqtree_available, _, _ = check_tool_availability()
409
- if mafft_available and iqtree_available:
410
- ml_tree_output = "⚠️ ML tree analysis not implemented in safe mode."
411
- else:
412
- ml_tree_output = "❌ MAFFT or IQ-TREE not available"
413
- except Exception as e:
414
- ml_tree_output = f"❌ ML tree error: {str(e)}"
415
- else:
416
- ml_tree_output = "❌ Sequence too short for ML tree (<100 bp)."
 
 
417
  else:
418
- ml_tree_output = "⚠️ ML tree analysis skipped."
419
-
420
- # Tree analysis
421
- tree_analysis_output = ""
422
- tree_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Tree analysis not available in safe mode.</div>"
423
- report_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Report not available in safe mode.</div>"
424
-
425
- if analyzer and len(processed_sequence) >= 10:
426
  try:
427
- result, _, _ = analyze_sequence_basic(processed_sequence, similarity_score)
428
- tree_analysis_output = result
429
- tree_html_content = f"<div style='padding: 20px;'><h3>Basic Analysis</h3><pre>{result}</pre></div>"
430
- report_html_content = tree_html_content
 
 
 
 
 
 
 
 
431
  except Exception as e:
432
- logger.error(f"Tree analysis error: {e}")
433
- tree_analysis_output = f" Tree analysis error: {str(e)}"
 
434
  else:
435
- tree_analysis_output = "❌ Tree analyzer not available or sequence too short."
436
-
437
- # Create summary
438
  summary_output = f"""
439
  📊 ANALYSIS SUMMARY:
440
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
441
- Input Length: {len(dna_input)} bp
442
- Processed Length: {len(processed_sequence)} bp
443
- Boundary Detection: {'✅ Active' if boundary_model else '❌ Unavailable'}
444
- Classification: {'✅ Active' if keras_model else '❌ Unavailable'}
445
- ML Tree: {'✅ Requested' if build_ml_tree else '⚠️ Skipped'}
446
- Tree Analysis: {'✅ Active' if analyzer else '❌ Unavailable'}
447
  ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
448
-
449
- Results:
450
- • Boundary: {boundary_output.split(':')[-1].strip() if ':' in boundary_output else boundary_output}
451
- • Classification: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
452
- • ML Tree: {'Requested' if build_ml_tree else 'Skipped'}
453
- • Analysis: {'Completed' if '✅' in tree_analysis_output else 'Failed'}
454
  """
455
-
456
  return (
457
- boundary_output,
458
- keras_output,
459
- ml_tree_output,
460
- tree_analysis_output,
461
- summary_output,
462
- None, # aligned_file
463
- None, # phy_file
464
- None, # additional_file_1
465
- None, # additional_file_2
466
- tree_html_content,
467
- report_html_content,
468
- None, # tree_html_path
469
- None # report_html_path
470
  )
471
-
472
  except Exception as e:
473
  logger.error(f"Pipeline error: {e}", exc_info=True)
474
  error_msg = f"❌ Pipeline Error: {str(e)}"
475
- return (
476
- error_msg, "", "", "", error_msg,
477
- None, None, None, None,
478
- f"<div style='color: red;'>{error_msg}</div>",
479
- f"<div style='color: red;'>{error_msg}</div>",
480
- None, None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
481
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
483
  # --- Gradio Interface ---
484
- def create_safe_gradio_interface():
485
- """Create a safe Gradio interface with comprehensive error handling"""
486
  try:
487
  with gr.Blocks(
488
  title="🧬 Gene Analysis Pipeline",
489
  theme=gr.themes.Soft(),
490
  css="""
491
- .gradio-container {
492
- max-width: 1200px !important;
493
- margin: 0 auto;
494
- }
495
- .status-box {
496
- padding: 15px;
497
- border-radius: 8px;
498
- margin: 10px 0;
499
- border-left: 4px solid #007bff;
500
- background: linear-gradient(90deg, #f8f9fa 0%, #e9ecef 100%);
501
- }
502
- .success {
503
- background-color: #d4edda;
504
- border-left-color: #28a745;
505
- color: #155724;
506
- }
507
- .warning {
508
- background-color: #fff3cd;
509
- border-left-color: #ffc107;
510
- color: #856404;
511
- }
512
- .error {
513
- background-color: #f8d7da;
514
- border-left-color: #dc3545;
515
- color: #721c24;
516
- }
517
- .analysis-section {
518
- border: 1px solid #dee2e6;
519
- border-radius: 8px;
520
- padding: 20px;
521
- margin: 10px 0;
522
- background: white;
523
- }
524
  """
525
  ) as iface:
526
-
527
- # Header
528
- gr.Markdown("""
529
- # 🧬 Gene Analysis Pipeline
530
-
531
- ### Comprehensive DNA sequence analysis with machine learning
532
-
533
- This tool provides multi-modal analysis including boundary detection, gene classification,
534
- and phylogenetic analysis for DNA sequences.
535
- """)
536
-
537
- # System Status
538
  with gr.Row():
539
  with gr.Column():
540
- try:
541
- mafft_available, iqtree_available, _, _ = check_tool_availability()
542
- status_html = f"""
543
- <div class="status-box">
544
- <h3>🔧 System Status</h3>
545
- <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px;">
546
- <div>🤖 Boundary Model: <strong>{'✅ Loaded' if boundary_model else '❌ Missing'}</strong></div>
547
- <div>🧠 Keras Model: <strong>{'✅ Loaded' if keras_model else '❌ Missing'}</strong></div>
548
- <div>🌳 Tree Analyzer: <strong>{'✅ Loaded' if analyzer else '❌ Missing'}</strong></div>
549
- <div>🧬 MAFFT: <strong>{'✅ Available' if mafft_available else '❌ Missing'}</strong></div>
550
- <div>🌲 IQ-TREE: <strong>{'✅ Available' if iqtree_available else '❌ Missing'}</strong></div>
551
- <div>📊 Safe Mode: <strong>{'✅ Active' if not all([boundary_model, keras_model, analyzer]) else '⚠️ Inactive'}</strong></div>
552
- </div>
553
- </div>
554
- """
555
- except Exception as e:
556
- status_html = f"""
557
- <div class="status-box error">
558
- <h3>❌ System Status Error</h3>
559
- <p>Failed to check system status: {str(e)}</p>
560
- </div>
561
- """
562
-
563
- gr.HTML(value=status_html)
564
-
565
- # Main Interface
566
  with gr.Tabs():
567
- with gr.Tab("📝 Text Input"):
568
  with gr.Row():
569
  with gr.Column(scale=2):
570
  dna_input = gr.Textbox(
571
  label="🧬 DNA Sequence",
572
- placeholder="Enter your DNA sequence (ATCG format)...\nExample: ATCGATCGATCG...",
573
- lines=8,
574
- max_lines=15,
575
- info="Paste your DNA sequence here. Supports FASTA format or raw sequence."
576
  )
577
-
578
- with gr.Row():
579
- similarity_slider = gr.Slider(
580
- minimum=1,
581
- maximum=99,
582
- value=95,
583
- step=1,
584
- label="🎯 Similarity Threshold (%)",
585
- info="Minimum similarity for phylogenetic analysis"
586
- )
587
-
588
- ml_tree_checkbox = gr.Checkbox(
589
- label="🌲 Build ML Tree",
590
- value=False,
591
- info="Perform phylogenetic placement (requires external tools)"
592
- )
593
-
594
- analyze_btn = gr.Button(
595
- "🔬 Analyze Sequence",
596
- variant="primary",
597
- size="lg",
598
- scale=1
599
- )
600
-
601
  with gr.Column(scale=1):
602
- gr.Markdown("""
603
- ### 📋 Quick Guide
604
-
605
- 1. **Paste DNA sequence** in ATCG format
606
- 2. **Adjust similarity** threshold (1-99%)
607
- 3. **Enable ML tree** for phylogenetic analysis
608
- 4. **Click Analyze** to start processing
609
-
610
- **Supported formats:**
611
- - Raw DNA sequence: `ATCGATCG...`
612
- - FASTA format: `>header\\nATCG...`
613
- - Mixed case (auto-converted)
614
- - With/without spaces/newlines
615
-
616
- **Requirements:**
617
- - Minimum length: 10 bp for basic analysis
618
- - Minimum length: 100 bp for ML tree
619
- - Only ATCG nucleotides (others converted to N)
620
- """)
621
-
622
- with gr.Tab("📁 File Upload"):
623
  with gr.Row():
624
  with gr.Column(scale=2):
625
  file_input = gr.File(
626
  label="📄 Upload FASTA File",
627
  file_types=[".fasta", ".fa", ".fas", ".txt"],
628
- info="Select a FASTA file containing your DNA sequence"
629
- )
630
-
631
- with gr.Row():
632
- file_similarity_slider = gr.Slider(
633
- minimum=1,
634
- maximum=99,
635
- value=95,
636
- step=1,
637
- label="🎯 Similarity Threshold (%)"
638
- )
639
-
640
- file_ml_tree_checkbox = gr.Checkbox(
641
- label="🌲 Build ML Tree",
642
- value=False
643
- )
644
-
645
- analyze_file_btn = gr.Button(
646
- "🔬 Analyze File",
647
- variant="primary",
648
- size="lg"
649
  )
650
-
651
  with gr.Column(scale=1):
652
- gr.Markdown("""
653
- ### 📄 File Requirements
654
-
655
- **Accepted formats:**
656
- - `.fasta`, `.fa`, `.fas`
657
- - `.txt` with FASTA content
658
-
659
- **FASTA format example:**
660
- ```
661
- >sequence_name
662
- ATCGATCGATCG...
663
- ```
664
-
665
- **Notes:**
666
- - Single or multiple sequences supported
667
- - First sequence will be analyzed
668
- - Maximum file size: 10MB
669
- - UTF-8 encoding recommended
670
- """)
671
-
672
- # Results Section
673
  gr.Markdown("## 📊 Analysis Results")
674
-
675
  with gr.Row():
676
- with gr.Column(scale=1):
677
  boundary_output = gr.Textbox(
678
  label="🎯 Boundary Detection",
679
  interactive=False,
680
- lines=3,
681
- info="Gene region identification results"
682
  )
683
-
684
  keras_output = gr.Textbox(
685
- label="🧠 Gene Classification",
686
  interactive=False,
687
- lines=3,
688
- info="Machine learning classification confidence"
689
  )
690
-
691
- with gr.Column(scale=1):
692
  ml_tree_output = gr.Textbox(
693
  label="🌲 Phylogenetic Placement",
694
  interactive=False,
695
- lines=3,
696
- info="Maximum likelihood tree placement"
697
  )
698
-
699
  tree_analysis_output = gr.Textbox(
700
  label="🌳 Tree Analysis",
701
  interactive=False,
702
- lines=3,
703
- info="Phylogenetic tree construction results"
704
  )
705
-
706
  summary_output = gr.Textbox(
707
- label="📋 Comprehensive Summary Report",
708
  interactive=False,
709
- lines=12,
710
- info="Complete analysis overview"
711
  )
712
-
713
- # Visualization Section
 
 
 
714
  with gr.Tabs():
715
- with gr.Tab("🌳 Interactive Tree"):
716
  tree_html = gr.HTML(
717
- label="Phylogenetic Tree Visualization",
718
- value="""
719
- <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
720
- <h3>🌳 Tree Visualization</h3>
721
- <p>No tree generated yet. Run analysis to see interactive phylogenetic tree.</p>
722
- <p><em>Note: Tree visualization requires successful sequence analysis.</em></p>
723
- </div>
724
- """
725
  )
726
-
727
- with gr.Tab("📊 Detailed Report"):
728
  report_html = gr.HTML(
729
  label="Analysis Report",
730
- value="""
731
- <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
732
- <h3>📊 Analysis Report</h3>
733
- <p>No report generated yet. Run analysis to see detailed results.</p>
734
- <p><em>Note: Report includes statistical analysis and recommendations.</em></p>
735
- </div>
736
- """
737
  )
738
 
739
- # Event Handlers
740
- def handle_analysis_safe(dna_seq, similarity, build_ml):
741
- """Safe analysis handler with comprehensive error handling"""
742
- try:
743
- if not dna_seq or not dna_seq.strip():
744
- error_msg = "❌ Please enter a DNA sequence"
745
- return (
746
- error_msg, "", "", "", error_msg,
747
- None, None, None, None,
748
- f"<div style='color: red;'>{error_msg}</div>",
749
- f"<div style='color: red;'>{error_msg}</div>"
750
- )
751
-
752
- return run_pipeline_safe(dna_seq, similarity, build_ml)
753
-
754
- except Exception as e:
755
- logger.error(f"Analysis handler error: {e}")
756
- error_msg = f"❌ Analysis failed: {str(e)}"
757
- return (
758
- error_msg, "", "", "", error_msg,
759
- None, None, None, None,
760
- f"<div style='color: red;'>{error_msg}</div>",
761
- f"<div style='color: red;'>{error_msg}</div>"
762
- )
763
 
764
- def handle_file_analysis_safe(file_obj, similarity, build_ml):
765
- """Safe file analysis handler"""
766
- try:
767
- if file_obj is None:
768
- error_msg = "❌ Please upload a FASTA file"
769
- return (
770
- error_msg, "", "", "", error_msg,
771
- None, None, None, None,
772
- f"<div style='color: red;'>{error_msg}</div>",
773
- f"<div style='color: red;'>{error_msg}</div>"
774
- )
775
-
776
- sequence = read_fasta_file(file_obj)
777
- if not sequence:
778
- error_msg = "❌ Failed to read sequence from file"
779
- return (
780
- error_msg, "", "", "", error_msg,
781
- None, None, None, None,
782
- f"<div style='color: red;'>{error_msg}</div>",
783
- f"<div style='color: red;'>{error_msg}</div>"
784
- )
785
-
786
- return run_pipeline_safe(sequence, similarity, build_ml)
787
-
788
- except Exception as e:
789
- logger.error(f"File analysis handler error: {e}")
790
- error_msg = f"❌ File analysis failed: {str(e)}"
791
- return (
792
- error_msg, "", "", "", error_msg,
793
- None, None, None, None,
794
- f"<div style='color: red;'>{error_msg}</div>",
795
- f"<div style='color: red;'>{error_msg}</div>"
796
- )
797
-
798
- # Connect event handlers
799
  analyze_btn.click(
800
- fn=handle_analysis_safe,
801
- inputs=[dna_input, similarity_slider, ml_tree_checkbox],
802
  outputs=[
803
- boundary_output,
804
- keras_output,
805
- ml_tree_output,
806
- tree_analysis_output,
807
- summary_output,
808
- tree_html,
809
- report_html
810
  ],
811
- show_progress=True
 
 
812
  )
813
 
814
  analyze_file_btn.click(
815
- fn=handle_file_analysis_safe,
816
- inputs=[file_input, file_similarity_slider, file_ml_tree_checkbox],
817
  outputs=[
818
- boundary_output,
819
- keras_output,
820
- ml_tree_output,
821
- tree_analysis_output,
822
- summary_output,
823
- tree_html,
824
- report_html
 
 
 
 
 
 
825
  ],
826
- show_progress=True
 
827
  )
828
 
829
- # Footer
830
  gr.Markdown("""
831
- ---
832
- ### 🔬 About This Tool
833
-
834
- This Gene Analysis Pipeline provides comprehensive DNA sequence analysis using:
835
- - **Boundary Detection**: Machine learning-based gene region identification
836
- - **Classification**: Deep learning confidence scoring for gene classification
837
- - **Phylogenetic Analysis**: Maximum likelihood tree construction and placement
838
- - **Interactive Visualization**: Dynamic tree and report generation
839
-
840
- **Safe Mode**: When external tools or models are unavailable, the pipeline operates in safe mode with basic analysis capabilities.
841
-
842
- ---
843
- *Powered by Gradio Built with ❤️ for genomics research*
844
  """)
845
 
846
  return iface
847
-
848
  except Exception as e:
849
- logger.error(f"Interface creation failed: {e}")
850
- # Return minimal interface on failure
851
- def minimal_interface():
852
- return gr.Interface(
853
- fn=lambda x: f"❌ System Error: {str(e)}",
854
- inputs=gr.Textbox(label="Input"),
855
- outputs=gr.Textbox(label="Output"),
856
- title="Gene Analysis Pipeline - Error Mode"
857
- )
858
- return minimal_interface()
859
 
860
- # --- Main Execution ---
861
- def main():
862
- """Main function with comprehensive error handling"""
863
  try:
 
 
864
  logger.info("🚀 Starting Gene Analysis Pipeline...")
865
-
866
- # Load models
867
- load_models_safely()
868
-
869
- # Create interface
870
- logger.info("🎨 Creating Gradio interface...")
871
- iface = create_safe_gradio_interface()
872
-
873
- # Launch
874
- logger.info("🌐 Launching application...")
875
- iface.launch(
876
- server_name="0.0.0.0",
877
- server_port=7860,
878
- share=False,
879
- show_error=True,
880
- max_threads=10
881
  )
882
-
883
- except KeyboardInterrupt:
884
- logger.info("🛑 Application stopped by user")
885
- iface.close()
886
  except Exception as e:
887
- logger.error(f" Critical error in main: {e}", exc_info=True)
888
-
889
- # Emergency fallback interface
890
  try:
891
- logger.info("🚨 Starting emergency fallback interface...")
892
- emergency_iface = gr.Interface(
893
- fn=lambda x: f"❌ System in emergency mode. Error: {str(e)}",
894
- inputs=gr.Textbox(label="DNA Sequence", placeholder="Emergency mode - limited functionality"),
895
- outputs=gr.Textbox(label="Status"),
896
- title="🚨 Gene Analysis Pipeline - Emergency Mode",
897
- description="The system is running in emergency mode due to initialization errors."
898
- )
899
- emergency_iface.launch(
900
  server_name="0.0.0.0",
901
  server_port=7860,
902
  share=False,
903
- show_error=True
904
  )
905
- except Exception as emergency_e:
906
- logger.error(f" Emergency interface failed: {emergency_e}")
907
- print(f"CRITICAL: Complete system failure. Error: {e}")
908
- print(f"Emergency fallback also failed: {emergency_e}")
909
- sys.exit(1)
910
- finally:
911
- try:
912
- emergency_iface.close()
913
- except:
914
- pass
915
- finally:
916
- try:
917
- iface.close()
918
- except:
919
- pass
920
  if __name__ == "__main__":
921
- main()
 
 
 
 
 
 
 
 
 
 
 
12
  import re
13
  import logging
14
  import numpy as np
15
+ from predictor import EnhancedGenePredictor
16
+ from tensorflow.keras.models import load_model
17
+ from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
20
  import sys
21
  import uuid
22
  from pathlib import Path
23
+ from huggingface_hub import hf_hub_download
24
+ from Bio import SeqIO
25
+ from Bio.Seq import Seq
26
+ from Bio.SeqRecord import SeqRecord
27
  import stat
28
  import time
29
+ import asyncio
30
+ from fastapi import FastAPI, File, UploadFile, Form, HTTPException
31
+ from fastapi.responses import HTMLResponse, FileResponse
32
+ from pydantic import BaseModel
33
+ from typing import Optional
34
+ import uvicorn
35
 
36
# --- Logging Setup ---
# Console output is always configured; the /tmp log file is best-effort and
# the app keeps running with console-only logging when it cannot be opened.
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)

try:
    file_handler = logging.FileHandler('/tmp/app.log')
    file_handler.setFormatter(log_formatter)
    logging.basicConfig(
        level=logging.INFO,
        handlers=[log_handler, file_handler],
    )
except Exception as e:
    logging.basicConfig(level=logging.INFO, handlers=[log_handler])
    logging.warning(f"Failed to set up file logging: {e}")

logger = logging.getLogger(__name__)
logger.info(f"Gradio version: {gr.__version__}")

# Set event loop policy for compatibility with Gradio Spaces
try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception as e:
    logger.warning(f"Failed to set event loop policy: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # --- Global Variables ---
58
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
61
  ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
62
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
63
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
64
+ os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 
 
 
 
 
 
65
 
66
  # Model repository and file paths
67
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
 
73
  kmer_to_index = None
74
  analyzer = None
75
 
76
+ # --- Model Loading ---
77
def _load_boundary_model():
    """Download the boundary checkpoint and wrap it in EnhancedGenePredictor.

    Returns the predictor, or None when the download or construction fails.
    """
    try:
        boundary_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_boundary_aware_model.pth",
            token=None,
        )
        if not os.path.exists(boundary_path):
            logger.error("❌ Boundary model file not found after download.")
            return None
        model = EnhancedGenePredictor(boundary_path)
        logger.info("✅ Boundary model loaded successfully.")
        return model
    except Exception as e:
        logger.error(f"❌ Failed to load boundary model: {e}")
        return None


def _load_keras_model():
    """Download the Keras model and its k-mer index.

    Returns ``(model, kmer_index)``; both are None when either part fails, so
    callers never see a model without its matching index.
    """
    try:
        keras_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_model.keras",
            token=None,
        )
        kmer_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="kmer_to_index.pkl",
            token=None,
        )
        if not (os.path.exists(keras_path) and os.path.exists(kmer_path)):
            logger.error("❌ Keras model or k-mer files not found.")
            return None, None
        model = load_model(keras_path)
        # NOTE(security): pickle.load executes arbitrary code contained in the
        # file. This is only acceptable while MODEL_REPO is fully trusted.
        with open(kmer_path, "rb") as f:
            index = pickle.load(f)
        logger.info("✅ Keras model and k-mer index loaded successfully.")
        return model, index
    except Exception as e:
        logger.error(f"❌ Failed to load Keras model: {e}")
        return None, None


def _init_tree_analyzer():
    """Create the tree analyzer and load its CSV from the first usable location.

    Returns the analyzer, or None when no candidate CSV could be loaded.
    A failed AI-model training is logged but does not discard the analyzer.
    """
    try:
        logger.info("🌳 Initializing tree analyzer...")
        tree_analyzer = PhylogeneticTreeAnalyzer()
        # dict.fromkeys drops duplicate paths while keeping the probing order.
        csv_candidates = dict.fromkeys([
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv"),
        ])
        csv_loaded = False
        for csv_candidate in csv_candidates:
            if not os.path.exists(csv_candidate):
                continue
            logger.info(f"📊 Trying CSV: {csv_candidate}")
            try:
                if tree_analyzer.load_data(csv_candidate):
                    logger.info(f"✅ CSV loaded from: {csv_candidate}")
                    csv_loaded = True
                    break
            except Exception as e:
                logger.warning(f"CSV load failed for {csv_candidate}: {e}")
        if not csv_loaded:
            logger.error("❌ Failed to load CSV data from any candidate location.")
            return None
        try:
            if tree_analyzer.train_ai_model():
                logger.info("✅ AI model training completed successfully")
            else:
                logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
        except Exception as e:
            logger.warning(f"⚠️ AI model training failed: {e}")
        return tree_analyzer
    except Exception as e:
        logger.error(f"❌ Tree analyzer initialization failed: {e}")
        return None


def load_models_safely():
    """Populate the module-level model globals, tolerating every failure.

    Each component is loaded independently; a component that cannot be loaded
    is left as None so the rest of the pipeline can degrade gracefully.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer
    logger.info("🔍 Loading models...")
    boundary_model = _load_boundary_model()
    keras_model, kmer_to_index = _load_keras_model()
    analyzer = _init_tree_analyzer()
153
+
154
+ # Load models at startup
155
+ load_models_safely()
156
 
157
  # --- Tool Detection ---
158
def setup_binary_permissions():
    """Add the owner-execute bit to the bundled MAFFT and IQ-TREE binaries.

    Missing binaries are skipped; a chmod failure is logged as a warning.
    """
    for tool_path in (MAFFT_PATH, IQTREE_PATH):
        if not os.path.exists(tool_path):
            continue
        try:
            current_mode = os.stat(tool_path).st_mode
            os.chmod(tool_path, current_mode | stat.S_IEXEC)
            logger.info(f"Set executable permission on {tool_path}")
        except Exception as e:
            logger.warning(f"Failed to set permission on {tool_path}: {e}")
166
+
167
def _find_tool(candidates, marker, label):
    """Return the first candidate command that answers ``--help``, else None.

    A candidate counts as working when it exits with 0 or mentions ``marker``
    on stderr (some builds print usage to stderr with a non-zero exit code).
    """
    for candidate in candidates:
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
            continue
        if result.returncode == 0 or marker in result.stderr.lower():
            logger.info(f"✅ {label} found at: {candidate}")
            return candidate
    return None


def check_tool_availability():
    """Probe for usable MAFFT and IQ-TREE executables.

    Returns:
        ``(mafft_available, iqtree_available, mafft_cmd, iqtree_cmd)`` where
        each command is the working candidate or None.
    """
    setup_binary_permissions()
    mafft_cmd = _find_tool(
        ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
        "mafft",
        "MAFFT",
    )
    iqtree_cmd = _find_tool(
        ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
        "iqtree",
        "IQ-TREE",
    )
    return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd
208
+
209
+ # --- Pipeline Functions ---
210
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence into the reference tree with MAFFT and IQ-TREE.

    Args:
        sequence: Query nucleotide sequence (at least 100 bp after stripping).
        mafft_cmd: Executable used for ``mafft --add``.
        iqtree_cmd: Executable used for the constrained tree search.

    Returns:
        ``(success, message, aligned_path, tree_path)``; the paths are None
        unless the corresponding step produced a file.
    """
    query_fasta = None  # set once the query file path is known, for cleanup
    try:
        cleaned = sequence.strip()
        if len(cleaned) < 100:
            return False, "Sequence too short (<100 bp).", None, None
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, "Reference alignment or tree not found.", None, None

        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        query_record = SeqRecord(Seq(cleaned.upper()), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")

        # MAFFT writes the combined alignment to stdout.
        with open(aligned_with_query, "w") as output_file:
            subprocess.run(
                [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                stdout=output_file, stderr=subprocess.PIPE, text=True,
                timeout=600, check=True,
            )
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            return False, "MAFFT alignment failed.", None, None

        # -g constrains the search to the reference topology.
        subprocess.run(
            [iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
             "-m", "GTR+G", "-pre", output_prefix, "-redo"],
            capture_output=True, text=True, timeout=1200, check=True,
        )
        treefile = f"{output_prefix}.treefile"
        if not os.path.exists(treefile):
            return False, "IQ-TREE placement failed.", aligned_with_query, None
        success_msg = (
            f"Placement completed!\nQuery ID: {query_id}\n"
            f"Alignment: {os.path.basename(aligned_with_query)}\n"
            f"Tree: {os.path.basename(treefile)}"
        )
        return True, success_msg, aligned_with_query, treefile
    except subprocess.CalledProcessError as e:
        # str(e) only carries the exit code; the tool's stderr says why.
        stderr_tail = (e.stderr or "").strip()[-500:]
        logger.error(f"Phylogenetic placement failed: {e}; stderr: {stderr_tail}", exc_info=True)
        detail = f"\n{stderr_tail}" if stderr_tail else ""
        return False, f"Error: {str(e)}{detail}", None, None
    except Exception as e:
        logger.error(f"Phylogenetic placement failed: {e}", exc_info=True)
        return False, f"Error: {str(e)}", None, None
    finally:
        # The single-sequence query file is only an intermediate input.
        if query_fasta and os.path.exists(query_fasta):
            try:
                os.unlink(query_fasta)
            except Exception as e:
                logger.warning(f"Failed to clean up {query_fasta}: {e}")
246
 
247
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Run the similarity-based tree analysis for one sequence.

    Returns:
        ``(status_message, tree_html_path, report_html_path)``; both paths are
        None whenever the analysis stops early or raises.
    """

    def _stop(message):
        # Every early exit shares the same "no artefacts" shape.
        return message, None, None

    try:
        logger.debug("Starting tree analysis...")
        if not analyzer:
            return _stop("❌ Tree analyzer not initialized.")
        if not sequence or len(sequence.strip()) < 10:
            return _stop("❌ Invalid sequence.")
        if not (1 <= matching_percentage <= 99):
            return _stop("❌ Matching percentage must be 1-99.")

        logger.debug("Finding query sequence...")
        accepted = analyzer.find_query_sequence(sequence)
        if not accepted:
            return _stop("❌ Sequence not accepted.")

        logger.debug("Finding similar sequences...")
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return _stop(f"❌ No similar sequences at {matching_percentage}% threshold.")

        logger.debug("Building tree structure...")
        analyzer.build_tree_structure_with_ml_safe(matched_ids)

        logger.debug("Creating interactive tree...")
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
        logger.debug(f"Saving tree to {tree_html_path}")
        fig.write_html(tree_html_path)

        analyzer.matching_percentage = matching_percentage
        logger.debug("Generating detailed report...")
        report_html_path = None
        if analyzer.generate_detailed_report(matched_ids, actual_percentage):
            report_html_path = os.path.join("/tmp", f'detailed_report_{query_id}.html')

        match_count = len(matched_ids)
        logger.debug(f"Tree analysis completed: {match_count} matches")
        status = f"✅ Found {match_count} sequences at {actual_percentage:.2f}% similarity."
        return status, tree_html_path, report_html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}", exc_info=True)
        return _stop(f"❌ Error: {str(e)}")
280
 
 
281
def predict_with_keras(sequence):
    """Score a sequence with the Keras model and return a status string.

    The sequence is split into overlapping 6-mers, each mapped through
    ``kmer_to_index`` (unknown k-mers map to 0). The last element of the
    model's first output row is reported as a percentage clamped to 0-100.
    """
    try:
        if not keras_model or not kmer_to_index:
            return "❌ Keras model not available."
        seq_len = len(sequence)
        if seq_len < 6:
            return "❌ Sequence too short (<6 bp)."

        encoded = []
        for start in range(seq_len - 5):
            kmer = sequence[start:start + 6]
            encoded.append(kmer_to_index.get(kmer, 0))

        batch = np.array([encoded])
        scores = keras_model.predict(batch, verbose=0)[0]
        f_gene_prob = scores[-1]

        percentage = int(f_gene_prob * 100 + 5)
        percentage = max(0, percentage)
        percentage = min(100, percentage)
        return f"✅ {percentage}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}", exc_info=True)
        return f"❌ Error: {str(e)}"
297
 
298
def read_fasta_file(file_obj):
    """Return the concatenated sequence lines of a FASTA source.

    Args:
        file_obj: None, a filesystem path (str or os.PathLike), or a file-like
            object whose ``read()`` returns bytes or str.

    Returns:
        All non-header lines joined with surrounding whitespace removed, or
        an empty string when the input is None or cannot be read.
    """
    try:
        if file_obj is None:
            return ""
        if isinstance(file_obj, (str, os.PathLike)):
            with open(file_obj, "r", encoding="utf-8") as f:
                content = f.read()
        else:
            content = file_obj.read()
            # Binary streams yield bytes; text streams already yield str.
            if isinstance(content, (bytes, bytearray)):
                content = content.decode("utf-8")
        sequence_lines = (
            line.strip()
            for line in content.splitlines()
            # lstrip so an indented ">" header is not mistaken for sequence.
            if not line.lstrip().startswith(">")
        )
        return ''.join(sequence_lines)
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}", exc_info=True)
        return ""
313
+ import gradio as gr
314
 
315
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run boundary detection, validation, optional placement and tree analysis.

    Args:
        dna_input: Raw nucleotide text; whitespace is removed and any
            character outside ACTGN is replaced by ``N``.
        similarity_score: Similarity threshold (percent) for the tree analysis.
        build_ml_tree: When true, also run MAFFT/IQ-TREE placement.

    Returns:
        A 13-tuple: boundary message, validation message, placement message,
        tree-analysis message, summary, alignment path, tree path, two unused
        None slots, tree HTML, report HTML, tree HTML path, report HTML path.
        On failure the summary (index 4) is an empty string.
    """
    # NOTE: the former ``@gr.queue()`` decorator was removed. Queuing is
    # configured on a Blocks instance, not applied to handler functions.
    try:
        # Pasted sequences often contain line breaks; drop them instead of
        # turning them into N bases.
        dna_input = re.sub(r"\s+", "", dna_input).upper()
        if not dna_input:
            return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        if not re.fullmatch('[ACTGN]+', dna_input):
            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)

        # --- Boundary detection: falls back to the full input on any miss ---
        processed_sequence = dna_input
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(dna_input)
                regions = result['gene_regions']
                if regions:
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ No F gene regions found."
            except Exception as e:
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"

        # --- Validation ---
        if processed_sequence and len(processed_sequence) >= 6:
            keras_output = predict_with_keras(processed_sequence)
        else:
            keras_output = "❌ Sequence too short."

        # --- Optional phylogenetic placement ---
        aligned_file = None
        phy_file = None
        # Tracked explicitly: the success message carries no marker emoji, so
        # it cannot be recognised by inspecting the text.
        placement_status = "❌ Failed"
        if not build_ml_tree:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."
            placement_status = "⚠️ Skipped"
        elif not processed_sequence or len(processed_sequence) < 100:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            try:
                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
                if mafft_available and iqtree_available:
                    ml_success, ml_tree_output, aligned_file, phy_file = phylogenetic_placement(
                        processed_sequence, mafft_cmd, iqtree_cmd
                    )
                    if ml_success:
                        placement_status = "✅ OK"
                else:
                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"

        # --- Similarity-based tree analysis ---
        tree_html_path = None
        report_html_path = None
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            try:
                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(
                    processed_sequence, similarity_score
                )
                simplified_ml_output = tree_result
                if tree_html_path and os.path.exists(tree_html_path):
                    with open(tree_html_path, 'r', encoding='utf-8') as f:
                        tree_html_content = f.read()
                else:
                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                if report_html_path and os.path.exists(report_html_path):
                    with open(report_html_path, 'r', encoding='utf-8') as f:
                        report_html_content = f.read()
                else:
                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
            except Exception as e:
                simplified_ml_output = f"Tree analysis error: {str(e)}"
                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
        else:
            simplified_ml_output = (
                "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            )
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"

        # --- Summary ---
        validation = keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output
        tree_status = '✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'
        rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        summary_output = "\n".join([
            "",
            "📊 ANALYSIS SUMMARY:",
            rule,
            f"Input: {len(dna_input)} bp",
            f"F Gene: {len(processed_sequence)} bp",
            f"Validation: {validation}",
            f"Placement: {placement_status}",
            f"Tree Analysis: {tree_status}",
            rule,
            "",
        ])
        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, None, None, tree_html_content, report_html_content,
            tree_html_path, report_html_path
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}", exc_info=True)
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
406
+
407
async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
    """Read a FASTA upload and run the analysis pipeline on its sequence.

    Args:
        fasta_file_obj: None, a FastAPI ``UploadFile``, a filesystem path, or
            a file wrapper object exposing the path as ``.name``.
        similarity_score: Forwarded to ``run_pipeline``.
        build_ml_tree: Forwarded to ``run_pipeline``.

    Returns:
        The same 13-tuple as ``run_pipeline``.
    """
    # NOTE: the former ``@gr.queue()`` decorator was removed. Queuing is
    # configured on a Blocks instance, not applied to handler functions.
    temp_file_path = None
    try:
        if fasta_file_obj is None:
            return "❌ No file provided", "", "", "", "", None, None, None, None, "No input", "No input", None, None

        # Read the payload first so a failed read cannot leave an orphaned
        # temp file behind.
        if isinstance(fasta_file_obj, UploadFile):
            content = await fasta_file_obj.read()
        else:
            if isinstance(fasta_file_obj, (str, os.PathLike)):
                source_path = fasta_file_obj
            else:
                # File wrapper objects carry the on-disk path in ``.name``.
                source_path = getattr(fasta_file_obj, "name", fasta_file_obj)
            with open(source_path, 'rb') as f:
                content = f.read()

        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(content)

        dna_input = read_fasta_file(temp_file_path)
        if not dna_input:
            return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        return run_pipeline(dna_input, similarity_score, build_ml_tree)
    except Exception as e:
        logger.error(f"Pipeline from file error: {e}", exc_info=True)
        error_msg = f"❌ Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
    finally:
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except Exception as e:
                logger.warning(f"Failed to delete temp file {temp_file_path}: {e}")
436
+ # --- Pydantic Models ---
437
class AnalysisRequest(BaseModel):
    # Request body of POST /analyze.
    sequence: str  # raw nucleotide text to analyse
    similarity_score: float = 95.0  # tree-analysis threshold, in percent
    build_ml_tree: bool = False  # also run the MAFFT/IQ-TREE placement
441
+
442
class AnalysisResponse(BaseModel):
    # Response body shared by the analysis endpoints.
    boundary_output: str  # boundary-detection status message
    keras_output: str  # F gene validation status message
    ml_tree_output: str  # phylogenetic placement status message
    tree_analysis_output: str  # similarity tree status message
    summary_output: str  # multi-line human-readable summary
    success: bool
    error_message: Optional[str] = None
    tree_html_path: Optional[str] = None  # server-side path of the tree HTML
    report_html_path: Optional[str] = None  # server-side path of the report HTML
452
+
453
+ # --- FastAPI App Setup ---
454
+ app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
455
+
456
@app.get("/")
async def root():
    """Return a small index describing the service and its endpoints."""
    endpoints = {
        "docs": "/docs",
        "health": "/health",
        "gradio": "/gradio",
        "analyze": "/analyze",
        "analyze_file": "/analyze-file",
        "download": "/download/{file_type}/{query_id}",
    }
    return {
        "message": "🧬 Gene Analysis Pipeline API",
        "status": "running",
        "endpoints": endpoints,
    }
470
+
471
@app.get("/health")
async def health_check():
    """Report which models and external tools are currently usable."""
    try:
        tool_state = check_tool_availability()
        components = {
            "boundary_model": boundary_model is not None,
            "keras_model": keras_model is not None,
            "tree_analyzer": analyzer is not None,
            "mafft_available": tool_state[0],
            "iqtree_available": tool_state[1],
        }
        paths = {
            "base_dir": BASE_DIR,
            "query_output_dir": QUERY_OUTPUT_DIR,
        }
        return {"status": "healthy", "components": components, "paths": paths}
    except Exception as e:
        logger.error(f"Health check error: {e}", exc_info=True)
        return {"status": "unhealthy", "error": str(e)}
492
+
493
@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_sequence(request: AnalysisRequest):
    """Run the pipeline on a sequence supplied in the JSON body.

    ``success`` is false when the pipeline returned its error tuple, in which
    case ``error_message`` carries the pipeline's first message.
    """
    try:
        result = run_pipeline(request.sequence, request.similarity_score, request.build_ml_tree)
        # run_pipeline swallows its own exceptions and signals failure by
        # returning an empty summary, so that is what success is derived from.
        completed = bool(result[4])
        return AnalysisResponse(
            boundary_output=result[0] or "",
            keras_output=result[1] or "",
            ml_tree_output=result[2] or "",
            tree_analysis_output=result[3] or "",
            summary_output=result[4] or "",
            tree_html_path=result[11],
            report_html_path=result[12],
            success=completed,
            error_message=None if completed else (result[0] or "Pipeline failed"),
        )
    except Exception as e:
        logger.error(f"Analyze error: {e}", exc_info=True)
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            tree_html_path=None, report_html_path=None,
            success=False, error_message=str(e)
        )
515
+
516
@app.post("/analyze-file", response_model=AnalysisResponse)
async def analyze_file(
    file: UploadFile = File(...),
    similarity_score: float = Form(95.0),
    build_ml_tree: bool = Form(False)
):
    """Run the pipeline on an uploaded FASTA file.

    The upload is spooled to a temporary file that is removed afterwards.
    ``success`` is false when the pipeline returned its error tuple.
    """
    temp_file_path = None
    try:
        content = await file.read()
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(content)
        result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        # The pipeline reports failure through an empty summary rather than
        # by raising.
        completed = bool(result[4])
        return AnalysisResponse(
            boundary_output=result[0] or "",
            keras_output=result[1] or "",
            ml_tree_output=result[2] or "",
            tree_analysis_output=result[3] or "",
            summary_output=result[4] or "",
            tree_html_path=result[11],
            report_html_path=result[12],
            success=completed,
            error_message=None if completed else (result[0] or "Pipeline failed"),
        )
    except Exception as e:
        logger.error(f"Analyze-file error: {e}", exc_info=True)
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            tree_html_path=None, report_html_path=None,
            success=False, error_message=str(e)
        )
    finally:
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except Exception as e:
                logger.warning(f"Failed to clean up {temp_file_path}: {e}")
553
+
554
@app.get("/download/{file_type}/{query_id}")
async def download_file(file_type: str, query_id: str):
    """Serve a previously generated tree or report HTML file from /tmp.

    Raises:
        HTTPException: 400 for an unknown file type or an invalid query id,
            404 when the file does not exist, 500 for unexpected errors.
    """
    try:
        if file_type not in ["tree", "report"]:
            raise HTTPException(status_code=400, detail="Invalid file type. Use 'tree' or 'report'.")
        file_name = f"phylogenetic_tree_{query_id}.html" if file_type == "tree" else f"detailed_report_{query_id}.html"
        # Refuse ids that would make the name resolve outside of /tmp.
        if os.path.basename(file_name) != file_name:
            raise HTTPException(status_code=400, detail="Invalid query id.")
        file_path = os.path.join("/tmp", file_name)
        if not os.path.exists(file_path):
            raise HTTPException(status_code=404, detail="File not found.")
        return FileResponse(file_path, filename=file_name, media_type="text/html")
    except HTTPException:
        # Deliberate 4xx responses must not be rewritten into a 500 below.
        raise
    except Exception as e:
        logger.error(f"Download error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")
567
 
568
  # --- Gradio Interface ---
569
def create_gradio_interface():
    """Build the Gradio Blocks UI for the analysis pipeline.

    Returns:
        The Blocks app, or a minimal error-mode ``gr.Interface`` when the
        construction of the full UI raises.
    """
    try:
        # Probe the external tools once; each probe spawns subprocesses.
        mafft_ok, iqtree_ok, _, _ = check_tool_availability()

        def handle_analysis_output(*outputs):
            """Map the pipeline's 13-tuple onto the 11 output components."""
            (
                boundary_text, keras_text, placement_text, tree_text, summary_text,
                aligned_path, phy_path, _, _,
                tree_html_content, report_html_content,
                tree_html_path, report_html_path,
            ) = outputs
            return (
                boundary_text, keras_text, placement_text, tree_text, summary_text,
                # Download widgets stay hidden until there is a file to offer.
                gr.update(value=aligned_path, visible=aligned_path is not None),
                gr.update(value=phy_path, visible=phy_path is not None),
                gr.update(value=tree_html_path, visible=tree_html_path is not None),
                gr.update(value=report_html_path, visible=report_html_path is not None),
                tree_html_content,
                report_html_content,
            )

        def analyze_text(sequence, score, build_tree):
            return handle_analysis_output(*run_pipeline(sequence, score, build_tree))

        async def analyze_upload(file_obj, score, build_tree):
            result = await run_pipeline_from_file(file_obj, score, build_tree)
            return handle_analysis_output(*result)

        with gr.Blocks(
            title="🧬 Gene Analysis Pipeline",
            theme=gr.themes.Soft(),
            css="""
            .gradio-container { max-width: 1200px !important; }
            .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
            .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
            .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
            .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
            """
        ) as iface:
            gr.Markdown("# 🧬 Gene Analysis Pipeline")
            with gr.Row():
                with gr.Column():
                    gr.HTML(value=f"""
                    <div class="status-box">
                    <h3>🔧 System Status</h3>
                    <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                    <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
                    <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
                    <p>🧬 MAFFT: {'✅ Available' if mafft_ok else '❌ Missing'}</p>
                    <p>🌲 IQ-TREE: {'✅ Available' if iqtree_ok else '❌ Missing'}</p>
                    </div>
                    """)
            with gr.Tabs():
                with gr.TabItem("📝 Text Input"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            # Components take helper text via ``info``; they
                            # have no ``description`` keyword.
                            dna_input = gr.Textbox(
                                label="🧬 DNA Sequence",
                                placeholder="Enter DNA sequence (ATCG format)...",
                                lines=5,
                                info="Paste your DNA sequence here"
                            )
                        with gr.Column(scale=1):
                            similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)",
                                info="Minimum similarity for tree analysis"
                            )
                            build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False,
                                info="Generate phylogenetic placement (slower)"
                            )
                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                with gr.TabItem("📁 File Upload"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            file_input = gr.File(
                                label="📄 Upload FASTA File",
                                file_types=[".fasta", ".fa", ".fas", ".txt"]
                            )
                        with gr.Column(scale=1):
                            file_similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)",
                                info="Minimum similarity for tree analysis"
                            )
                            file_build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False,
                                info="Generate phylogenetic placement (slower)"
                            )
                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
            gr.Markdown("## 📊 Analysis Results")
            with gr.Row():
                with gr.Column():
                    boundary_output = gr.Textbox(
                        label="🎯 Boundary Detection",
                        interactive=False,
                        lines=2
                    )
                    keras_output = gr.Textbox(
                        label="🧠 F Gene Validation",
                        interactive=False,
                        lines=2
                    )
                with gr.Column():
                    ml_tree_output = gr.Textbox(
                        label="🌲 Phylogenetic Placement",
                        interactive=False,
                        lines=2
                    )
                    tree_analysis_output = gr.Textbox(
                        label="🌳 Tree Analysis",
                        interactive=False,
                        lines=2
                    )
            summary_output = gr.Textbox(
                label="📋 Summary",
                interactive=False,
                lines=8
            )
            with gr.Row():
                aligned_file = gr.File(label="📄 Alignment File", visible=False)
                tree_file = gr.File(label="🌲 Tree File", visible=False)
                tree_html_file = gr.File(label="🌳 Simplified Tree HTML", visible=False)
                report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
            with gr.Tabs():
                with gr.TabItem("🌳 Interactive Tree"):
                    tree_html = gr.HTML(
                        value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet. Run analysis to see results.</div>"
                    )
                with gr.TabItem("📊 Detailed Report"):
                    report_html = gr.HTML(
                        label="Analysis Report",
                        value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet. Run analysis to see results.</div>"
                    )

            # Event handlers: both buttons feed the same eleven outputs.
            result_components = [
                boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
            ]
            analyze_btn.click(
                fn=analyze_text,
                inputs=[dna_input, similarity_score, build_ml_tree],
                outputs=result_components
            )
            analyze_file_btn.click(
                fn=analyze_upload,
                inputs=[file_input, file_similarity_score, file_build_ml_tree],
                outputs=result_components
            )

            # Examples
            gr.Examples(
                examples=[
                    ["ATCG" * 250, 85.0, False],
                    ["CGATCG" * 150, 90.0, True]
                ],
                inputs=[dna_input, similarity_score, build_ml_tree],
                label="Example Sequences"
            )

            gr.Markdown("\n".join([
                "## 📚 Instructions",
                "1. **Input**: Enter a DNA sequence (ATCG format) or upload a FASTA file",
                "2. **Parameters**:",
                "   - Set similarity threshold for phylogenetic analysis (1-99%)",
                "   - Choose whether to build ML tree (slower but more accurate)",
                "3. **Analysis**: Click analyze to run the complete pipeline",
                "4. **Results**: View results in different tabs - summary, tree visualization, and detailed report",
                "5. **Downloads**: Download alignment, tree, simplified tree HTML, and detailed report HTML files",
                "",
                "### 🔬 Pipeline Components:",
                "- **Boundary Detection**: Identifies F gene regions",
                "- **F Gene Validation**: Validates F gene using ML",
                "- **Phylogenetic Placement**: Places sequence in reference tree (optional)",
                "- **Tree Analysis**: Builds phylogenetic tree with similar sequences",
            ]))

        return iface
    except Exception as e:
        logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
        # Capture the text now: the name ``e`` is unbound once this except
        # clause ends, so a lambda referring to it would fail when called.
        error_text = f"Error: {str(e)}"
        return gr.Interface(
            fn=lambda x: error_text,
            inputs=gr.Textbox(label="DNA Sequence"),
            outputs=gr.Textbox(label="Error"),
            title="🧬 Gene Analysis Pipeline (Error Mode)"
        )
 
 
 
760
 
761
+ # --- Application Startup ---
762
def run_application():
    """Serve FastAPI with the Gradio UI mounted at /gradio.

    When that startup raises, a standalone Gradio launch on the same port is
    attempted; if the fallback fails as well, a message is printed.
    """
    try:
        blocks_ui = create_gradio_interface()
        # Return value kept only to mirror the mount call's result.
        mounted_app = gr.mount_gradio_app(app, blocks_ui, path="/gradio")
        logger.info("🚀 Starting Gene Analysis Pipeline...")
        logger.info("📊 FastAPI docs available at: http://localhost:7860/docs")
        logger.info("🧬 Gradio interface available at: http://localhost:7860/gradio")
        server_options = {"host": "0.0.0.0", "port": 7860, "log_level": "info"}
        uvicorn.run(app, **server_options)
    except Exception as e:
        logger.error(f"Application startup failed: {e}", exc_info=True)
        try:
            logger.info("🔄 Falling back to Gradio-only mode...")
            fallback_ui = create_gradio_interface()
            fallback_ui.launch(
                server_name="0.0.0.0",
                server_port=7860,
                share=False,
                debug=False,
            )
        except Exception as fallback_error:
            logger.error(f"Fallback failed: {fallback_error}", exc_info=True)
            print(" Application failed to start. Check logs for details.")
789
+
790
+ # --- Main Entry Point ---
 
 
 
 
 
 
 
 
 
 
791
if __name__ == "__main__":
    # Print a component overview before handing over to the server.
    separator = "=" * 50
    print("🧬 Gene Analysis Pipeline Starting...")
    print(separator)
    print("🔍 Checking system components...")
    mafft_available, iqtree_available, _, _ = check_tool_availability()
    component_states = (
        ("🤖 Boundary Model", boundary_model),
        ("🧠 Keras Model", keras_model),
        ("🌳 Tree Analyzer", analyzer),
        ("🧬 MAFFT", mafft_available),
        ("🌲 IQ-TREE", iqtree_available),
    )
    for component_label, component_state in component_states:
        print(f"{component_label}: {'✅' if component_state else '❌'}")
    print(separator)
    run_application()