re-type committed on
Commit
a79d7d1
·
verified ·
1 Parent(s): 88cb35a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +594 -615
app.py CHANGED
@@ -12,47 +12,75 @@ import pandas as pd
12
  import re
13
  import logging
14
  import numpy as np
15
- from predictor import EnhancedGenePredictor
16
- from tensorflow.keras.models import load_model
17
- from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
20
  import sys
21
  import uuid
22
  from pathlib import Path
23
- from huggingface_hub import hf_hub_download
24
- from Bio import SeqIO
25
- from Bio.Seq import Seq
26
- from Bio.SeqRecord import SeqRecord
27
  import stat
28
  import time
29
- import asyncio
30
- from fastapi import FastAPI, File, UploadFile, Form, HTTPException
31
- from fastapi.responses import HTMLResponse, FileResponse
32
- from pydantic import BaseModel
33
- from typing import Optional
34
- import uvicorn
35
 
36
# --- Logging Setup ---
# One shared format (timestamp - level - message) for every handler.
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)
try:
    # Preferred configuration: stream handler plus a log file under /tmp.
    file_handler = logging.FileHandler('/tmp/app.log')
    file_handler.setFormatter(log_formatter)
    logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
except Exception as e:
    # Fallback: if the file handler cannot be set up, log to the stream only
    # and record why file logging is unavailable.
    logging.basicConfig(level=logging.INFO, handlers=[log_handler])
    logging.warning(f"Failed to set up file logging: {e}")

# Module-level logger used by the rest of this file.
logger = logging.getLogger(__name__)
logger.info(f"Gradio version: {gr.__version__}")

# Set event loop policy for compatibility with Gradio Spaces
try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception as e:
    # Best effort: a failure here is logged and startup continues.
    logger.warning(f"Failed to set event loop policy: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  # --- Global Variables ---
58
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -61,7 +89,13 @@ IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
61
  ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
62
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
63
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
64
- os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
 
 
 
 
 
 
65
 
66
  # Model repository and file paths
67
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
@@ -73,535 +107,472 @@ keras_model = None
73
  kmer_to_index = None
74
  analyzer = None
75
 
76
- # --- Model Loading ---
77
def load_models_safely():
    """Load the boundary model, the Keras model and the tree analyzer.

    Each of the three components is loaded in its own ``try`` block so that
    a failure in one leaves the others usable.  Results are published through
    the module-level globals ``boundary_model``, ``keras_model``,
    ``kmer_to_index`` and ``analyzer``; a component that could not be loaded
    is left as / reset to ``None``.  Nothing is returned and nothing is raised.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer
    logger.info("🔍 Loading models...")

    # --- Boundary model ---
    try:
        boundary_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_boundary_aware_model.pth",
            token=None,
        )
        if os.path.exists(boundary_path):
            boundary_model = EnhancedGenePredictor(boundary_path)
            logger.info("✅ Boundary model loaded successfully.")
        else:
            logger.error("❌ Boundary model file not found after download.")
    except Exception as e:
        logger.error(f"❌ Failed to load boundary model: {e}")
        boundary_model = None

    # --- Keras model + k-mer index (only useful as a pair) ---
    try:
        keras_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_model.keras",
            token=None,
        )
        kmer_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="kmer_to_index.pkl",
            token=None,
        )
        if os.path.exists(keras_path) and os.path.exists(kmer_path):
            loaded_model = load_model(keras_path)
            # NOTE(security): pickle.load executes arbitrary code embedded in
            # the file.  This is only acceptable while MODEL_REPO is trusted.
            with open(kmer_path, "rb") as f:
                loaded_index = pickle.load(f)
            # Publish both globals together, only after both loads succeeded.
            keras_model, kmer_to_index = loaded_model, loaded_index
            logger.info("✅ Keras model and k-mer index loaded successfully.")
        else:
            logger.error("❌ Keras model or k-mer files not found.")
    except Exception as e:
        logger.error(f"❌ Failed to load Keras model: {e}")
        keras_model = None
        kmer_to_index = None

    # --- Tree analyzer ---
    try:
        logger.info("🌳 Initializing tree analyzer...")
        analyzer = PhylogeneticTreeAnalyzer()
        csv_candidates = [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv"),
        ]
        csv_loaded = False
        # dict.fromkeys drops repeated paths while keeping the search order,
        # so a file that fails to load is not retried under the same name.
        for csv_candidate in dict.fromkeys(csv_candidates):
            if not os.path.exists(csv_candidate):
                continue
            logger.info(f"📊 Trying CSV: {csv_candidate}")
            try:
                if analyzer.load_data(csv_candidate):
                    logger.info(f"✅ CSV loaded from: {csv_candidate}")
                    csv_loaded = True
                    break
            except Exception as e:
                logger.warning(f"CSV load failed for {csv_candidate}: {e}")

        if not csv_loaded:
            logger.error("❌ Failed to load CSV data from any candidate location.")
            analyzer = None
        else:
            # Training is optional: a failure is logged and the analyzer is kept.
            try:
                if analyzer.train_ai_model():
                    logger.info("✅ AI model training completed successfully")
                else:
                    logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
            except Exception as e:
                logger.warning(f"⚠️ AI model training failed: {e}")
    except Exception as e:
        logger.error(f"❌ Tree analyzer initialization failed: {e}")
        analyzer = None


# Load models at startup
load_models_safely()
156
 
157
  # --- Tool Detection ---
158
def setup_binary_permissions():
    """Add the executable bit to the bundled MAFFT and IQ-TREE binaries.

    Paths that do not exist are skipped.  A failure on one binary is logged
    as a warning and does not stop the other from being processed.
    """
    for tool_path in (MAFFT_PATH, IQTREE_PATH):
        if not os.path.exists(tool_path):
            continue
        try:
            current_mode = os.stat(tool_path).st_mode
            os.chmod(tool_path, current_mode | stat.S_IEXEC)
            logger.info(f"Set executable permission on {tool_path}")
        except Exception as e:
            logger.warning(f"Failed to set permission on {tool_path}: {e}")
166
-
167
def _find_working_tool(candidates, marker, label):
    """Return the first candidate command that responds to ``--help``, else None.

    A candidate is tried only if it is on PATH or exists as a file.  It counts
    as working when it exits with status 0 or when ``marker`` appears in its
    lower-cased stderr.
    """
    for candidate in candidates:
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5,
            )
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
            continue
        if result.returncode == 0 or marker in result.stderr.lower():
            logger.info(f"✅ {label} found at: {candidate}")
            return candidate
    return None


def check_tool_availability():
    """Locate usable MAFFT and IQ-TREE executables.

    Returns:
        ``(mafft_available, iqtree_available, mafft_cmd, iqtree_cmd)`` where
        the two flags are booleans and each command is the working candidate
        string, or ``None`` when no candidate worked.
    """
    setup_binary_permissions()
    mafft_cmd = _find_working_tool(
        ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
        "mafft",
        "MAFFT",
    )
    iqtree_cmd = _find_working_tool(
        ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
        "iqtree",
        "IQ-TREE",
    )
    return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd
208
-
209
- # --- Pipeline Functions ---
210
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Add a query sequence to the reference alignment and place it on the tree.

    The query is aligned against ``ALIGNMENT_PATH`` with ``mafft --add`` and
    then placed with IQ-TREE, using ``TREE_PATH`` as the constraint tree.

    Args:
        sequence: Query DNA sequence.  All whitespace is removed and the text
            is upper-cased before use.
        mafft_cmd: MAFFT executable to run.
        iqtree_cmd: IQ-TREE executable to run.

    Returns:
        ``(success, message, aligned_path, tree_path)``.  On failure
        ``success`` is False and the paths are None, except that the aligned
        file path is returned when only the IQ-TREE step produced no tree.
    """
    # Normalise once so the length check and the FASTA record agree.
    # Previously only the check used a stripped copy, so whitespace could be
    # written into the query record.
    cleaned = "".join(sequence.split()).upper() if sequence else ""
    if len(cleaned) < 100:
        return False, "Sequence too short (<100 bp).", None, None

    query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
    query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
    aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
    output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")

    try:
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, "Reference alignment or tree not found.", None, None

        os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
        query_record = SeqRecord(Seq(cleaned), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")

        # MAFFT writes the combined alignment to stdout.
        with open(aligned_with_query, "w") as output_file:
            subprocess.run(
                [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                stdout=output_file,
                stderr=subprocess.PIPE,
                text=True,
                timeout=600,
                check=True,
            )
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            return False, "MAFFT alignment failed.", None, None

        subprocess.run(
            [
                iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH,
                "-m", "GTR+G", "-pre", output_prefix, "-redo",
            ],
            capture_output=True,
            text=True,
            timeout=1200,
            check=True,
        )
        treefile = f"{output_prefix}.treefile"
        if not os.path.exists(treefile):
            return False, "IQ-TREE placement failed.", aligned_with_query, None

        success_msg = (
            f"Placement completed!\nQuery ID: {query_id}\n"
            f"Alignment: {os.path.basename(aligned_with_query)}\n"
            f"Tree: {os.path.basename(treefile)}"
        )
        return True, success_msg, aligned_with_query, treefile
    except subprocess.CalledProcessError as e:
        # Include the tail of the tool's stderr: the exception text alone only
        # carries the command line and exit status.
        stderr_tail = (e.stderr or "").strip()[-500:]
        logger.error(f"Phylogenetic placement failed: {e}\n{stderr_tail}", exc_info=True)
        detail = f"\n{stderr_tail}" if stderr_tail else ""
        return False, f"Error: {str(e)}{detail}", None, None
    except Exception as e:
        logger.error(f"Phylogenetic placement failed: {e}", exc_info=True)
        return False, f"Error: {str(e)}", None, None
    finally:
        # The single-record query FASTA is only an input to MAFFT.
        if os.path.exists(query_fasta):
            try:
                os.unlink(query_fasta)
            except Exception as cleanup_e:
                logger.warning(f"Failed to clean up {query_fasta}: {cleanup_e}")
249
 
250
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Run the similarity-tree analysis for one sequence via the global analyzer.

    Returns:
        ``(status_message, tree_html_path, report_html_path)``.  When a
        validation step fails or an exception is raised, both paths are None
        and the message starts with ❌.  ``report_html_path`` is also None
        when the analyzer reports that no detailed report was generated.
    """
    try:
        logger.debug("Starting tree analysis...")

        # Guard clauses: each one maps to its own user-facing message.
        if not analyzer:
            return "❌ Tree analyzer not initialized.", None, None
        if not sequence or len(sequence.strip()) < 10:
            return "❌ Invalid sequence.", None, None
        if not (1 <= matching_percentage <= 99):
            return "❌ Matching percentage must be 1-99.", None, None

        logger.debug("Finding query sequence...")
        accepted = analyzer.find_query_sequence(sequence)
        if not accepted:
            return "❌ Sequence not accepted.", None, None

        logger.debug("Finding similar sequences...")
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return f"❌ No similar sequences at {matching_percentage}% threshold.", None, None

        logger.debug("Building tree structure...")
        analyzer.build_tree_structure_with_ml_safe(matched_ids)

        logger.debug("Creating interactive tree...")
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)

        # Fall back to a timestamp-based id when the analyzer has none.
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
        logger.debug(f"Saving tree to {tree_html_path}")
        fig.write_html(tree_html_path)

        analyzer.matching_percentage = matching_percentage
        logger.debug("Generating detailed report...")
        report_success = analyzer.generate_detailed_report(matched_ids, actual_percentage)
        # NOTE(review): presumably generate_detailed_report writes to exactly
        # this /tmp path; that is not visible here, so verify in the analyzer.
        if report_success:
            report_html_path = os.path.join("/tmp", f'detailed_report_{query_id}.html')
        else:
            report_html_path = None

        match_count = len(matched_ids)
        logger.debug(f"Tree analysis completed: {match_count} matches")
        status = f"✅ Found {match_count} sequences at {actual_percentage:.2f}% similarity."
        return status, tree_html_path, report_html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}", exc_info=True)
        return f"❌ Error: {str(e)}", None, None
283
 
 
284
def predict_with_keras(sequence):
    """Score a sequence with the global Keras model and return a status line.

    The sequence is cut into overlapping 6-mers, each mapped through
    ``kmer_to_index`` (unknown k-mers map to 0), and the resulting index list
    is passed to ``keras_model.predict`` as a single-row batch.  The last
    element of the first prediction is reported as a percentage.

    Returns:
        A string starting with ✅ on success or ❌ on any failure; this
        function does not raise.
    """
    try:
        if not (keras_model and kmer_to_index):
            return "❌ Keras model not available."

        seq_len = len(sequence)
        if seq_len < 6:
            return "❌ Sequence too short (<6 bp)."

        indices = [
            kmer_to_index.get(sequence[start:start + 6], 0)
            for start in range(seq_len - 5)
        ]
        batch = np.array([indices])
        first_prediction = keras_model.predict(batch, verbose=0)[0]
        f_gene_prob = first_prediction[-1]

        # NOTE(review): the +5 offset raises the reported value by five
        # percentage points before clamping to 0..100 — confirm it is intended.
        raw_percentage = int(f_gene_prob * 100 + 5)
        percentage = min(100, max(0, raw_percentage))
        return f"✅ {percentage}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}", exc_info=True)
        return f"❌ Error: {str(e)}"
300
 
301
def read_fasta_file(file_obj):
    """Return the first sequence of a FASTA source as one continuous string.

    Args:
        file_obj: ``None``, a filesystem path (``str``), or an object with a
            ``read()`` method returning ``bytes`` or ``str``.

    Returns:
        The residues of the first record with line breaks removed.  Input
        without any ``>`` header is treated as a single raw sequence.  An
        empty string is returned for ``None`` and whenever reading fails;
        this function does not raise.
    """
    try:
        if file_obj is None:
            return ""

        if isinstance(file_obj, str):
            # Sequence data is ASCII; replace undecodable bytes (for example
            # in a header) instead of failing the whole read.
            with open(file_obj, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
        else:
            raw = file_obj.read()
            if isinstance(raw, (bytes, bytearray)):
                content = raw.decode("utf-8", errors="replace")
            else:
                content = raw  # text-mode file object

        seq_lines = []
        for line in content.splitlines():
            line = line.strip()
            if not line:
                continue
            if line.startswith(">"):
                # A header after sequence data starts the next record: stop,
                # so that records are never concatenated together.
                if seq_lines:
                    break
                continue
            seq_lines.append(line)
        return "".join(seq_lines)
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}", exc_info=True)
        return ""
316
 
317
def _pipeline_failure(status, panel_message):
    """Build the 13-slot result tuple used when the pipeline stops early."""
    return (
        status, "", "", "", "",
        None, None, None, None,
        panel_message, panel_message,
        None, None,
    )


def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run boundary detection, classification, placement and tree analysis.

    Args:
        dna_input: Raw DNA text.  Whitespace is removed, the text is
            upper-cased, and every character outside ``ACTGN`` becomes ``N``.
        similarity_score: Matching percentage passed to the tree analysis.
        build_ml_tree: When true, also attempt MAFFT / IQ-TREE placement
            (requires a processed sequence of at least 100 bp).

    Returns:
        A 13-tuple, in this order: boundary_output, keras_output,
        ml_tree_output, tree_analysis_output, summary_output, aligned_file,
        tree_file, None, None, tree_html_content, report_html_content,
        tree_html_path, report_html_path.  When the pipeline stops early
        (empty input or an unexpected exception), slots 1-4 are empty strings
        and slot 0 carries the ❌ message.  This function does not raise.
    """
    try:
        # Remove *all* whitespace, not just leading/trailing: pasted
        # multi-line sequences would otherwise have every newline and space
        # rewritten to 'N' by the alphabet filter below.
        dna_input = "".join((dna_input or "").split()).upper()
        if not dna_input:
            return _pipeline_failure("❌ Empty input", "No input")
        dna_input = re.sub(r"[^ACTGN]", "N", dna_input)

        # --- Step 1: boundary detection (falls back to the full input) ---
        processed_sequence = dna_input
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(dna_input)
                regions = result['gene_regions']
                if regions:
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ No F gene regions found."
            except Exception as e:
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"

        # --- Step 2: Keras validation ---
        if processed_sequence and len(processed_sequence) >= 6:
            keras_output = predict_with_keras(processed_sequence)
        else:
            keras_output = "❌ Sequence too short."

        # --- Step 3: optional phylogenetic placement ---
        aligned_file = None
        phy_file = None
        placement_ok = False
        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
            try:
                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
                if mafft_available and iqtree_available:
                    placement_ok, ml_tree_output, aligned_file, phy_file = phylogenetic_placement(
                        processed_sequence, mafft_cmd, iqtree_cmd
                    )
                else:
                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"
        elif build_ml_tree:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."

        # --- Step 4: similarity-tree analysis ---
        tree_html_content = "No tree generated."
        report_html_content = "No report generated."
        tree_html_path = None
        report_html_path = None
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            try:
                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(
                    processed_sequence, similarity_score
                )
                simplified_ml_output = tree_result
                if tree_html_path and os.path.exists(tree_html_path):
                    with open(tree_html_path, 'r', encoding='utf-8') as f:
                        tree_html_content = f.read()
                else:
                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                if report_html_path and os.path.exists(report_html_path):
                    with open(report_html_path, 'r', encoding='utf-8') as f:
                        report_html_content = f.read()
                else:
                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
            except Exception as e:
                simplified_ml_output = f"Tree analysis error: {str(e)}"
                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
        else:
            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"

        # --- Summary ---
        validation = keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output
        # Use the placement success flag: the success message produced by
        # phylogenetic_placement contains no '✅', so testing the text alone
        # reported a successful placement as "Failed".
        if placement_ok or '✅' in ml_tree_output:
            placement_status = '✅ OK'
        elif 'skipped' in ml_tree_output:
            placement_status = '⚠️ Skipped'
        else:
            placement_status = 'Failed'
        tree_status = '✅ OK' if 'Found' in simplified_ml_output else ' Failed'
        summary_output = "\n".join([
            "",
            "📊 ANALYSIS SUMMARY:",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            f"Input: {len(dna_input)} bp",
            f"F Gene: {len(processed_sequence)} bp",
            f"Validation: {validation}",
            f"Placement: {placement_status}",
            f"Tree Analysis: {tree_status}",
            "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━",
            "",
        ])

        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, None, None, tree_html_content, report_html_content,
            tree_html_path, report_html_path,
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}", exc_info=True)
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return _pipeline_failure(error_msg, error_msg)
407
-
408
async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
    """Read a FASTA upload or path and run the analysis pipeline on it.

    Args:
        fasta_file_obj: ``None``, a filesystem path, or an object with a
            ``read()`` method (sync or async) returning the file content.
        similarity_score: Forwarded to ``run_pipeline``.
        build_ml_tree: Forwarded to ``run_pipeline``.

    Returns:
        The 13-tuple produced by ``run_pipeline``, or a tuple of the same
        shape whose first element is a ❌ message when no file was given, the
        file yielded no sequence, or an exception occurred.
    """
    # Function-scope imports: only this function needs these modules.
    import inspect
    import io

    if fasta_file_obj is None:
        return "❌ No file provided", "", "", "", "", None, None, None, None, "No input", "No input", None, None

    try:
        # Read the payload straight into memory.  The previous version first
        # created a delete=False temp file and only recorded its path after
        # reading, so a failed read left that temp file behind.
        reader = getattr(fasta_file_obj, "read", None)
        if callable(reader):
            payload = reader()
            if inspect.isawaitable(payload):  # e.g. UploadFile.read()
                payload = await payload
            if isinstance(payload, str):
                payload = payload.encode("utf-8")
        else:
            with open(fasta_file_obj, 'rb') as source:
                payload = source.read()

        dna_input = read_fasta_file(io.BytesIO(payload))
        if not dna_input:
            return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        return run_pipeline(dna_input, similarity_score, build_ml_tree)
    except Exception as e:
        logger.error(f"Pipeline from file error: {e}", exc_info=True)
        error_msg = f"❌ Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
436
-
437
- # --- Pydantic Models ---
438
# Request body of the POST /analyze endpoint; its three fields are passed
# positionally to run_pipeline.
class AnalysisRequest(BaseModel):
    sequence: str  # DNA sequence text to analyse
    similarity_score: float = 95.0  # matching percentage for the tree analysis
    build_ml_tree: bool = False  # when true, phylogenetic placement is attempted
442
-
443
# Response returned by the /analyze and /analyze-file endpoints.  The five
# *_output fields are filled from slots 0-4 of the run_pipeline result tuple.
class AnalysisResponse(BaseModel):
    boundary_output: str  # result slot 0: boundary detection status
    keras_output: str  # result slot 1: Keras validation status
    ml_tree_output: str  # result slot 2: phylogenetic placement status
    tree_analysis_output: str  # result slot 3: tree analysis status
    summary_output: str  # result slot 4: multi-line summary text
    success: bool  # set by the endpoint building the response
    error_message: Optional[str] = None  # exception text when the endpoint failed
    tree_html_path: Optional[str] = None  # result slot 11: saved tree HTML path
    report_html_path: Optional[str] = None  # result slot 12: saved report HTML path
453
-
454
- # --- FastAPI App Setup ---
455
- app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")
456
-
457
@app.get("/")
async def root():
    """Return a small JSON index with the service status and endpoint paths."""
    endpoint_index = {
        "docs": "/docs",
        "health": "/health",
        "gradio": "/gradio",
        "analyze": "/analyze",
        "analyze_file": "/analyze-file",
        "download": "/download/{file_type}/{query_id}",
    }
    payload = {
        "message": "🧬 Gene Analysis Pipeline API",
        "status": "running",
        "endpoints": endpoint_index,
    }
    return payload
471
-
472
@app.get("/health")
async def health_check():
    """Report which models and external tools are currently available.

    Returns a dict with ``status`` "healthy" plus component flags and paths,
    or ``{"status": "unhealthy", "error": ...}`` if the check itself raised.
    """
    try:
        tool_state = check_tool_availability()
        mafft_available = tool_state[0]
        iqtree_available = tool_state[1]

        components = {
            "boundary_model": boundary_model is not None,
            "keras_model": keras_model is not None,
            "tree_analyzer": analyzer is not None,
            "mafft_available": mafft_available,
            "iqtree_available": iqtree_available,
        }
        paths = {
            "base_dir": BASE_DIR,
            "query_output_dir": QUERY_OUTPUT_DIR,
        }
        return {"status": "healthy", "components": components, "paths": paths}
    except Exception as e:
        logger.error(f"Health check error: {e}", exc_info=True)
        return {"status": "unhealthy", "error": str(e)}
493
-
494
def _response_from_result(result):
    """Convert a 13-slot pipeline result tuple into an AnalysisResponse.

    A completed pipeline run always has a non-empty summary in slot 4, while
    every early-exit tuple leaves it empty and puts its ❌ message in slot 0.
    That distinction drives ``success`` / ``error_message``, so pipeline
    failures are no longer reported as ``success=True``.
    """
    completed = bool(result[4])
    return AnalysisResponse(
        boundary_output=result[0] or "",
        keras_output=result[1] or "",
        ml_tree_output=result[2] or "",
        tree_analysis_output=result[3] or "",
        summary_output=result[4] or "",
        tree_html_path=result[11],
        report_html_path=result[12],
        success=completed,
        error_message=None if completed else (result[0] or "Analysis failed"),
    )


def _response_from_exception(exc):
    """Build the all-empty AnalysisResponse returned when an endpoint raised."""
    return AnalysisResponse(
        boundary_output="", keras_output="", ml_tree_output="",
        tree_analysis_output="", summary_output="",
        tree_html_path=None, report_html_path=None,
        success=False, error_message=str(exc),
    )


@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_sequence(request: AnalysisRequest):
    """Run the pipeline on a sequence supplied in the JSON request body."""
    try:
        result = run_pipeline(request.sequence, request.similarity_score, request.build_ml_tree)
        return _response_from_result(result)
    except Exception as e:
        logger.error(f"Analyze error: {e}", exc_info=True)
        return _response_from_exception(e)


@app.post("/analyze-file")
async def analyze_file(
    file: UploadFile = File(...),
    similarity_score: float = Form(95.0),
    build_ml_tree: bool = Form(False)
):
    """Run the pipeline on an uploaded FASTA file (multipart form upload)."""
    temp_file_path = None
    try:
        # Stage the upload on disk and pass the path on;
        # run_pipeline_from_file accepts a filesystem path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            # Record the path first so the finally-block can remove the file
            # even if reading or writing the upload fails.
            temp_file_path = temp_file.name
            content = await file.read()
            temp_file.write(content)
        result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        return _response_from_result(result)
    except Exception as e:
        logger.error(f"Analyze-file error: {e}", exc_info=True)
        return _response_from_exception(e)
    finally:
        if temp_file_path and os.path.exists(temp_file_path):
            try:
                os.unlink(temp_file_path)
            except Exception as cleanup_e:
                logger.warning(f"Failed to clean up {temp_file_path}: {cleanup_e}")
554
-
555
- @app.get("/download/{file_type}/{query_id}")
556
- async def download_file(file_type: str, query_id: str):
557
- try:
558
- if file_type not in ["tree", "report"]:
559
- raise HTTPException(status_code=400, detail="Invalid file type. Use 'tree' or 'report'.")
560
- file_name = f"phylogenetic_tree_{query_id}.html" if file_type == "tree" else f"detailed_report_{query_id}.html"
561
- file_path = os.path.join("/tmp", file_name)
562
- if not os.path.exists(file_path):
563
- raise HTTPException(status_code=404, detail="File not found.")
564
- return FileResponse(file_path, filename=file_name, media_type="text/html")
565
- except Exception as e:
566
- logger.error(f"Download error: {e}", exc_info=True)
567
- raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")
568
 
569
  # --- Gradio Interface ---
570
- def create_gradio_interface():
 
571
  try:
572
  with gr.Blocks(
573
  title="🧬 Gene Analysis Pipeline",
574
  theme=gr.themes.Soft(),
575
  css="""
576
- .gradio-container { max-width: 1200px !important; }
577
- .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
578
- .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
579
- .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
580
- .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
581
  """
582
  ) as iface:
583
- gr.Markdown("# 🧬 Gene Analysis Pipeline")
 
 
 
 
 
 
 
 
 
 
 
584
  with gr.Row():
585
  with gr.Column():
586
- status_display = gr.HTML(value=f"""
587
- <div class="status-box">
588
- <h3>🔧 System Status</h3>
589
- <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
590
- <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
591
- <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
592
- <p>🧬 MAFFT: {'✅ Available' if check_tool_availability()[0] else '❌ Missing'}</p>
593
- <p>🌲 IQ-TREE: {'✅ Available' if check_tool_availability()[1] else '❌ Missing'}</p>
594
- </div>
595
- """)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
596
  with gr.Tabs():
597
  with gr.Tab("📝 Text Input"):
598
  with gr.Row():
599
  with gr.Column(scale=2):
600
  dna_input = gr.Textbox(
601
  label="🧬 DNA Sequence",
602
- placeholder="Enter your DNA sequence (ATCG format)...",
603
  lines=8,
604
- max_lines=15
 
605
  )
606
 
607
  with gr.Row():
@@ -617,23 +588,35 @@ def create_gradio_interface():
617
  ml_tree_checkbox = gr.Checkbox(
618
  label="🌲 Build ML Tree",
619
  value=False,
620
- info="Perform phylogenetic placement (slower)"
621
  )
622
 
623
- analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary", size="lg")
 
 
 
 
 
624
 
625
  with gr.Column(scale=1):
626
- gr.Markdown("### 📋 Quick Guide")
627
  gr.Markdown("""
 
 
628
  1. **Paste DNA sequence** in ATCG format
629
  2. **Adjust similarity** threshold (1-99%)
630
- 3. **Enable ML tree** for detailed phylogeny
631
  4. **Click Analyze** to start processing
632
 
633
  **Supported formats:**
634
- - Raw DNA sequence
 
635
  - Mixed case (auto-converted)
636
  - With/without spaces/newlines
 
 
 
 
 
637
  """)
638
 
639
  with gr.Tab("📁 File Upload"):
@@ -659,11 +642,16 @@ def create_gradio_interface():
659
  value=False
660
  )
661
 
662
- analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary", size="lg")
 
 
 
 
663
 
664
  with gr.Column(scale=1):
665
- gr.Markdown("### 📄 File Requirements")
666
  gr.Markdown("""
 
 
667
  **Accepted formats:**
668
  - `.fasta`, `.fa`, `.fas`
669
  - `.txt` with FASTA content
@@ -675,45 +663,51 @@ def create_gradio_interface():
675
  ```
676
 
677
  **Notes:**
678
- - Single or multiple sequences
679
  - First sequence will be analyzed
680
  - Maximum file size: 10MB
 
681
  """)
682
 
683
  # Results Section
684
  gr.Markdown("## 📊 Analysis Results")
685
 
686
  with gr.Row():
687
- with gr.Column():
688
  boundary_output = gr.Textbox(
689
  label="🎯 Boundary Detection",
690
  interactive=False,
691
- lines=2
 
692
  )
693
 
694
  keras_output = gr.Textbox(
695
  label="🧠 Gene Classification",
696
  interactive=False,
697
- lines=2
 
698
  )
699
 
700
- with gr.Column():
701
  ml_tree_output = gr.Textbox(
702
  label="🌲 Phylogenetic Placement",
703
  interactive=False,
704
- lines=2
 
705
  )
706
 
707
  tree_analysis_output = gr.Textbox(
708
  label="🌳 Tree Analysis",
709
  interactive=False,
710
- lines=2
 
711
  )
712
 
713
  summary_output = gr.Textbox(
714
- label="📋 Summary Report",
715
  interactive=False,
716
- lines=8
 
717
  )
718
 
719
  # Visualization Section
@@ -721,114 +715,89 @@ def create_gradio_interface():
721
  with gr.Tab("🌳 Interactive Tree"):
722
  tree_html = gr.HTML(
723
  label="Phylogenetic Tree Visualization",
724
- value="<div style='text-align: center; color: #666; padding: 50px;'>No tree generated yet. Run analysis to see results.</div>"
 
 
 
 
 
 
725
  )
726
 
727
  with gr.Tab("📊 Detailed Report"):
728
  report_html = gr.HTML(
729
  label="Analysis Report",
730
- value="<div style='text-align: center; color: #666; padding: 50px;'>No report generated yet. Run analysis to see results.</div>"
 
 
 
 
 
 
731
  )
732
 
733
- # Download Section
734
- with gr.Row():
735
- tree_download = gr.File(
736
- label="📥 Download Tree (HTML)",
737
- visible=False
738
- )
739
-
740
- report_download = gr.File(
741
- label="📥 Download Report (HTML)",
742
- visible=False
743
- )
744
-
745
  # Event Handlers
746
- def handle_analysis(dna_seq, similarity, build_ml):
 
747
  try:
748
- results = run_pipeline(dna_seq, similarity, build_ml)
749
-
750
- # Extract results
751
- boundary_out = results[0] or "No boundary detection performed"
752
- keras_out = results[1] or "No classification performed"
753
- ml_out = results[2] or "No ML tree built"
754
- tree_out = results[3] or "No tree analysis performed"
755
- summary_out = results[4] or "No summary available"
756
- tree_html_content = results[9] or "<div>No tree visualization available</div>"
757
- report_html_content = results[10] or "<div>No report available</div>"
758
- tree_path = results[11]
759
- report_path = results[12]
760
-
761
- # Return all outputs including file paths for downloads
762
- return (
763
- boundary_out,
764
- keras_out,
765
- ml_out,
766
- tree_out,
767
- summary_out,
768
- tree_html_content,
769
- report_html_content,
770
- tree_path if tree_path and os.path.exists(tree_path) else None,
771
- report_path if report_path and os.path.exists(report_path) else None,
772
- gr.update(visible=bool(tree_path)),
773
- gr.update(visible=bool(report_path))
774
- )
775
 
 
 
776
  except Exception as e:
 
777
  error_msg = f"❌ Analysis failed: {str(e)}"
778
- logger.error(f"Gradio analysis error: {e}", exc_info=True)
779
  return (
780
  error_msg, "", "", "", error_msg,
 
781
  f"<div style='color: red;'>{error_msg}</div>",
782
- f"<div style='color: red;'>{error_msg}</div>",
783
- None, None,
784
- gr.update(visible=False),
785
- gr.update(visible=False)
786
  )
787
 
788
- def handle_file_analysis(file_obj, similarity, build_ml):
 
789
  try:
790
  if file_obj is None:
791
- error_msg = "❌ No file uploaded"
792
  return (
793
  error_msg, "", "", "", error_msg,
 
794
  f"<div style='color: red;'>{error_msg}</div>",
795
- f"<div style='color: red;'>{error_msg}</div>",
796
- None, None,
797
- gr.update(visible=False),
798
- gr.update(visible=False)
799
  )
800
 
801
- # Read the uploaded file
802
- dna_sequence = read_fasta_file(file_obj.name)
803
- if not dna_sequence:
804
- error_msg = "❌ Failed to read DNA sequence from file"
805
  return (
806
  error_msg, "", "", "", error_msg,
 
807
  f"<div style='color: red;'>{error_msg}</div>",
808
- f"<div style='color: red;'>{error_msg}</div>",
809
- None, None,
810
- gr.update(visible=False),
811
- gr.update(visible=False)
812
  )
813
 
814
- # Run the same analysis as text input
815
- return handle_analysis(dna_sequence, similarity, build_ml)
816
-
817
  except Exception as e:
 
818
  error_msg = f"❌ File analysis failed: {str(e)}"
819
- logger.error(f"Gradio file analysis error: {e}", exc_info=True)
820
  return (
821
  error_msg, "", "", "", error_msg,
 
822
  f"<div style='color: red;'>{error_msg}</div>",
823
- f"<div style='color: red;'>{error_msg}</div>",
824
- None, None,
825
- gr.update(visible=False),
826
- gr.update(visible=False)
827
  )
828
 
829
  # Connect event handlers
830
  analyze_btn.click(
831
- fn=handle_analysis,
832
  inputs=[dna_input, similarity_slider, ml_tree_checkbox],
833
  outputs=[
834
  boundary_output,
@@ -837,16 +806,13 @@ def create_gradio_interface():
837
  tree_analysis_output,
838
  summary_output,
839
  tree_html,
840
- report_html,
841
- tree_download,
842
- report_download,
843
- tree_download, # For visibility update
844
- report_download # For visibility update
845
- ]
846
  )
847
-
848
  analyze_file_btn.click(
849
- fn=handle_file_analysis,
850
  inputs=[file_input, file_similarity_slider, file_ml_tree_checkbox],
851
  outputs=[
852
  boundary_output,
@@ -855,79 +821,92 @@ def create_gradio_interface():
855
  tree_analysis_output,
856
  summary_output,
857
  tree_html,
858
- report_html,
859
- tree_download,
860
- report_download,
861
- tree_download, # For visibility update
862
- report_download # For visibility update
863
- ]
864
  )
865
 
866
  # Footer
867
  gr.Markdown("""
868
  ---
869
- ### 🔬 About This Pipeline
870
 
871
- This application provides comprehensive analysis of DNA sequences with focus on gene detection and phylogenetic analysis:
 
 
 
 
872
 
873
- - **🎯 Boundary Detection**: Identifies gene regions within sequences
874
- - **🧠 Classification**: Validates gene identity using deep learning
875
- - **🌲 Phylogenetic Placement**: Places sequences in evolutionary context
876
- - **🌳 Tree Analysis**: Builds interactive phylogenetic trees
877
 
878
- **📊 Output Features:**
879
- - Interactive tree visualizations
880
- - Detailed analysis reports
881
- - Downloadable results
882
- - Comprehensive summaries
883
-
884
- **⚡ Performance Notes:**
885
- - Text input: ~5-30 seconds
886
- - File upload: ~10-60 seconds
887
- - ML tree building: +2-5 minutes
888
- - Tree analysis: +30-120 seconds
889
  """)
890
 
891
  return iface
892
 
893
  except Exception as e:
894
- logger.error(f"Failed to create Gradio interface: {e}", exc_info=True)
895
- # Return a minimal error interface
896
- with gr.Blocks() as error_iface:
897
- gr.Markdown(f"# ❌ Interface Error\n\nFailed to initialize: {str(e)}")
898
- return error_iface
 
 
 
 
 
899
 
900
- # --- Application Entry Point ---
901
def main():
    """Main application entry point.

    Builds the Gradio UI, attaches it to the module-level ``app``
    (presumably the FastAPI instance - confirm) under ``/gradio`` and serves
    it with uvicorn on ``$PORT`` (default 7860). Any startup failure is
    logged and the process exits with status 1.
    """
    try:
        logger.info("🚀 Starting Gene Analysis Pipeline...")

        # Create Gradio interface
        gradio_app = create_gradio_interface()
        gradio_app.queue(max_size=10)

        # A Blocks object is not itself an ASGI application, so it cannot be
        # handed to app.mount(); mount_gradio_app wraps it and mounts the
        # resulting ASGI app on the existing FastAPI instance in place.
        gr.mount_gradio_app(app, gradio_app, path="/gradio")

        logger.info("✅ Application initialized successfully")

        # Run the application
        port = int(os.environ.get("PORT", 7860))
        logger.info(f"🌐 Starting server on port {port}")

        uvicorn.run(
            app,
            host="0.0.0.0",
            port=port,
            log_level="info",
            access_log=True
        )

    except Exception as e:
        logger.error(f"❌ Application startup failed: {e}", exc_info=True)
        sys.exit(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
930
 
931
- # --- Run Application ---
932
  if __name__ == "__main__":
933
  main()
 
12
  import re
13
  import logging
14
  import numpy as np
 
 
 
15
  import tempfile
16
  import shutil
17
  import sys
18
  import uuid
19
  from pathlib import Path
 
 
 
 
20
  import stat
21
  import time
 
 
 
 
 
 
22
 
23
+ # Import with error handling
 
 
 
24
  try:
25
+ from predictor import EnhancedGenePredictor
26
+ except ImportError as e:
27
+ logging.warning(f"Failed to import EnhancedGenePredictor: {e}")
28
+ EnhancedGenePredictor = None
29
+
30
+ try:
31
+ from tensorflow.keras.models import load_model
32
+ except ImportError as e:
33
+ logging.warning(f"Failed to import TensorFlow: {e}")
34
+ load_model = None
35
 
36
+ try:
37
+ from analyzer import PhylogeneticTreeAnalyzer
38
+ except ImportError as e:
39
+ logging.warning(f"Failed to import PhylogeneticTreeAnalyzer: {e}")
40
+ PhylogeneticTreeAnalyzer = None
41
 
 
42
  try:
43
+ from huggingface_hub import hf_hub_download
44
+ except ImportError as e:
45
+ logging.warning(f"Failed to import huggingface_hub: {e}")
46
+ hf_hub_download = None
47
+
48
+ try:
49
+ from Bio import SeqIO
50
+ from Bio.Seq import Seq
51
+ from Bio.SeqRecord import SeqRecord
52
+ except ImportError as e:
53
+ logging.warning(f"Failed to import BioPython: {e}")
54
+ SeqIO = None
55
+
56
+ # --- Logging Setup ---
57
def setup_logging():
    """Configure root logging and return this module's logger.

    A console handler is always installed; a file handler on
    ``/tmp/app.log`` is added when it can be opened. After configuring, the
    Gradio version is logged. If anything in that sequence raises, the
    function prints the problem and falls back to a plain INFO-level
    ``basicConfig``.
    """
    try:
        fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

        console = logging.StreamHandler()
        console.setFormatter(fmt)
        sinks = [console]

        # File logging is optional: failing to open the file only prints a
        # warning and leaves console logging in place.
        try:
            to_file = logging.FileHandler('/tmp/app.log')
            to_file.setFormatter(fmt)
            sinks.append(to_file)
        except Exception as file_err:
            print(f"Warning: Failed to set up file logging: {file_err}")

        # force=True replaces any handlers installed by earlier logging calls.
        logging.basicConfig(level=logging.INFO, handlers=sinks, force=True)
        module_logger = logging.getLogger(__name__)
        module_logger.info(f"Gradio version: {gr.__version__}")
        return module_logger
    except Exception as setup_err:
        print(f"Critical: Failed to setup logging: {setup_err}")
        # Create basic logger
        logging.basicConfig(level=logging.INFO)
        return logging.getLogger(__name__)
82
+
83
+ logger = setup_logging()
84
 
85
  # --- Global Variables ---
86
  BASE_DIR = os.path.dirname(os.path.abspath(__file__))
 
89
  ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
90
  TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
91
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
92
+
93
+ # Ensure output directory exists
94
+ try:
95
+ os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
96
+ os.makedirs("/tmp", exist_ok=True)
97
+ except Exception as e:
98
+ logger.warning(f"Failed to create directories: {e}")
99
 
100
  # Model repository and file paths
101
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
 
107
  kmer_to_index = None
108
  analyzer = None
109
 
110
+ # --- Safe Model Loading ---
111
def _load_boundary_model():
    """Return the boundary predictor built from the Hub checkpoint, or None on failure."""
    if not (EnhancedGenePredictor and hf_hub_download):
        logger.warning("⚠️ EnhancedGenePredictor or hf_hub_download not available")
        return None
    try:
        logger.info("Loading boundary model...")
        boundary_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_boundary_aware_model.pth",
            token=None
        )
        if not os.path.exists(boundary_path):
            logger.error("❌ Boundary model file not found after download.")
            return None
        model = EnhancedGenePredictor(boundary_path)
        logger.info("✅ Boundary model loaded successfully.")
        return model
    except Exception as e:
        logger.error(f"❌ Failed to load boundary model: {e}")
        return None


def _load_keras_model():
    """Return ``(keras_model, kmer_to_index)`` from the Hub, or ``(None, None)`` on failure."""
    if not (load_model and hf_hub_download):
        logger.warning("⚠️ TensorFlow load_model or hf_hub_download not available")
        return None, None
    try:
        logger.info("Loading Keras model...")
        keras_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_model.keras",
            token=None
        )
        kmer_path = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="kmer_to_index.pkl",
            token=None
        )
        if not (os.path.exists(keras_path) and os.path.exists(kmer_path)):
            logger.error("❌ Keras model or k-mer files not found.")
            return None, None

        model = load_model(keras_path)
        # NOTE(review): unpickling runs arbitrary code from the downloaded
        # file; this is only acceptable while MODEL_REPO is a trusted repo.
        with open(kmer_path, "rb") as f:
            index = pickle.load(f)
        logger.info("✅ Keras model and k-mer index loaded successfully.")
        return model, index
    except Exception as e:
        logger.error(f"❌ Failed to load Keras model: {e}")
        return None, None


def _load_tree_analyzer():
    """Return a tree analyzer with CSV data loaded, or None when no CSV could be loaded."""
    if not PhylogeneticTreeAnalyzer:
        logger.warning("⚠️ PhylogeneticTreeAnalyzer not available")
        return None
    try:
        logger.info("🌳 Initializing tree analyzer...")
        tree_analyzer = PhylogeneticTreeAnalyzer()

        # Try to find CSV file; dict.fromkeys drops repeated paths while
        # keeping the original search order.
        csv_candidates = list(dict.fromkeys([
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ]))

        csv_loaded = False
        for csv_candidate in csv_candidates:
            if not os.path.exists(csv_candidate):
                continue
            logger.info(f"📊 Trying CSV: {csv_candidate}")
            try:
                if tree_analyzer.load_data(csv_candidate):
                    logger.info(f"✅ CSV loaded from: {csv_candidate}")
                    csv_loaded = True
                    break
            except Exception as e:
                logger.warning(f"CSV load failed for {csv_candidate}: {e}")

        if not csv_loaded:
            logger.error("❌ Failed to load CSV data from any candidate location.")
            return None

        # Training is best-effort: the analyzer stays usable if it fails.
        try:
            if hasattr(tree_analyzer, 'train_ai_model') and tree_analyzer.train_ai_model():
                logger.info("✅ AI model training completed successfully")
            else:
                logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
        except Exception as e:
            logger.warning(f"⚠️ AI model training failed: {e}")

        return tree_analyzer
    except Exception as e:
        logger.error(f"❌ Tree analyzer initialization failed: {e}")
        return None


def load_models_safely():
    """Load models with comprehensive error handling.

    Populates the module globals ``boundary_model``, ``keras_model``,
    ``kmer_to_index`` and ``analyzer``. Each component is loaded
    independently; a component that cannot be loaded is set to ``None`` and
    the failure is logged, so this function never raises for a missing
    dependency, download error or unreadable file.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer

    logger.info("🔍 Starting model loading...")

    boundary_model = _load_boundary_model()
    keras_model, kmer_to_index = _load_keras_model()
    analyzer = _load_tree_analyzer()
 
 
 
 
 
 
 
 
 
 
 
 
212
 
213
  # --- Tool Detection ---
214
def setup_binary_permissions():
    """Set executable permissions on binary files.

    For each of ``MAFFT_PATH`` and ``IQTREE_PATH`` that exists on disk, adds
    the owner-execute bit to its current mode. Failures are logged as
    warnings and never propagate.
    """
    try:
        for binary_path in (MAFFT_PATH, IQTREE_PATH):
            if not os.path.exists(binary_path):
                continue
            try:
                current_mode = os.stat(binary_path).st_mode
                os.chmod(binary_path, current_mode | stat.S_IEXEC)
                logger.info(f"Set executable permission on {binary_path}")
            except Exception as chmod_err:
                logger.warning(f"Failed to set permission on {binary_path}: {chmod_err}")
    except Exception as setup_err:
        logger.warning(f"Binary permission setup failed: {setup_err}")
 
 
 
 
 
 
 
226
 
227
def _find_working_tool(candidates, marker, label):
    """Return the first candidate command that answers ``--help``, or None.

    A candidate counts as working when the process exits with status 0 or
    when ``marker`` appears in its lower-cased stderr output.
    """
    for candidate in candidates:
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5
            )
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
            continue
        if result.returncode == 0 or marker in result.stderr.lower():
            logger.info(f"✅ {label} found at: {candidate}")
            return candidate
    return None


def check_tool_availability():
    """Check if required tools are available.

    Returns:
        A 4-tuple ``(mafft_available, iqtree_available, mafft_cmd,
        iqtree_cmd)``. The command entries are the first working candidate
        for each tool or ``None``. Any unexpected error is logged and
        reported as ``(False, False, None, None)``.
    """
    try:
        setup_binary_permissions()

        mafft_cmd = _find_working_tool(
            ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
            "mafft",
            "MAFFT",
        )
        iqtree_cmd = _find_working_tool(
            ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
            "iqtree",
            "IQ-TREE",
        )

        return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd

    except Exception as e:
        logger.error(f"Tool availability check failed: {e}")
        return False, False, None, None
281
 
282
+ # --- Core Functions ---
283
def predict_with_keras(sequence):
    """Classify ``sequence`` with the Keras model and return a status string.

    The sequence is split into overlapping 6-mers, each mapped through
    ``kmer_to_index`` (unknown k-mers map to 0), and fed to the model as a
    single-row batch. The last element of the prediction is reported as a
    percentage clamped to 0-100. Every failure is returned as a "❌ ..."
    message rather than raised.
    """
    try:
        if not (keras_model and kmer_to_index):
            return "❌ Keras model not available."

        if len(sequence) < 6:
            return "❌ Sequence too short (<6 bp)."

        # Generate k-mers and look up their indices in one pass.
        encoded = []
        for start in range(len(sequence) - 5):
            encoded.append(kmer_to_index.get(sequence[start:start + 6], 0))

        # Make prediction
        batch = np.array([encoded])
        scores = keras_model.predict(batch, verbose=0)[0]
        f_gene_prob = scores[-1]

        # Convert to percentage
        raw_percent = int(f_gene_prob * 100 + 5)
        percentage = min(100, max(0, raw_percent))
        return f"✅ {percentage}% F gene confidence"

    except Exception as e:
        logger.error(f"Keras prediction failed: {e}")
        return f"❌ Error: {str(e)}"
308
 
309
def read_fasta_file(file_obj):
    """Read the first sequence from a FASTA source.

    Args:
        file_obj: ``None``, a filesystem path, an upload object exposing the
            stored file's path as ``.name``, or a file-like object whose
            ``read()`` returns ``str`` or ``bytes``.

    Returns:
        The residues of the first record joined into one string with all
        line breaks removed, or ``""`` when nothing could be read. Input
        without any ``>`` header is treated as one raw sequence.
    """
    try:
        if file_obj is None:
            return ""

        source_path = None
        if isinstance(file_obj, (str, os.PathLike)):
            source_path = file_obj
        else:
            # Upload wrappers carry the temp-file path in .name; reading by
            # path avoids depending on the wrapper's mode or file position.
            name = getattr(file_obj, "name", None)
            if isinstance(name, str) and os.path.isfile(name):
                source_path = name

        if source_path is not None:
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM that
            # would otherwise hide the first ">" header.
            with open(source_path, "r", encoding="utf-8-sig") as f:
                content = f.read()
        else:
            content = file_obj.read()
            if isinstance(content, bytes):
                content = content.decode("utf-8-sig")

        # Collect residue lines until the second record begins.
        seq_lines = []
        for raw_line in content.splitlines():
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith(">"):
                if seq_lines:
                    break
                continue
            seq_lines.append(line)
        return ''.join(seq_lines)

    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}")
        return ""
329
 
330
def analyze_sequence_basic(sequence, similarity_score=95.0):
    """Basic sequence analysis without external tools.

    Args:
        sequence: Nucleotide text; case and embedded whitespace are ignored.
        similarity_score: Threshold echoed into the report.

    Returns:
        A 3-tuple ``(report, None, None)``. ``report`` lists length, GC
        content and N content, or is an error message when fewer than 10
        bases remain after whitespace is removed.
    """
    try:
        # Drop whitespace before masking so spaces and line breaks are not
        # counted as ambiguous bases in the length and N statistics.
        compact = re.sub(r'\s+', '', sequence or '').upper()
        if len(compact) < 10:
            return "❌ Invalid sequence.", None, None

        # Clean sequence: anything outside the nucleotide alphabet becomes N.
        clean_seq = re.sub(r'[^ATCGN]', 'N', compact)

        # Basic analysis
        length = len(clean_seq)
        gc_content = (clean_seq.count('G') + clean_seq.count('C')) / length * 100
        n_content = clean_seq.count('N') / length * 100

        analysis_result = (
            "\n"
            "✅ Basic Analysis Complete\n"
            f"• Length: {length} bp\n"
            f"• GC Content: {gc_content:.1f}%\n"
            f"• N Content: {n_content:.1f}%\n"
            f"• Similarity Threshold: {similarity_score}%\n"
        )

        return analysis_result, None, None

    except Exception as e:
        logger.error(f"Basic analysis failed: {e}")
        return f"❌ Analysis error: {str(e)}", None, None
357
+
358
def run_pipeline_safe(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Safe pipeline execution with comprehensive error handling.

    Args:
        dna_input: Raw or FASTA-formatted nucleotide text. Header lines
            (starting with ``>``) and whitespace are discarded; characters
            outside ``ACTGN`` are masked as ``N``.
        similarity_score: Threshold passed to the basic analysis report.
        build_ml_tree: Whether the caller asked for an ML tree.

    Returns:
        A 13-tuple: boundary text, classification text, ML-tree text, tree
        analysis text, summary text, four file placeholders (``None``), tree
        HTML, report HTML and two path placeholders (``None``). Errors are
        reported inside the tuple; the function does not raise.
    """
    empty_result = (
        "❌ Empty input", "", "", "", "No input provided",
        None, None, None, None,
        "No input", "No input",
        None, None,
    )

    def _after_colon(text):
        # The summary shows only the part of a status line after its last colon.
        return text.split(':')[-1].strip() if ':' in text else text

    try:
        # Input validation
        if not dna_input or not dna_input.strip():
            return empty_result

        # FASTA headers are metadata, not nucleotides.
        sequence_lines = [
            line for line in dna_input.splitlines()
            if not line.lstrip().startswith(">")
        ]
        # Strip whitespace BEFORE masking; otherwise every space or line
        # break would be turned into a spurious N.
        compact_input = re.sub(r'\s+', '', ''.join(sequence_lines)).upper()
        if not compact_input:
            return empty_result

        processed_sequence = re.sub(r'[^ACTGN]', 'N', compact_input)
        input_length = len(processed_sequence)

        logger.info(f"Processing sequence of length: {len(processed_sequence)}")

        # Boundary detection
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(processed_sequence)
                regions = result.get('gene_regions') if hasattr(result, 'get') else None
                if regions:
                    # Continue the pipeline on the first detected region only.
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ Boundary detection completed (no regions found)."
            except Exception as e:
                logger.error(f"Boundary prediction error: {e}")
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(processed_sequence)} bp"

        # Keras prediction
        if len(processed_sequence) >= 6:
            keras_output = predict_with_keras(processed_sequence)
        else:
            keras_output = "❌ Sequence too short for classification."

        # ML Tree analysis (simplified for now)
        if not build_ml_tree:
            ml_tree_output = "⚠️ ML tree analysis skipped."
        elif len(processed_sequence) < 100:
            ml_tree_output = "❌ Sequence too short for ML tree (<100 bp)."
        else:
            try:
                mafft_available, iqtree_available, _, _ = check_tool_availability()
                if mafft_available and iqtree_available:
                    ml_tree_output = "⚠️ ML tree analysis not implemented in safe mode."
                else:
                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"

        # Tree analysis
        tree_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Tree analysis not available in safe mode.</div>"
        report_html_content = "<div style='text-align: center; color: #666; padding: 50px;'>Report not available in safe mode.</div>"

        if analyzer and len(processed_sequence) >= 10:
            try:
                result, _, _ = analyze_sequence_basic(processed_sequence, similarity_score)
                tree_analysis_output = result
                tree_html_content = f"<div style='padding: 20px;'><h3>Basic Analysis</h3><pre>{result}</pre></div>"
                report_html_content = tree_html_content
            except Exception as e:
                logger.error(f"Tree analysis error: {e}")
                tree_analysis_output = f"❌ Tree analysis error: {str(e)}"
        else:
            tree_analysis_output = "❌ Tree analyzer not available or sequence too short."

        # Create summary
        rule = "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
        summary_output = (
            "\n"
            "📊 ANALYSIS SUMMARY:\n"
            f"{rule}\n"
            f"Input Length: {input_length} bp\n"
            f"Processed Length: {len(processed_sequence)} bp\n"
            f"Boundary Detection: {'✅ Active' if boundary_model else '❌ Unavailable'}\n"
            f"Classification: {'✅ Active' if keras_model else '❌ Unavailable'}\n"
            f"ML Tree: {'✅ Requested' if build_ml_tree else '⚠️ Skipped'}\n"
            f"Tree Analysis: {'✅ Active' if analyzer else '❌ Unavailable'}\n"
            f"{rule}\n"
            "\n"
            "Results:\n"
            f"• Boundary: {_after_colon(boundary_output)}\n"
            f"• Classification: {_after_colon(keras_output)}\n"
            f"• ML Tree: {'Requested' if build_ml_tree else 'Skipped'}\n"
            f"• Analysis: {'Completed' if '✅' in tree_analysis_output else 'Failed'}\n"
        )

        return (
            boundary_output,
            keras_output,
            ml_tree_output,
            tree_analysis_output,
            summary_output,
            None,  # aligned_file
            None,  # phy_file
            None,  # additional_file_1
            None,  # additional_file_2
            tree_html_content,
            report_html_content,
            None,  # tree_html_path
            None   # report_html_path
        )

    except Exception as e:
        logger.error(f"Pipeline error: {e}", exc_info=True)
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return (
            error_msg, "", "", "", error_msg,
            None, None, None, None,
            f"<div style='color: red;'>{error_msg}</div>",
            f"<div style='color: red;'>{error_msg}</div>",
            None, None
        )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
 
483
  # --- Gradio Interface ---
484
+ def create_safe_gradio_interface():
485
+ """Create a safe Gradio interface with comprehensive error handling"""
486
  try:
487
  with gr.Blocks(
488
  title="🧬 Gene Analysis Pipeline",
489
  theme=gr.themes.Soft(),
490
  css="""
491
+ .gradio-container {
492
+ max-width: 1200px !important;
493
+ margin: 0 auto;
494
+ }
495
+ .status-box {
496
+ padding: 15px;
497
+ border-radius: 8px;
498
+ margin: 10px 0;
499
+ border-left: 4px solid #007bff;
500
+ background: linear-gradient(90deg, #f8f9fa 0%, #e9ecef 100%);
501
+ }
502
+ .success {
503
+ background-color: #d4edda;
504
+ border-left-color: #28a745;
505
+ color: #155724;
506
+ }
507
+ .warning {
508
+ background-color: #fff3cd;
509
+ border-left-color: #ffc107;
510
+ color: #856404;
511
+ }
512
+ .error {
513
+ background-color: #f8d7da;
514
+ border-left-color: #dc3545;
515
+ color: #721c24;
516
+ }
517
+ .analysis-section {
518
+ border: 1px solid #dee2e6;
519
+ border-radius: 8px;
520
+ padding: 20px;
521
+ margin: 10px 0;
522
+ background: white;
523
+ }
524
  """
525
  ) as iface:
526
+
527
+ # Header
528
+ gr.Markdown("""
529
+ # 🧬 Gene Analysis Pipeline
530
+
531
+ ### Comprehensive DNA sequence analysis with machine learning
532
+
533
+ This tool provides multi-modal analysis including boundary detection, gene classification,
534
+ and phylogenetic analysis for DNA sequences.
535
+ """)
536
+
537
+ # System Status
538
  with gr.Row():
539
  with gr.Column():
540
+ try:
541
+ mafft_available, iqtree_available, _, _ = check_tool_availability()
542
+ status_html = f"""
543
+ <div class="status-box">
544
+ <h3>🔧 System Status</h3>
545
+ <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 10px; margin-top: 10px;">
546
+ <div>🤖 Boundary Model: <strong>{'✅ Loaded' if boundary_model else '❌ Missing'}</strong></div>
547
+ <div>🧠 Keras Model: <strong>{'✅ Loaded' if keras_model else '❌ Missing'}</strong></div>
548
+ <div>🌳 Tree Analyzer: <strong>{'✅ Loaded' if analyzer else '❌ Missing'}</strong></div>
549
+ <div>🧬 MAFFT: <strong>{'✅ Available' if mafft_available else '❌ Missing'}</strong></div>
550
+ <div>🌲 IQ-TREE: <strong>{'✅ Available' if iqtree_available else '❌ Missing'}</strong></div>
551
+ <div>📊 Safe Mode: <strong>{'✅ Active' if not all([boundary_model, keras_model, analyzer]) else '⚠️ Inactive'}</strong></div>
552
+ </div>
553
+ </div>
554
+ """
555
+ except Exception as e:
556
+ status_html = f"""
557
+ <div class="status-box error">
558
+ <h3>❌ System Status Error</h3>
559
+ <p>Failed to check system status: {str(e)}</p>
560
+ </div>
561
+ """
562
+
563
+ gr.HTML(value=status_html)
564
+
565
+ # Main Interface
566
  with gr.Tabs():
567
  with gr.Tab("📝 Text Input"):
568
  with gr.Row():
569
  with gr.Column(scale=2):
570
  dna_input = gr.Textbox(
571
  label="🧬 DNA Sequence",
572
+ placeholder="Enter your DNA sequence (ATCG format)...\nExample: ATCGATCGATCG...",
573
  lines=8,
574
+ max_lines=15,
575
+ info="Paste your DNA sequence here. Supports FASTA format or raw sequence."
576
  )
577
 
578
  with gr.Row():
 
588
  ml_tree_checkbox = gr.Checkbox(
589
  label="🌲 Build ML Tree",
590
  value=False,
591
+ info="Perform phylogenetic placement (requires external tools)"
592
  )
593
 
594
+ analyze_btn = gr.Button(
595
+ "🔬 Analyze Sequence",
596
+ variant="primary",
597
+ size="lg",
598
+ scale=1
599
+ )
600
 
601
  with gr.Column(scale=1):
 
602
  gr.Markdown("""
603
+ ### 📋 Quick Guide
604
+
605
  1. **Paste DNA sequence** in ATCG format
606
  2. **Adjust similarity** threshold (1-99%)
607
+ 3. **Enable ML tree** for phylogenetic analysis
608
  4. **Click Analyze** to start processing
609
 
610
  **Supported formats:**
611
+ - Raw DNA sequence: `ATCGATCG...`
612
+ - FASTA format: `>header\\nATCG...`
613
  - Mixed case (auto-converted)
614
  - With/without spaces/newlines
615
+
616
+ **Requirements:**
617
+ - Minimum length: 10 bp for basic analysis
618
+ - Minimum length: 100 bp for ML tree
619
+ - Only ATCG nucleotides (others converted to N)
620
  """)
621
 
622
  with gr.Tab("📁 File Upload"):
 
642
  value=False
643
  )
644
 
645
+ analyze_file_btn = gr.Button(
646
+ "🔬 Analyze File",
647
+ variant="primary",
648
+ size="lg"
649
+ )
650
 
651
  with gr.Column(scale=1):
 
652
  gr.Markdown("""
653
+ ### 📄 File Requirements
654
+
655
  **Accepted formats:**
656
  - `.fasta`, `.fa`, `.fas`
657
  - `.txt` with FASTA content
 
663
  ```
664
 
665
  **Notes:**
666
+ - Single or multiple sequences supported
667
  - First sequence will be analyzed
668
  - Maximum file size: 10MB
669
+ - UTF-8 encoding recommended
670
  """)
671
 
672
  # Results Section
673
  gr.Markdown("## 📊 Analysis Results")
674
 
675
  with gr.Row():
676
+ with gr.Column(scale=1):
677
  boundary_output = gr.Textbox(
678
  label="🎯 Boundary Detection",
679
  interactive=False,
680
+ lines=3,
681
+ info="Gene region identification results"
682
  )
683
 
684
  keras_output = gr.Textbox(
685
  label="🧠 Gene Classification",
686
  interactive=False,
687
+ lines=3,
688
+ info="Machine learning classification confidence"
689
  )
690
 
691
+ with gr.Column(scale=1):
692
  ml_tree_output = gr.Textbox(
693
  label="🌲 Phylogenetic Placement",
694
  interactive=False,
695
+ lines=3,
696
+ info="Maximum likelihood tree placement"
697
  )
698
 
699
  tree_analysis_output = gr.Textbox(
700
  label="🌳 Tree Analysis",
701
  interactive=False,
702
+ lines=3,
703
+ info="Phylogenetic tree construction results"
704
  )
705
 
706
  summary_output = gr.Textbox(
707
+ label="📋 Comprehensive Summary Report",
708
  interactive=False,
709
+ lines=12,
710
+ info="Complete analysis overview"
711
  )
712
 
713
  # Visualization Section
 
715
  with gr.Tab("🌳 Interactive Tree"):
716
  tree_html = gr.HTML(
717
  label="Phylogenetic Tree Visualization",
718
+ value="""
719
+ <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
720
+ <h3>🌳 Tree Visualization</h3>
721
+ <p>No tree generated yet. Run analysis to see interactive phylogenetic tree.</p>
722
+ <p><em>Note: Tree visualization requires successful sequence analysis.</em></p>
723
+ </div>
724
+ """
725
  )
726
 
727
  with gr.Tab("📊 Detailed Report"):
728
  report_html = gr.HTML(
729
  label="Analysis Report",
730
+ value="""
731
+ <div style='text-align: center; color: #666; padding: 50px; border: 2px dashed #ccc; border-radius: 8px;'>
732
+ <h3>📊 Analysis Report</h3>
733
+ <p>No report generated yet. Run analysis to see detailed results.</p>
734
+ <p><em>Note: Report includes statistical analysis and recommendations.</em></p>
735
+ </div>
736
+ """
737
  )
738
 
 
 
 
 
 
 
 
 
 
 
 
 
739
  # Event Handlers
740
def handle_analysis_safe(dna_seq, similarity, build_ml):
    """Safe analysis handler with comprehensive error handling.

    Returns exactly seven values, one per component wired to the Analyze
    button: five text fields followed by the tree HTML and the report HTML.
    """

    def _error_outputs(message):
        # Show the message in the first and summary boxes and in both HTML panels.
        banner = f"<div style='color: red;'>{message}</div>"
        return (message, "", "", "", message, banner, banner)

    try:
        if not dna_seq or not dna_seq.strip():
            return _error_outputs("❌ Please enter a DNA sequence")

        result = run_pipeline_safe(dna_seq, similarity, build_ml)
        # run_pipeline_safe returns 13 fields, but the click handler lists
        # seven outputs (the two entries hidden in the collapsed diff are
        # presumed to be keras_output and ml_tree_output - confirm). Keep
        # the five text fields (0-4) and the two HTML fragments (9, 10);
        # returning more values than outputs makes Gradio raise.
        return (*result[:5], result[9], result[10])

    except Exception as e:
        logger.error(f"Analysis handler error: {e}")
        return _error_outputs(f"❌ Analysis failed: {str(e)}")
763
 
764
def handle_file_analysis_safe(file_obj, similarity, build_ml):
    """Safe file analysis handler.

    Reads the sequence from the uploaded file and runs the pipeline on it.
    Returns exactly seven values, one per component wired to the Analyze
    File button: five text fields followed by the tree HTML and the report
    HTML.
    """

    def _error_outputs(message):
        # Show the message in the first and summary boxes and in both HTML panels.
        banner = f"<div style='color: red;'>{message}</div>"
        return (message, "", "", "", message, banner, banner)

    try:
        if file_obj is None:
            return _error_outputs("❌ Please upload a FASTA file")

        sequence = read_fasta_file(file_obj)
        if not sequence:
            return _error_outputs("❌ Failed to read sequence from file")

        result = run_pipeline_safe(sequence, similarity, build_ml)
        # run_pipeline_safe returns 13 fields, but the click handler lists
        # seven outputs (the two entries hidden in the collapsed diff are
        # presumed to be keras_output and ml_tree_output - confirm). Keep
        # the five text fields (0-4) and the two HTML fragments (9, 10).
        return (*result[:5], result[9], result[10])

    except Exception as e:
        logger.error(f"File analysis handler error: {e}")
        return _error_outputs(f"❌ File analysis failed: {str(e)}")
797
 
798
  # Connect event handlers
799
  analyze_btn.click(
800
+ fn=handle_analysis_safe,
801
  inputs=[dna_input, similarity_slider, ml_tree_checkbox],
802
  outputs=[
803
  boundary_output,
 
806
  tree_analysis_output,
807
  summary_output,
808
  tree_html,
809
+ report_html
810
+ ],
811
+ show_progress=True
 
 
 
812
  )
813
+
814
  analyze_file_btn.click(
815
+ fn=handle_file_analysis_safe,
816
  inputs=[file_input, file_similarity_slider, file_ml_tree_checkbox],
817
  outputs=[
818
  boundary_output,
 
821
  tree_analysis_output,
822
  summary_output,
823
  tree_html,
824
+ report_html
825
+ ],
826
+ show_progress=True
 
 
 
827
  )
828
 
829
  # Footer
830
  gr.Markdown("""
831
  ---
832
+ ### 🔬 About This Tool
833
 
834
+ This Gene Analysis Pipeline provides comprehensive DNA sequence analysis using:
835
+ - **Boundary Detection**: Machine learning-based gene region identification
836
+ - **Classification**: Deep learning confidence scoring for gene classification
837
+ - **Phylogenetic Analysis**: Maximum likelihood tree construction and placement
838
+ - **Interactive Visualization**: Dynamic tree and report generation
839
 
840
+ **Safe Mode**: When external tools or models are unavailable, the pipeline operates in safe mode with basic analysis capabilities.
 
 
 
841
 
842
+ ---
843
+ *Powered by Gradio • Built with ❤️ for genomics research*
 
 
 
 
 
 
 
 
 
844
  """)
845
 
846
  return iface
847
 
848
  except Exception as e:
849
+ logger.error(f"Interface creation failed: {e}")
850
+ # Return minimal interface on failure
851
def minimal_interface():
    """Build a one-textbox placeholder UI that reports the interface creation error."""
    # Format the message now. Python unbinds the except-clause variable `e`
    # when the handler exits, so a callback that read `e` lazily would raise
    # NameError the first time the fallback UI is used.
    error_text = f"❌ System Error: {str(e)}"
    return gr.Interface(
        fn=lambda x: error_text,
        inputs=gr.Textbox(label="Input"),
        outputs=gr.Textbox(label="Output"),
        title="Gene Analysis Pipeline - Error Mode"
    )
858
+ return minimal_interface()
859
 
860
+ # --- Main Execution ---
861
def main():
    """Main function with comprehensive error handling.

    Loads the models, builds the Gradio interface and serves it on
    0.0.0.0:7860. If any of that fails, a minimal emergency interface that
    only reports the error is launched on the same address; if that also
    fails the process exits with status 1.
    """
    try:
        logger.info("🚀 Starting Gene Analysis Pipeline...")

        # Load models
        load_models_safely()

        # Create interface
        logger.info("🎨 Creating Gradio interface...")
        iface = create_safe_gradio_interface()

        # Launch
        logger.info("🌐 Launching application...")
        # Queueing is enabled through .queue(); launch() no longer accepts
        # enable_queue or show_tips in current Gradio releases, and passing
        # them raises TypeError before the server starts.
        iface.queue()
        iface.launch(
            server_name="0.0.0.0",
            server_port=7860,
            share=False,
            show_error=True,
            max_threads=10
        )

    except KeyboardInterrupt:
        logger.info("🛑 Application stopped by user")
    except Exception as e:
        logger.error(f"❌ Critical error in main: {e}", exc_info=True)
        # Capture the text now so the callback does not depend on the
        # except-clause variable still being bound when it runs.
        startup_error = str(e)

        # Emergency fallback interface
        try:
            logger.info("🚨 Starting emergency fallback interface...")
            emergency_iface = gr.Interface(
                fn=lambda x: f"❌ System in emergency mode. Error: {startup_error}",
                inputs=gr.Textbox(label="DNA Sequence", placeholder="Emergency mode - limited functionality"),
                outputs=gr.Textbox(label="Status"),
                title="🚨 Gene Analysis Pipeline - Emergency Mode",
                description="The system is running in emergency mode due to initialization errors."
            )
            emergency_iface.launch(
                server_name="0.0.0.0",
                server_port=7860,
                share=False
            )
        except Exception as emergency_e:
            logger.error(f"❌ Emergency interface failed: {emergency_e}")
            print(f"CRITICAL: Complete system failure. Error: {startup_error}")
            print(f"Emergency fallback also failed: {emergency_e}")
            sys.exit(1)
910
 
 
911
  if __name__ == "__main__":
912
  main()