File size: 38,388 Bytes
1d23751
b42442e
1d23751
3c94bd8
b42442e
3c94bd8
1d23751
d6bbc7a
 
 
 
 
 
 
 
c3d2d01
 
 
740aa59
 
1dbbf09
f7b5b44
6a65f2a
c3d2d01
 
 
 
e52106a
9b7e7f5
c3d2d01
 
 
 
 
 
b03fbbd
c3d2d01
 
 
 
a79d7d1
c3d2d01
 
 
 
 
 
14ac608
c3d2d01
 
a79d7d1
c3d2d01
a79d7d1
c3d2d01
 
 
4d41dd0
 
 
1d23751
 
4d41dd0
 
1d23751
c3d2d01
4d41dd0
b42442e
1d23751
b42442e
4d41dd0
b42442e
3f97919
 
 
574dbbb
e2795d4
c3d2d01
574dbbb
 
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5ff3d5b
c3d2d01
 
 
 
5ff3d5b
c3d2d01
 
 
 
956abb6
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
8c08884
1dbbf09
e52106a
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c71a2d
 
 
 
 
 
 
 
 
c3d2d01
434da56
7537055
c3d2d01
 
 
 
 
 
 
5c71a2d
c3d2d01
 
 
 
5c71a2d
 
 
 
 
 
 
 
c3d2d01
5c71a2d
c3d2d01
5c71a2d
 
 
 
 
 
 
c3d2d01
 
5c71a2d
c3d2d01
 
5c71a2d
c3d2d01
5c71a2d
6443af8
5c71a2d
 
434da56
966c834
1d23751
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d23751
c3d2d01
 
1d23751
574dbbb
8c08884
574dbbb
4d41dd0
574dbbb
4d41dd0
574dbbb
 
4d41dd0
 
 
 
 
8c08884
c3d2d01
4d41dd0
574dbbb
1d23751
 
 
 
 
 
 
 
 
 
 
b42442e
1d23751
c3d2d01
1d23751
 
c3d2d01
574dbbb
4d41dd0
c3d2d01
 
 
 
 
7537055
574dbbb
 
c3d2d01
 
 
 
 
7537055
c3d2d01
 
7537055
4d41dd0
c3d2d01
7537055
c3d2d01
 
 
 
7537055
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
574dbbb
c3d2d01
 
 
 
 
 
 
574dbbb
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
574dbbb
c3d2d01
 
 
574dbbb
c3d2d01
 
 
7537055
4d41dd0
 
c3d2d01
 
 
 
 
4d41dd0
574dbbb
7537055
c3d2d01
 
 
7537055
574dbbb
14ac608
4d41dd0
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5c71a2d
c3d2d01
9c289cf
5c71a2d
9c289cf
434da56
9c289cf
5c71a2d
9c289cf
5c71a2d
434da56
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434da56
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
434da56
c3d2d01
 
 
 
 
 
 
 
 
7537055
434da56
9c289cf
434da56
c3d2d01
 
 
 
9c289cf
c3d2d01
434da56
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
7537055
1d23751
c3d2d01
4d41dd0
 
 
b42442e
4d41dd0
c3d2d01
 
 
 
 
4d41dd0
 
c3d2d01
4d41dd0
 
c3d2d01
 
 
 
 
 
 
 
 
 
4d41dd0
c3d2d01
1d23751
 
447480d
1d23751
b42442e
c3d2d01
6526046
1d23751
 
447480d
c3d2d01
 
 
 
 
447480d
c3d2d01
447480d
c3d2d01
 
447480d
c3d2d01
 
 
1d23751
 
447480d
1d23751
b42442e
447480d
a79d7d1
1d23751
447480d
c3d2d01
 
 
 
 
447480d
c3d2d01
447480d
c3d2d01
 
447480d
c3d2d01
 
b42442e
4d41dd0
c3d2d01
b42442e
 
 
c3d2d01
b42442e
 
c3d2d01
b42442e
c3d2d01
b42442e
c3d2d01
b42442e
 
 
c3d2d01
1dbbf09
b42442e
 
 
c3d2d01
1dbbf09
b42442e
c3d2d01
b42442e
c3d2d01
b42442e
c3d2d01
 
 
 
 
b42442e
c3d2d01
b42442e
c3d2d01
255fa59
c3d2d01
b42442e
 
c3d2d01
b42442e
 
eafd943
c3d2d01
eafd943
 
 
 
 
 
 
 
 
 
88cb35a
1d23751
eafd943
c3d2d01
4d41dd0
c3d2d01
 
c55e9fe
5ff3d5b
a79d7d1
d264132
eafd943
c3d2d01
4d41dd0
c3d2d01
 
c55e9fe
c3d2d01
 
 
 
 
 
 
a79d7d1
c3d2d01
 
778cbfd
b42442e
 
c3d2d01
 
 
 
 
 
 
 
 
 
 
 
 
b42442e
 
4d41dd0
447480d
 
c3d2d01
447480d
c3d2d01
 
 
 
434da56
c3d2d01
905619b
c31e216
3c94bd8
905619b
3c94bd8
 
 
905619b
3c94bd8
434da56
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c94bd8
 
434da56
028ac9e
905619b
 
 
 
3c94bd8
 
 
434da56
 
 
 
3c94bd8
 
434da56
905619b
 
3c94bd8
 
434da56
b82ceb2
c3d2d01
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
import os
# Disable GPU to avoid CUDA errors
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"  # Prevent TensorFlow memory issues
# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # More aggressive suppression

import gradio as gr
import torch
import pickle
import subprocess
import pandas as pd
import re
import logging
import numpy as np
from predictor import EnhancedGenePredictor
from tensorflow.keras.models import load_model
from analyzer import PhylogeneticTreeAnalyzer
import tempfile
import shutil
import sys
import uuid
from pathlib import Path
from huggingface_hub import hf_hub_download
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
import stat
import time
import asyncio
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import HTMLResponse, FileResponse
from pydantic import BaseModel
from typing import Optional
import uvicorn

# --- Logging Setup ---
# One formatter (timestamp - level - message) is shared by every handler.
log_formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
# Console handler; it is installed in both the normal and the fallback path below.
log_handler = logging.StreamHandler()
log_handler.setFormatter(log_formatter)
try:
    # Additionally log to /tmp/app.log when that file can be opened.
    file_handler = logging.FileHandler('/tmp/app.log')
    file_handler.setFormatter(log_formatter)
    logging.basicConfig(level=logging.INFO, handlers=[log_handler, file_handler])
except Exception as e:
    # File logging is best-effort: fall back to console-only logging and
    # record why the file handler could not be installed.
    logging.basicConfig(level=logging.INFO, handlers=[log_handler])
    logging.warning(f"Failed to set up file logging: {e}")

# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
logger.info(f"Gradio version: {gr.__version__}")

# Set event loop policy for compatibility with Gradio Spaces
# (a failure here is only logged; startup continues with the current policy).
try:
    asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
except Exception as e:
    logger.warning(f"Failed to set event loop policy: {e}")

# --- Global Variables ---
# All bundled resources are resolved relative to the directory containing this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Bundled tool binaries; check_tool_availability() also probes system-wide installs.
MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
# Reference alignment and tree that phylogenetic_placement() adds queries to.
ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
# Per-query FASTA/alignment/tree outputs are written here; created at import time.
QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)

# Model repository and file paths
# MODEL_REPO is the Hugging Face Hub repo id the model files are downloaded from.
MODEL_REPO = "GGproject10/best_boundary_aware_model"
# NOTE: the file name contains a space; load_models_safely() also tries "f_cleaned.csv".
CSV_PATH = "f cleaned.csv"

# Initialize models as None
# These are populated by load_models_safely(); each stays None if its load fails,
# and downstream code checks for that before using them.
boundary_model = None
keras_model = None
kmer_to_index = None
analyzer = None

# --- Model Loading ---
def _load_boundary_model():
    """Download the boundary-aware checkpoint and wrap it in EnhancedGenePredictor.

    On any exception the global ``boundary_model`` is reset to None. If the
    download returns a path that does not exist, an error is logged and the
    global is left as it was.
    """
    global boundary_model
    try:
        checkpoint = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_boundary_aware_model.pth",
            token=None,
        )
        if not os.path.exists(checkpoint):
            logger.error("❌ Boundary model file not found after download.")
            return
        boundary_model = EnhancedGenePredictor(checkpoint)
        logger.info("✅ Boundary model loaded successfully.")
    except Exception as exc:
        logger.error(f"❌ Failed to load boundary model: {exc}")
        boundary_model = None


def _load_keras_model():
    """Download the Keras model and its k-mer vocabulary and load both.

    On any exception both ``keras_model`` and ``kmer_to_index`` are reset to
    None. If either downloaded path is missing, an error is logged and the
    globals are left as they were.
    """
    global keras_model, kmer_to_index
    try:
        model_file = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="best_model.keras",
            token=None,
        )
        vocab_file = hf_hub_download(
            repo_id=MODEL_REPO,
            filename="kmer_to_index.pkl",
            token=None,
        )
        if not (os.path.exists(model_file) and os.path.exists(vocab_file)):
            logger.error("❌ Keras model or k-mer files not found.")
            return
        keras_model = load_model(model_file)
        # NOTE(review): pickle.load can execute arbitrary code; this is only
        # acceptable while MODEL_REPO is a repository the maintainers control.
        with open(vocab_file, "rb") as handle:
            kmer_to_index = pickle.load(handle)
        logger.info("✅ Keras model and k-mer index loaded successfully.")
    except Exception as exc:
        logger.error(f"❌ Failed to load Keras model: {exc}")
        keras_model = None
        kmer_to_index = None


def _init_tree_analyzer():
    """Create the PhylogeneticTreeAnalyzer, load its CSV data and train its model.

    The first existing CSV candidate that ``load_data`` accepts is used. If no
    candidate loads, or anything unexpected raises, ``analyzer`` is set to
    None. A failed ``train_ai_model`` only produces a warning.
    """
    global analyzer
    try:
        logger.info("🌳 Initializing tree analyzer...")
        analyzer = PhylogeneticTreeAnalyzer()
        csv_candidates = [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ]
        for candidate_path in csv_candidates:
            if not os.path.exists(candidate_path):
                continue
            logger.info(f"📊 Trying CSV: {candidate_path}")
            try:
                if analyzer.load_data(candidate_path):
                    logger.info(f"✅ CSV loaded from: {candidate_path}")
                    break
            except Exception as exc:
                logger.warning(f"CSV load failed for {candidate_path}: {exc}")
        else:
            # The loop finished without a successful load.
            logger.error("❌ Failed to load CSV data from any candidate location.")
            analyzer = None
            return
        try:
            if analyzer.train_ai_model():
                logger.info("✅ AI model training completed successfully")
            else:
                logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
        except Exception as exc:
            logger.warning(f"⚠️ AI model training failed: {exc}")
    except Exception as exc:
        logger.error(f"❌ Tree analyzer initialization failed: {exc}")
        analyzer = None


def load_models_safely():
    """Populate the module-level model globals, logging (never raising) on failure.

    Runs three independent stages in order: boundary model, Keras model with
    k-mer index, and tree analyzer. A failure in one stage does not prevent
    the others from running.
    """
    logger.info("🔍 Loading models...")
    _load_boundary_model()
    _load_keras_model()
    _init_tree_analyzer()

# Load models at startup
# (runs at import time; load_models_safely() logs failures instead of raising,
# leaving the corresponding model global set to None)
load_models_safely()

# --- Tool Detection ---
def setup_binary_permissions():
    """Add the owner-execute bit to the bundled MAFFT and IQ-TREE binaries.

    Binaries that do not exist are skipped; a failing chmod is logged as a
    warning and does not stop processing of the remaining binary.
    """
    for binary in (MAFFT_PATH, IQTREE_PATH):
        if not os.path.exists(binary):
            continue
        try:
            current_mode = os.stat(binary).st_mode
            os.chmod(binary, current_mode | stat.S_IEXEC)
            logger.info(f"Set executable permission on {binary}")
        except Exception as e:
            logger.warning(f"Failed to set permission on {binary}: {e}")

def _probe_tool(candidates, marker, label):
    """Return the first candidate command that behaves like the expected tool.

    Args:
        candidates: Command names or paths to try, in priority order.
        marker: Lower-case text expected in the tool's stderr (e.g. "mafft").
        label: Human-readable tool name used in log messages.

    Returns:
        The first working candidate, or None when none of them works.
    """
    for candidate in candidates:
        # Only run something that resolves on PATH or exists on disk.
        if not (shutil.which(candidate) or os.path.exists(candidate)):
            continue
        try:
            result = subprocess.run(
                [candidate, "--help"],
                capture_output=True,
                text=True,
                timeout=5
            )
            # A non-zero exit is still accepted when stderr names the tool.
            if result.returncode == 0 or marker in result.stderr.lower():
                logger.info(f"✅ {label} found at: {candidate}")
                return candidate
        except Exception as e:
            logger.debug(f"{label} test failed for {candidate}: {e}")
    return None


def check_tool_availability():
    """Locate usable MAFFT and IQ-TREE executables.

    Makes the bundled binaries executable first, then probes system-wide
    names/paths before the bundled copies.

    Returns:
        (mafft_available, iqtree_available, mafft_cmd, iqtree_cmd) where the
        availability flags are booleans and each command is the working
        candidate string, or None when no candidate works.
    """
    setup_binary_permissions()
    mafft_cmd = _probe_tool(
        ['mafft', '/usr/bin/mafft', '/usr/local/bin/mafft', MAFFT_PATH],
        "mafft",
        "MAFFT",
    )
    iqtree_cmd = _probe_tool(
        ['iqtree', 'iqtree2', 'iqtree3', '/usr/bin/iqtree', '/usr/local/bin/iqtree', IQTREE_PATH],
        "iqtree",
        "IQ-TREE",
    )
    return mafft_cmd is not None, iqtree_cmd is not None, mafft_cmd, iqtree_cmd

# --- Pipeline Functions ---
def cleanup_file(file_path: str) -> None:
    """Delete ``file_path`` if it exists; never raise.

    Falsy or non-existent paths are ignored. A failing delete is logged as a
    warning instead of propagating.
    """
    if not file_path or not os.path.exists(file_path):
        return
    try:
        os.unlink(file_path)
        logger.debug(f"Cleaned up {file_path}")
    except Exception as cleanup_error:
        logger.warning(f"Failed to clean up {file_path}: {cleanup_error}")

def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence on the reference tree using MAFFT and IQ-TREE.

    The query is written to a FASTA file in QUERY_OUTPUT_DIR, added to the
    reference alignment with ``mafft --add``, and IQ-TREE is then run with the
    reference tree as a constraint (``-g``).

    Args:
        sequence: Query nucleotide sequence; surrounding whitespace is ignored.
        mafft_cmd: MAFFT executable name or path.
        iqtree_cmd: IQ-TREE executable name or path.

    Returns:
        (success, message, aligned_path, tree_path). ``aligned_path`` is set
        whenever IQ-TREE ran to completion, even if no tree file was produced;
        ``tree_path`` is only set on success. Exceptions are logged and
        reported through ``message`` rather than raised.
    """
    query_fasta = None
    aligned_with_query = None
    # True once the alignment path is handed back to the caller, who then owns it.
    keep_alignment = False
    try:
        # Strip once so the length check and the written record agree.
        sequence = sequence.strip()
        if len(sequence) < 100:
            return False, "Sequence too short (<100 bp).", None, None
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, "Reference alignment or tree not found.", None, None
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
        query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")
        # MAFFT writes the combined alignment to stdout, which is redirected to the file.
        with open(aligned_with_query, "w") as output_file:
            subprocess.run(
                [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                stdout=output_file,
                stderr=subprocess.PIPE,
                text=True,
                timeout=600,
                check=True
            )
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            return False, "MAFFT alignment failed.", None, None
        subprocess.run(
            [iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH, "-m", "GTR+G", "-pre", output_prefix, "-redo"],
            capture_output=True,
            text=True,
            timeout=1200,
            check=True
        )
        treefile = f"{output_prefix}.treefile"
        # Both remaining outcomes return the alignment path to the caller.
        keep_alignment = True
        if not os.path.exists(treefile):
            return False, "IQ-TREE placement failed.", aligned_with_query, None
        success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
        return True, success_msg, aligned_with_query, treefile
    except Exception as main_error:
        # CalledProcessError/TimeoutExpired carry the tool's stderr, which is
        # usually the only useful diagnostic for a failed external run.
        tool_stderr = getattr(main_error, "stderr", None)
        if tool_stderr:
            logger.error(f"External tool stderr: {tool_stderr}")
        logger.error(f"Phylogenetic placement failed: {main_error}", exc_info=True)
        return False, f"Error: {str(main_error)}", None, None
    finally:
        # The query FASTA is always temporary; the alignment is removed only
        # when it was not returned, so failed runs do not leave files behind.
        if query_fasta:
            cleanup_file(query_fasta)
        if aligned_with_query and not keep_alignment:
            cleanup_file(aligned_with_query)

def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Run the similarity-tree analysis for ``sequence`` with the global analyzer.

    Args:
        sequence: Query sequence; must contain at least 10 characters after stripping.
        matching_percentage: Similarity threshold, accepted in the range 1-99.

    Returns:
        (message, tree_html_path, report_html_path). Both paths are None when
        validation or analysis fails; ``report_html_path`` is also None when
        the analyzer reports that no detailed report was generated.
    """

    def _fail(message):
        # Every failure shares the same shape: a message and no output files.
        return message, None, None

    try:
        logger.debug("Starting tree analysis...")
        if not analyzer:
            return _fail("❌ Tree analyzer not initialized.")
        if not sequence or len(sequence.strip()) < 10:
            return _fail("❌ Invalid sequence.")
        if not (1 <= matching_percentage <= 99):
            return _fail("❌ Matching percentage must be 1-99.")

        logger.debug("Finding query sequence...")
        accepted = analyzer.find_query_sequence(sequence)
        if not accepted:
            return _fail("❌ Sequence not accepted.")

        logger.debug("Finding similar sequences...")
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return _fail(f"❌ No similar sequences at {matching_percentage}% threshold.")

        logger.debug("Building tree structure...")
        analyzer.build_tree_structure_with_ml_safe(matched_ids)

        logger.debug("Creating interactive tree...")
        figure = analyzer.create_interactive_tree(matched_ids, actual_percentage)

        # Fall back to a timestamp-based id when the analyzer did not assign one.
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        tree_html_path = os.path.join("/tmp", f'phylogenetic_tree_{query_id}.html')
        logger.debug(f"Saving tree to {tree_html_path}")
        figure.write_html(tree_html_path)

        analyzer.matching_percentage = matching_percentage
        logger.debug("Generating detailed report...")
        report_written = analyzer.generate_detailed_report(matched_ids, actual_percentage)
        # NOTE(review): assumes the analyzer writes its report to this /tmp path — confirm.
        if report_written:
            report_html_path = os.path.join("/tmp", f'detailed_report_{query_id}.html')
        else:
            report_html_path = None

        match_count = len(matched_ids)
        logger.debug(f"Tree analysis completed: {match_count} matches")
        summary = f"✅ Found {match_count} sequences at {actual_percentage:.2f}% similarity."
        return summary, tree_html_path, report_html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}", exc_info=True)
        return _fail(f"❌ Error: {str(e)}")

def predict_with_keras(sequence):
    """Score ``sequence`` with the Keras model and return a confidence message.

    The sequence is cut into overlapping 6-mers, each mapped to its index in
    ``kmer_to_index`` (unknown k-mers map to 0), and the resulting index list
    is passed to the model as a single-item batch. The last value of the
    model's output for that item is reported as a percentage clamped to 0-100.

    Returns:
        A status string; errors are logged and returned as a message.
    """
    kmer_size = 6
    try:
        if not keras_model or not kmer_to_index:
            return "❌ Keras model not available."
        if len(sequence) < kmer_size:
            return "❌ Sequence too short (<6 bp)."
        window_count = len(sequence) - kmer_size + 1
        indices = [
            kmer_to_index.get(sequence[start:start + kmer_size], 0)
            for start in range(window_count)
        ]
        batch = np.array([indices])
        scores = keras_model.predict(batch, verbose=0)[0]
        f_gene_prob = scores[-1]
        # NOTE(review): the "+ 5" raises every reported confidence by five
        # points before clamping; the reason is not visible here — confirm it is intended.
        raw_percentage = int(f_gene_prob * 100 + 5)
        percentage = min(100, max(0, raw_percentage))
        return f"✅ {percentage}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}", exc_info=True)
        return f"❌ Error: {str(e)}"

def read_fasta_file(file_obj):
    """Return the sequence text of a FASTA source as one concatenated string.

    Header lines (starting with ">" after stripping) are dropped and all
    remaining lines are stripped and joined, so a multi-record file yields
    the concatenation of every record's sequence.

    Args:
        file_obj: None, a filesystem path (``str`` or ``os.PathLike``), or a
            file-like object whose ``read()`` returns ``bytes`` or ``str``.

    Returns:
        The joined sequence, or "" when ``file_obj`` is None or reading fails
        (the failure is logged).
    """
    try:
        if file_obj is None:
            return ""
        if isinstance(file_obj, (str, os.PathLike)):
            # Undecodable bytes in a header must not discard the whole file.
            with open(file_obj, "r", encoding="utf-8", errors="replace") as f:
                content = f.read()
        else:
            content = file_obj.read()
            # Binary streams return bytes; text-mode streams already return str.
            if isinstance(content, (bytes, bytearray)):
                content = content.decode("utf-8", errors="replace")
        # splitlines() handles \n, \r\n and \r; strip before the header test
        # so an indented ">" line is not mistaken for sequence data.
        stripped_lines = (line.strip() for line in content.splitlines())
        return ''.join(line for line in stripped_lines if not line.startswith(">"))
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}", exc_info=True)
        return ""

def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run the complete analysis pipeline on a raw DNA string.

    Stages, in order: input normalisation, F gene extraction with the boundary
    model (when loaded), Keras validation, optional MAFFT/IQ-TREE placement,
    and the similarity-tree analysis.

    Args:
        dna_input: Nucleotide text. All whitespace is removed, the text is
            upper-cased, and any character outside ACTGN is replaced by "N".
            None is treated as empty input.
        similarity_score: Threshold passed to analyze_sequence_for_tree().
        build_ml_tree: When True, attempt phylogenetic placement.

    Returns:
        A 13-tuple: (boundary_output, keras_output, ml_tree_output,
        simplified_ml_output, summary_output, aligned_file, phy_file, None,
        None, tree_html_content, report_html_content, tree_html_path,
        report_html_path). On empty input or an unexpected error the first
        element carries the message and ``summary_output`` is "".
    """
    try:
        # Remove internal whitespace too: a sequence pasted over several lines
        # would otherwise get an "N" base for every line break.
        dna_input = re.sub(r'\s+', '', dna_input or "").upper()
        if not dna_input:
            return "❌ Empty input", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        if not re.fullmatch('[ACTGN]+', dna_input):
            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)

        # --- Stage 1: F gene boundary detection ---
        processed_sequence = dna_input
        boundary_output = ""
        if boundary_model:
            try:
                result = boundary_model.predict_sequence(dna_input)
                regions = result['gene_regions']
                if regions:
                    # Only the first predicted region is analysed further.
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = f"✅ F gene region found: {len(processed_sequence)} bp"
                else:
                    boundary_output = "⚠️ No F gene regions found."
                    processed_sequence = dna_input
            except Exception as e:
                boundary_output = f"❌ Boundary prediction error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"

        # --- Stage 2: Keras validation ---
        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."

        # --- Stage 3: optional phylogenetic placement ---
        aligned_file = None
        phy_file = None
        ml_tree_output = ""
        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
            try:
                mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
                if mafft_available and iqtree_available:
                    ml_success, ml_message, ml_aligned, ml_tree = phylogenetic_placement(processed_sequence, mafft_cmd, iqtree_cmd)
                    ml_tree_output = ml_message
                    aligned_file = ml_aligned
                    phy_file = ml_tree
                else:
                    ml_tree_output = "❌ MAFFT or IQ-TREE not available"
            except Exception as e:
                ml_tree_output = f"❌ ML tree error: {str(e)}"
        elif build_ml_tree:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."

        # --- Stage 4: similarity-tree analysis ---
        tree_html_content = "No tree generated."
        report_html_content = "No report generated."
        tree_html_path = None
        report_html_path = None
        simplified_ml_output = ""
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            try:
                tree_result, tree_html_path, report_html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
                simplified_ml_output = tree_result
                if tree_html_path and os.path.exists(tree_html_path):
                    with open(tree_html_path, 'r', encoding='utf-8') as f:
                        tree_html_content = f.read()
                else:
                    tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
                if report_html_path and os.path.exists(report_html_path):
                    with open(report_html_path, 'r', encoding='utf-8') as f:
                        report_html_content = f.read()
                else:
                    report_html_content = f"<div style='color: red;'>{tree_result}</div>"
            except Exception as e:
                simplified_ml_output = f"❌ Tree analysis error: {str(e)}"
                tree_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
                report_html_content = f"<div style='color: red;'>{simplified_ml_output}</div>"
        else:
            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
            report_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"

        # --- Summary ---
        summary_output = f"""
📊 ANALYSIS SUMMARY:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Input: {len(dna_input)} bp
F Gene: {len(processed_sequence)} bp
Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skipped' in ml_tree_output else '❌ Failed'}
Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, None, None, tree_html_content, report_html_content,
            tree_html_path, report_html_path
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}", exc_info=True)
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None

async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
    """Run the pipeline on an uploaded or on-disk FASTA file.

    The input is copied to a temporary ``.fasta`` file under /tmp, parsed with
    read_fasta_file(), and the resulting sequence is passed to run_pipeline().
    The temporary file is always removed before returning.

    Args:
        fasta_file_obj: None, an ``UploadFile``, or anything ``open()`` accepts
            as a path.
        similarity_score: Forwarded to run_pipeline().
        build_ml_tree: Forwarded to run_pipeline().

    Returns:
        The 13-tuple produced by run_pipeline(), or a tuple of the same shape
        whose first element describes the failure.
    """
    temp_file_path = None
    try:
        if fasta_file_obj is None:
            return "❌ No file provided", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            # Record the path before reading so a failed read cannot leak the file.
            temp_file_path = temp_file.name
            if isinstance(fasta_file_obj, UploadFile):
                content = await fasta_file_obj.read()
            else:
                with open(fasta_file_obj, 'rb') as f:
                    content = f.read()
            temp_file.write(content)
        dna_input = read_fasta_file(temp_file_path)
        if not dna_input:
            return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
        return run_pipeline(dna_input, similarity_score, build_ml_tree)
    except Exception as main_error:
        logger.error(f"Pipeline from file error: {main_error}", exc_info=True)
        error_msg = f"❌ Error: {str(main_error)}"
        return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
    finally:
        # Single cleanup point for every exit path.
        if temp_file_path is not None:
            cleanup_file(temp_file_path)

class AnalysisRequest(BaseModel):
    # JSON request body for POST /analyze; the fields map one-to-one onto the
    # arguments of run_pipeline().
    sequence: str  # raw DNA text to analyse
    similarity_score: float = 95.0  # similarity threshold for the tree analysis
    build_ml_tree: bool = False  # when True, also attempt MAFFT/IQ-TREE placement

class AnalysisResponse(BaseModel):
    # Response body shared by POST /analyze and POST /analyze-file. The text
    # fields are filled from the first five elements of the run_pipeline() tuple.
    boundary_output: str  # boundary-model status message
    keras_output: str  # Keras validation status message
    ml_tree_output: str  # phylogenetic placement status message
    tree_analysis_output: str  # similarity-tree analysis status message
    summary_output: str  # multi-line text summary of the run
    success: bool  # set by the endpoint handlers
    error_message: Optional[str] = None  # populated by the handlers on failure
    tree_html_path: Optional[str] = None  # server-side path of the tree HTML, if any
    report_html_path: Optional[str] = None  # server-side path of the report HTML, if any

# --- FastAPI App Setup ---
# ASGI application object; the route decorators below register their handlers on it.
app = FastAPI(title="🧬 Gene Analysis Pipeline", version="1.0.0")

@app.get("/")
async def root():
    # Landing endpoint: reports that the service is running and lists the
    # routes a client can use.
    endpoint_index = {
        "docs": "/docs",
        "health": "/health",
        "gradio": "/gradio",
        "analyze": "/analyze",
        "analyze_file": "/analyze-file",
        "download": "/download/{file_type}/{query_id}"
    }
    payload = {
        "message": "🧬 Gene Analysis Pipeline API",
        "status": "running",
        "endpoints": endpoint_index
    }
    return payload

@app.get("/health")
async def health_check():
    # Reports which models are loaded and whether the external tools can be
    # found. Any failure while gathering that information is logged and
    # returned as an "unhealthy" payload instead of an HTTP error.
    try:
        has_mafft, has_iqtree, _mafft_cmd, _iqtree_cmd = check_tool_availability()
        components = {
            "boundary_model": boundary_model is not None,
            "keras_model": keras_model is not None,
            "tree_analyzer": analyzer is not None,
            "mafft_available": has_mafft,
            "iqtree_available": has_iqtree
        }
        paths = {
            "base_dir": BASE_DIR,
            "query_output_dir": QUERY_OUTPUT_DIR
        }
        return {"status": "healthy", "components": components, "paths": paths}
    except Exception as e:
        logger.error(f"Health check error: {e}", exc_info=True)
        return {"status": "unhealthy", "error": str(e)}

@app.post("/analyze", response_model=AnalysisResponse)
async def analyze_sequence(request: AnalysisRequest):
    # Runs the pipeline on the sequence in the JSON body and maps the result
    # tuple onto AnalysisResponse.
    try:
        result = run_pipeline(request.sequence, request.similarity_score, request.build_ml_tree)
        # run_pipeline() reports its own failures by returning a tuple whose
        # summary (index 4) is empty and whose first element is the error
        # text; a completed run always has a non-empty summary. Without this
        # check the API would claim success for a failed run.
        pipeline_ok = bool(result[4])
        return AnalysisResponse(
            boundary_output=result[0] or "",
            keras_output=result[1] or "",
            ml_tree_output=result[2] or "",
            tree_analysis_output=result[3] or "",
            summary_output=result[4] or "",
            tree_html_path=result[11],
            report_html_path=result[12],
            success=pipeline_ok,
            error_message=None if pipeline_ok else (result[0] or "Pipeline failed.")
        )
    except Exception as e:
        logger.error(f"Analyze error: {e}", exc_info=True)
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            tree_html_path=None, report_html_path=None,
            success=False, error_message=str(e)
        )

@app.post("/analyze-file", response_model=AnalysisResponse)
async def analyze_file(
    file: UploadFile = File(...),
    similarity_score: float = Form(95.0),
    build_ml_tree: bool = Form(False)
):
    # Saves the uploaded FASTA to a temporary file, runs the file pipeline on
    # it and maps the result tuple onto AnalysisResponse. The temporary file
    # is removed on every exit path.
    temp_file_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            # Record the path first so a failed read/write cannot leak the file.
            temp_file_path = temp_file.name
            content = await file.read()
            temp_file.write(content)
        result = await run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        # The pipeline reports its own failures through a tuple whose summary
        # (index 4) is empty and whose first element is the error text.
        pipeline_ok = bool(result[4])
        return AnalysisResponse(
            boundary_output=result[0] or "",
            keras_output=result[1] or "",
            ml_tree_output=result[2] or "",
            tree_analysis_output=result[3] or "",
            summary_output=result[4] or "",
            tree_html_path=result[11],
            report_html_path=result[12],
            success=pipeline_ok,
            error_message=None if pipeline_ok else (result[0] or "Pipeline failed.")
        )
    except Exception as main_error:
        logger.error(f"Analyze-file error: {main_error}", exc_info=True)
        return AnalysisResponse(
            boundary_output="", keras_output="", ml_tree_output="",
            tree_analysis_output="", summary_output="",
            tree_html_path=None, report_html_path=None,
            success=False, error_message=str(main_error)
        )
    finally:
        cleanup_file(temp_file_path)

@app.get("/download/{file_type}/{query_id}")
async def download_file(file_type: str, query_id: str):
    # Serves a previously generated tree or report HTML file from /tmp.
    # Responds 400 for an unknown file type or a query id containing a path
    # separator, 404 when the file does not exist, 500 on unexpected errors.
    try:
        if file_type not in ("tree", "report"):
            raise HTTPException(status_code=400, detail="Invalid file type. Use 'tree' or 'report'.")
        file_name = f"phylogenetic_tree_{query_id}.html" if file_type == "tree" else f"detailed_report_{query_id}.html"
        # query_id comes from the URL: refuse anything that could make the
        # joined path leave /tmp.
        if os.path.basename(file_name) != file_name or "\\" in query_id:
            raise HTTPException(status_code=400, detail="Invalid query id.")
        file_path = os.path.join("/tmp", file_name)
        if not os.path.isfile(file_path):
            raise HTTPException(status_code=404, detail="File not found.")
        return FileResponse(file_path, filename=file_name, media_type="text/html")
    except HTTPException:
        # Deliberate 400/404 responses must pass through unchanged; the
        # generic handler below would otherwise turn them into 500s.
        raise
    except Exception as e:
        logger.error(f"Download error: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"Error serving file: {str(e)}")

# --- Gradio Interface ---
def create_gradio_interface():
    """Build the Gradio UI for the gene analysis pipeline.

    The layout consists of a system status panel, two input tabs (pasted
    sequence and FASTA upload), text result boxes, initially hidden download
    file components, and two HTML tabs for the tree and the report.

    Returns:
        The assembled ``gr.Blocks`` app. If anything fails while building it,
        the error is logged and a minimal ``gr.Interface`` is returned whose
        handler simply reports the construction error.
    """
    try:
        # Probe the external tools once; the status panel needs two fields of
        # the same result and the check should not be repeated per field.
        tool_status = check_tool_availability()
        with gr.Blocks(
            title="🧬 Gene Analysis Pipeline",
            theme=gr.themes.Soft(),
            css="""
            .gradio-container { max-width: 1200px !important; }
            .status-box { padding: 10px; border-radius: 5px; margin: 5px 0; }
            .success { background-color: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
            .warning { background-color: #fff3cd; border: 1px solid #ffeaa7; color: #856404; }
            .error { background-color: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
            """
        ) as iface:
            gr.Markdown("# 🧬 Gene Analysis Pipeline")
            with gr.Row():
                with gr.Column():
                    status_display = gr.HTML(value=f"""
                    <div class="status-box">
                        <h3>🔧 System Status</h3>
                        <p>🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}</p>
                        <p>🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}</p>
                        <p>🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}</p>
                        <p>🧬 MAFFT: {'✅ Available' if tool_status[0] else '❌ Missing'}</p>
                        <p>🌲 IQ-TREE: {'✅ Available' if tool_status[1] else '❌ Missing'}</p>
                    </div>
                    """)
            with gr.Tabs():
                with gr.TabItem("📝 Text Input"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            gr.Markdown("Paste your DNA sequence here")
                            dna_input = gr.Textbox(
                                label="🧬 DNA Sequence",
                                placeholder="Enter DNA sequence (ATCG format)...",
                                lines=5
                            )
                        with gr.Column(scale=1):
                            gr.Markdown("Minimum similarity for tree analysis")
                            similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)"
                            )
                            gr.Markdown("Generate phylogenetic placement (slower)")
                            build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False
                            )
                            analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
                with gr.TabItem("📁 File Upload"):
                    with gr.Row():
                        with gr.Column(scale=2):
                            gr.Markdown("Upload a FASTA file containing your sequence")
                            file_input = gr.File(
                                label="📄 Upload FASTA File",
                                file_types=[".fasta", ".fa", ".fas", ".txt"]
                            )
                        with gr.Column(scale=1):
                            gr.Markdown("Minimum similarity for tree analysis")
                            file_similarity_score = gr.Slider(
                                minimum=1,
                                maximum=99,
                                value=95.0,
                                step=1.0,
                                label="🎯 Similarity Threshold (%)"
                            )
                            gr.Markdown("Generate phylogenetic placement (slower)")
                            file_build_ml_tree = gr.Checkbox(
                                label="🌲 Build ML Tree",
                                value=False
                            )
                            analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
            gr.Markdown("## 📊 Analysis Results")
            with gr.Row():
                with gr.Column():
                    boundary_output = gr.Textbox(
                        label="🎯 Boundary Detection",
                        interactive=False,
                        lines=2
                    )
                    keras_output = gr.Textbox(
                        label="🧠 F Gene Validation",
                        interactive=False,
                        lines=2
                    )
                with gr.Column():
                    ml_tree_output = gr.Textbox(
                        label="🌲 Phylogenetic Placement",
                        interactive=False,
                        lines=2
                    )
                    tree_analysis_output = gr.Textbox(
                        label="🌳 Tree Analysis",
                        interactive=False,
                        lines=2
                    )
            summary_output = gr.Textbox(
                label="📋 Summary",
                interactive=False,
                lines=8
            )
            # Download components start hidden; they only appear if an event
            # handler returns an update that makes them visible.
            with gr.Row():
                aligned_file = gr.File(label="📄 Alignment File", visible=False)
                tree_file = gr.File(label="🌲 Tree File", visible=False)
                tree_html_file = gr.File(label="🌳 Simplified Tree HTML", visible=False)
                report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
            with gr.Tabs():
                with gr.TabItem("🌳 Interactive Tree"):
                    tree_html = gr.HTML(
                        value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet. Run analysis to see results.</div>"
                    )
                with gr.TabItem("📊 Detailed Report"):
                    report_html = gr.HTML(
                        label="Analysis Report",
                        value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet. Run analysis to see results.</div>"
                    )

            # Event handlers
            # NOTE(review): this adapter maps a 13-value pipeline result onto
            # the 11 result components (toggling file visibility), but it is
            # not attached to any event below - the buttons call the pipeline
            # functions directly. Confirm the pipeline's return contract
            # before wiring it in or deleting it.
            def handle_analysis_output(*outputs):
                boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output, aligned_file, phy_file, _, _, tree_html_content, report_html_content, tree_html_path, report_html_path = outputs
                return (
                    boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
                    gr.File.update(value=aligned_file, visible=aligned_file is not None),
                    gr.File.update(value=phy_file, visible=phy_file is not None),
                    gr.File.update(value=tree_html_path, visible=tree_html_path is not None),
                    gr.File.update(value=report_html_path, visible=report_html_path is not None),
                    tree_html_content,
                    report_html_content
                )

            # Both buttons write to the same set of result components, so the
            # list is declared once to keep the two bindings in sync.
            result_components = [
                boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
                aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
            ]

            analyze_btn.click(
                fn=run_pipeline,
                inputs=[dna_input, similarity_score, build_ml_tree],
                outputs=result_components
            )

            analyze_file_btn.click(
                fn=run_pipeline_from_file,
                inputs=[file_input, file_similarity_score, file_build_ml_tree],
                outputs=result_components
            )

            # Examples
            gr.Examples(
                examples=[
                    ["ATCG" * 250, 85.0, False],
                    ["CGATCG" * 150, 90.0, True]
                ],
                inputs=[dna_input, similarity_score, build_ml_tree],
                label="Example Sequences"
            )

            gr.Markdown("""
            ## 📚 Instructions
            1. **Input**: Enter a DNA sequence (ATCG format) or upload a FASTA file
            2. **Parameters**: 
               - Set similarity threshold for phylogenetic analysis (1-99%)
               - Choose whether to build ML tree (slower but more accurate)
            3. **Analysis**: Click analyze to run the complete pipeline
            4. **Results**: View results in different tabs - summary, tree visualization, and detailed report
            5. **Downloads**: Download alignment, tree, simplified tree HTML, and detailed report HTML files
            ### 🔬 Pipeline Components:
            - **Boundary Detection**: Identifies F gene regions
            - **F Gene Validation**: Validates F gene using ML
            - **Phylogenetic Placement**: Places sequence in reference tree (optional)
            - **Tree Analysis**: Builds phylogenetic tree with similar sequences
            """)

        return iface
    except Exception as main_error:
        logger.error(f"Gradio interface creation failed: {main_error}", exc_info=True)
        # Capture the message now: Python unbinds `main_error` as soon as this
        # except block exits, so a lambda that referenced it directly would
        # raise NameError when the fallback UI is actually used.
        error_text = str(main_error)
        return gr.Interface(
            fn=lambda x: f"Error: {error_text}",
            inputs=gr.Textbox(label="DNA Sequence"),
            outputs=gr.Textbox(label="Error"),
            title="🧬 Gene Analysis Pipeline (Error Mode)"
        )

# --- Application Startup ---
def run_application():
    """Start the service: FastAPI with the Gradio UI mounted at /gradio.

    Builds the Gradio interface, mounts it on the FastAPI ``app``, logs the
    availability of the models and external tools, and serves everything with
    uvicorn on 0.0.0.0:7860 (blocking).

    If any of that raises, the error is logged and a Gradio-only server is
    launched on the same port instead. If the fallback also fails, a message
    is printed and the process exits with status 1.
    """
    try:
        logger.info("🧬 Initializing Gene Analysis Pipeline...")
        main_gradio_app = create_gradio_interface()
        if main_gradio_app is None:
            raise RuntimeError("Gradio interface creation returned None")
        logger.info("✅ Gradio interface created successfully")
        # mount_gradio_app attaches the UI to `app` and returns that same
        # FastAPI app; `app` is what gets served below, so the return value
        # is not needed here.
        gr.mount_gradio_app(app, main_gradio_app, path="/gradio")
        logger.info("✅ Gradio mounted to FastAPI at /gradio")
        logger.info("=" * 50)
        logger.info("🔍 Checking system components...")
        logger.info(f"🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}")
        logger.info(f"🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}")
        logger.info(f"🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}")
        mafft_available, iqtree_available, _, _ = check_tool_availability()
        logger.info(f"🧬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'}")
        logger.info(f"🌲 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'}")
        logger.info("=" * 50)
        logger.info("🚀 Starting Gene Analysis Pipeline...")
        logger.warning("⚠️ Running without request queuing. Concurrent requests may block.")
        logger.info("📊 FastAPI docs available at: http://localhost:7860/docs")
        logger.info("🧬 Gradio interface available at: http://localhost:7860/gradio")
        uvicorn.run(
            app,
            host="0.0.0.0",
            port=7860,
            log_level="info",
            access_log=True,
            timeout_keep_alive=120
        )
    except Exception as main_error:
        logger.error(f"Application startup failed: {main_error}", exc_info=True)
        try:
            logger.info("🔄 Falling back to Gradio-only mode...")
            fallback_gradio_app = create_gradio_interface()
            if fallback_gradio_app is None:
                raise RuntimeError("Fallback Gradio interface creation returned None")
            logger.info("✅ Fallback Gradio interface created successfully")
            logger.info("🧬 Gradio interface available at: http://localhost:7860")
            # Let launch() block the main thread. With prevent_thread_lock=True
            # it returns immediately, this function returns, the script ends,
            # and the fallback server is torn down with the process.
            fallback_gradio_app.launch(
                server_name="0.0.0.0",
                server_port=7860,
                prevent_thread_lock=False,
                quiet=True
            )
        except Exception as fallback_error:
            logger.error(f"Fallback failed: {fallback_error}", exc_info=True)
            print("❌ Application failed to start. Check logs at /tmp/app.log for details.")
            sys.exit(1)

if __name__ == "__main__":
    # Script entry point: print a one-line readiness flag for each model and
    # external tool, then hand control to run_application().
    _rule = "=" * 50
    print("🧬 Gene Analysis Pipeline Starting...")
    print(_rule)
    print("🔍 Checking system components...")
    mafft_available, iqtree_available, _, _ = check_tool_availability()
    # (label, component) pairs in display order; truthiness decides the mark.
    _component_flags = (
        ("🤖 Boundary Model", boundary_model),
        ("🧠 Keras Model", keras_model),
        ("🌳 Tree Analyzer", analyzer),
        ("🧬 MAFFT", mafft_available),
        ("🌲 IQ-TREE", iqtree_available),
    )
    for _label, _component in _component_flags:
        _mark = "✅" if _component else "❌"
        print(f"{_label}: {_mark}")
    print(_rule)
    run_application()