re-type committed on
Commit
1dbbf09
·
verified ·
1 Parent(s): d264132

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +137 -37
app.py CHANGED
@@ -17,6 +17,7 @@ from tensorflow.keras.models import load_model
17
  from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
 
20
  import uuid
21
  from pathlib import Path
22
  from huggingface_hub import hf_hub_download
@@ -27,7 +28,7 @@ import stat
27
  import time
28
  import asyncio
29
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
30
- from fastapi.responses import FileResponse
31
  from pydantic import BaseModel
32
  from typing import Optional
33
  import uvicorn
@@ -43,10 +44,11 @@ try:
43
  except Exception as e:
44
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
45
  logging.warning(f"Failed to set up file logging: {e}")
 
46
  logger = logging.getLogger(__name__)
47
  logger.info(f"Gradio version: {gr.__version__}")
48
 
49
- # Set event loop policy
50
  try:
51
  asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
52
  except Exception as e:
@@ -61,39 +63,52 @@ TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
61
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
62
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
63
 
 
64
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
65
  CSV_PATH = "f cleaned.csv"
66
 
67
- # Initialize models
68
  boundary_model = None
69
  keras_model = None
70
  kmer_to_index = None
71
  analyzer = None
72
 
73
- # --- Model Loading (from Script 2) ---
74
  def load_models_safely():
75
  global boundary_model, keras_model, kmer_to_index, analyzer
76
  logger.info("🔍 Loading models...")
77
  try:
78
- boundary_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_boundary_aware_model.pth", token=None)
 
 
 
 
79
  if os.path.exists(boundary_path):
80
  boundary_model = EnhancedGenePredictor(boundary_path)
81
- logger.info("✅ Boundary model loaded.")
82
  else:
83
- logger.error(f"❌ Boundary model file not found.")
84
  except Exception as e:
85
  logger.error(f"❌ Failed to load boundary model: {e}")
86
  boundary_model = None
87
  try:
88
- keras_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.keras", token=None)
89
- kmer_path = hf_hub_download(repo_id=MODEL_REPO, filename="kmer_to_index.pkl", token=None)
 
 
 
 
 
 
 
 
90
  if os.path.exists(keras_path) and os.path.exists(kmer_path):
91
  keras_model = load_model(keras_path)
92
  with open(kmer_path, "rb") as f:
93
  kmer_to_index = pickle.load(f)
94
- logger.info("✅ Keras model loaded.")
95
  else:
96
- logger.error(f"❌ Keras model files not found.")
97
  except Exception as e:
98
  logger.error(f"❌ Failed to load Keras model: {e}")
99
  keras_model = None
@@ -102,8 +117,12 @@ def load_models_safely():
102
  logger.info("🌳 Initializing tree analyzer...")
103
  analyzer = PhylogeneticTreeAnalyzer()
104
  csv_candidates = [
105
- CSV_PATH, os.path.join(BASE_DIR, CSV_PATH), os.path.join(BASE_DIR, "app", CSV_PATH),
106
- os.path.join(os.path.dirname(__file__), CSV_PATH), "f_cleaned.csv", os.path.join(BASE_DIR, "f_cleaned.csv")
 
 
 
 
107
  ]
108
  csv_loaded = False
109
  for csv_candidate in csv_candidates:
@@ -116,24 +135,26 @@ def load_models_safely():
116
  break
117
  except Exception as e:
118
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
 
119
  if not csv_loaded:
120
- logger.error("❌ Failed to load CSV data.")
121
  analyzer = None
122
  else:
123
  try:
124
  if analyzer.train_ai_model():
125
- logger.info("✅ AI model training completed.")
126
  else:
127
- logger.warning("⚠️ AI model training failed.")
128
  except Exception as e:
129
  logger.warning(f"⚠️ AI model training failed: {e}")
130
  except Exception as e:
131
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
132
  analyzer = None
133
 
 
134
  load_models_safely()
135
 
136
- # --- Tool Detection (from Script 2) ---
137
  def setup_binary_permissions():
138
  for binary in [MAFFT_PATH, IQTREE_PATH]:
139
  if os.path.exists(binary):
@@ -151,7 +172,12 @@ def check_tool_availability():
151
  for candidate in mafft_candidates:
152
  if shutil.which(candidate) or os.path.exists(candidate):
153
  try:
154
- result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
 
 
 
 
 
155
  if result.returncode == 0 or "mafft" in result.stderr.lower():
156
  mafft_available = True
157
  mafft_cmd = candidate
@@ -165,7 +191,12 @@ def check_tool_availability():
165
  for candidate in iqtree_candidates:
166
  if shutil.which(candidate) or os.path.exists(candidate):
167
  try:
168
- result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
 
 
 
 
 
169
  if result.returncode == 0 or "iqtree" in result.stderr.lower():
170
  iqtree_available = True
171
  iqtree_cmd = candidate
@@ -371,7 +402,7 @@ Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
371
  error_msg = f"❌ Pipeline Error: {str(e)}"
372
  return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
373
 
374
- async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_file):
375
  temp_file_path = None
376
  try:
377
  if fasta_file_obj is None:
@@ -566,7 +597,8 @@ def create_gradio_interface():
566
  dna_input = gr.Textbox(
567
  label="🧬 DNA Sequence",
568
  placeholder="Enter DNA sequence (ATCG format)...",
569
- lines=5
 
570
  )
571
  with gr.Column(scale=1):
572
  similarity_score = gr.Slider(
@@ -574,11 +606,13 @@ def create_gradio_interface():
574
  maximum=99,
575
  value=95.0,
576
  step=1.0,
577
- label="🎯 Similarity Threshold (%)"
 
578
  )
579
  build_ml_tree = gr.Checkbox(
580
  label="🌲 Build ML Tree",
581
- value=False
 
582
  )
583
  analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
584
  with gr.TabItem("📁 File Upload"):
@@ -586,7 +620,8 @@ def create_gradio_interface():
586
  with gr.Column(scale=2):
587
  file_input = gr.File(
588
  label="📄 Upload FASTA File",
589
- file_types=[".fasta", ".fa", ".fas", ".txt"]
 
590
  )
591
  with gr.Column(scale=1):
592
  file_similarity_score = gr.Slider(
@@ -594,22 +629,44 @@ def create_gradio_interface():
594
  maximum=99,
595
  value=95.0,
596
  step=1.0,
597
- label="🎯 Similarity Threshold (%)"
 
598
  )
599
  file_build_ml_tree = gr.Checkbox(
600
  label="🌲 Build ML Tree",
601
- value=False
 
602
  )
603
  analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
604
  gr.Markdown("## 📊 Analysis Results")
605
  with gr.Row():
606
  with gr.Column():
607
- boundary_output = gr.Textbox(label="🎯 Boundary Detection", interactive=False, lines=2)
608
- keras_output = gr.Textbox(label="🧠 F Gene Validation", interactive=False, lines=2)
 
 
 
 
 
 
 
 
609
  with gr.Column():
610
- ml_tree_output = gr.Textbox(label="🌲 Phylogenetic Placement", interactive=False, lines=2)
611
- tree_analysis_output = gr.Textbox(label="🌳 Tree Analysis", interactive=False, lines=2)
612
- summary_output = gr.Textbox(label="📋 Summary", interactive=False, lines=8)
 
 
 
 
 
 
 
 
 
 
 
 
613
  with gr.Row():
614
  aligned_file = gr.File(label="📄 Alignment File", visible=False)
615
  tree_file = gr.File(label="🌲 Tree File", visible=False)
@@ -617,9 +674,27 @@ def create_gradio_interface():
617
  report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
618
  with gr.Tabs():
619
  with gr.TabItem("🌳 Interactive Tree"):
620
- tree_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet.</div>")
 
 
621
  with gr.TabItem("📊 Detailed Report"):
622
- report_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet.</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
 
624
  analyze_btn.click(
625
  fn=run_pipeline,
@@ -627,7 +702,10 @@ def create_gradio_interface():
627
  outputs=[
628
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
629
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
630
- ]
 
 
 
631
  )
632
 
633
  analyze_file_btn.click(
@@ -636,18 +714,38 @@ def create_gradio_interface():
636
  outputs=[
637
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
638
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
639
- ]
 
 
 
640
  )
641
 
 
642
  gr.Examples(
643
  examples=[
644
- ["ATCG" * 100, 85.0, False],
645
- ["CGAT" * 100, 90.0, True]
646
  ],
647
  inputs=[dna_input, similarity_score, build_ml_tree],
648
  label="Example Sequences"
649
  )
650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  return iface
652
  except Exception as e:
653
  logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
@@ -664,6 +762,8 @@ def run_application():
664
  gradio_app = create_gradio_interface()
665
  gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
666
  logger.info("🚀 Starting Gene Analysis Pipeline...")
 
 
667
  uvicorn.run(
668
  app,
669
  host="0.0.0.0",
 
17
  from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
20
+ import sys
21
  import uuid
22
  from pathlib import Path
23
  from huggingface_hub import hf_hub_download
 
28
  import time
29
  import asyncio
30
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
31
+ from fastapi.responses import HTMLResponse, FileResponse
32
  from pydantic import BaseModel
33
  from typing import Optional
34
  import uvicorn
 
44
  except Exception as e:
45
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
46
  logging.warning(f"Failed to set up file logging: {e}")
47
+
48
  logger = logging.getLogger(__name__)
49
  logger.info(f"Gradio version: {gr.__version__}")
50
 
51
+ # Set event loop policy for compatibility with Gradio Spaces
52
  try:
53
  asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
54
  except Exception as e:
 
63
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
64
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
65
 
66
+ # Model repository and file paths
67
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
68
  CSV_PATH = "f cleaned.csv"
69
 
70
+ # Initialize models as None
71
  boundary_model = None
72
  keras_model = None
73
  kmer_to_index = None
74
  analyzer = None
75
 
76
+ # --- Model Loading ---
77
  def load_models_safely():
78
  global boundary_model, keras_model, kmer_to_index, analyzer
79
  logger.info("🔍 Loading models...")
80
  try:
81
+ boundary_path = hf_hub_download(
82
+ repo_id=MODEL_REPO,
83
+ filename="best_boundary_aware_model.pth",
84
+ token=None
85
+ )
86
  if os.path.exists(boundary_path):
87
  boundary_model = EnhancedGenePredictor(boundary_path)
88
+ logger.info("✅ Boundary model loaded successfully.")
89
  else:
90
+ logger.error(f"❌ Boundary model file not found after download.")
91
  except Exception as e:
92
  logger.error(f"❌ Failed to load boundary model: {e}")
93
  boundary_model = None
94
  try:
95
+ keras_path = hf_hub_download(
96
+ repo_id=MODEL_REPO,
97
+ filename="best_model.keras",
98
+ token=None
99
+ )
100
+ kmer_path = hf_hub_download(
101
+ repo_id=MODEL_REPO,
102
+ filename="kmer_to_index.pkl",
103
+ token=None
104
+ )
105
  if os.path.exists(keras_path) and os.path.exists(kmer_path):
106
  keras_model = load_model(keras_path)
107
  with open(kmer_path, "rb") as f:
108
  kmer_to_index = pickle.load(f)
109
+ logger.info("✅ Keras model and k-mer index loaded successfully.")
110
  else:
111
+ logger.error(f"❌ Keras model or k-mer files not found.")
112
  except Exception as e:
113
  logger.error(f"❌ Failed to load Keras model: {e}")
114
  keras_model = None
 
117
  logger.info("🌳 Initializing tree analyzer...")
118
  analyzer = PhylogeneticTreeAnalyzer()
119
  csv_candidates = [
120
+ CSV_PATH,
121
+ os.path.join(BASE_DIR, CSV_PATH),
122
+ os.path.join(BASE_DIR, "app", CSV_PATH),
123
+ os.path.join(os.path.dirname(__file__), CSV_PATH),
124
+ "f_cleaned.csv",
125
+ os.path.join(BASE_DIR, "f_cleaned.csv")
126
  ]
127
  csv_loaded = False
128
  for csv_candidate in csv_candidates:
 
135
  break
136
  except Exception as e:
137
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
138
+ continue
139
  if not csv_loaded:
140
+ logger.error("❌ Failed to load CSV data from any candidate location.")
141
  analyzer = None
142
  else:
143
  try:
144
  if analyzer.train_ai_model():
145
+ logger.info("✅ AI model training completed successfully")
146
  else:
147
+ logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
148
  except Exception as e:
149
  logger.warning(f"⚠️ AI model training failed: {e}")
150
  except Exception as e:
151
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
152
  analyzer = None
153
 
154
+ # Load models at startup
155
  load_models_safely()
156
 
157
+ # --- Tool Detection ---
158
  def setup_binary_permissions():
159
  for binary in [MAFFT_PATH, IQTREE_PATH]:
160
  if os.path.exists(binary):
 
172
  for candidate in mafft_candidates:
173
  if shutil.which(candidate) or os.path.exists(candidate):
174
  try:
175
+ result = subprocess.run(
176
+ [candidate, "--help"],
177
+ capture_output=True,
178
+ text=True,
179
+ timeout=5
180
+ )
181
  if result.returncode == 0 or "mafft" in result.stderr.lower():
182
  mafft_available = True
183
  mafft_cmd = candidate
 
191
  for candidate in iqtree_candidates:
192
  if shutil.which(candidate) or os.path.exists(candidate):
193
  try:
194
+ result = subprocess.run(
195
+ [candidate, "--help"],
196
+ capture_output=True,
197
+ text=True,
198
+ timeout=5
199
+ )
200
  if result.returncode == 0 or "iqtree" in result.stderr.lower():
201
  iqtree_available = True
202
  iqtree_cmd = candidate
 
402
  error_msg = f"❌ Pipeline Error: {str(e)}"
403
  return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
404
 
405
+ async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
406
  temp_file_path = None
407
  try:
408
  if fasta_file_obj is None:
 
597
  dna_input = gr.Textbox(
598
  label="🧬 DNA Sequence",
599
  placeholder="Enter DNA sequence (ATCG format)...",
600
+ lines=5,
601
+ description="Paste your DNA sequence here"
602
  )
603
  with gr.Column(scale=1):
604
  similarity_score = gr.Slider(
 
606
  maximum=99,
607
  value=95.0,
608
  step=1.0,
609
+ label="🎯 Similarity Threshold (%)",
610
+ description="Minimum similarity for tree analysis"
611
  )
612
  build_ml_tree = gr.Checkbox(
613
  label="🌲 Build ML Tree",
614
+ value=False,
615
+ description="Generate phylogenetic placement (slower)"
616
  )
617
  analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
618
  with gr.TabItem("📁 File Upload"):
 
620
  with gr.Column(scale=2):
621
  file_input = gr.File(
622
  label="📄 Upload FASTA File",
623
+ file_types=[".fasta", ".fa", ".fas", ".txt"],
624
+ description="Upload a FASTA file containing your sequence"
625
  )
626
  with gr.Column(scale=1):
627
  file_similarity_score = gr.Slider(
 
629
  maximum=99,
630
  value=95.0,
631
  step=1.0,
632
+ label="🎯 Similarity Threshold (%)",
633
+ description="Minimum similarity for tree analysis"
634
  )
635
  file_build_ml_tree = gr.Checkbox(
636
  label="🌲 Build ML Tree",
637
+ value=False,
638
+ description="Generate phylogenetic placement (slower)"
639
  )
640
  analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
641
  gr.Markdown("## 📊 Analysis Results")
642
  with gr.Row():
643
  with gr.Column():
644
+ boundary_output = gr.Textbox(
645
+ label="🎯 Boundary Detection",
646
+ interactive=False,
647
+ lines=2
648
+ )
649
+ keras_output = gr.Textbox(
650
+ label="🧠 F Gene Validation",
651
+ interactive=False,
652
+ lines=2
653
+ )
654
  with gr.Column():
655
+ ml_tree_output = gr.Textbox(
656
+ label="🌲 Phylogenetic Placement",
657
+ interactive=False,
658
+ lines=2
659
+ )
660
+ tree_analysis_output = gr.Textbox(
661
+ label="🌳 Tree Analysis",
662
+ interactive=False,
663
+ lines=2
664
+ )
665
+ summary_output = gr.Textbox(
666
+ label="📋 Summary",
667
+ interactive=False,
668
+ lines=8
669
+ )
670
  with gr.Row():
671
  aligned_file = gr.File(label="📄 Alignment File", visible=False)
672
  tree_file = gr.File(label="🌲 Tree File", visible=False)
 
674
  report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
675
  with gr.Tabs():
676
  with gr.TabItem("🌳 Interactive Tree"):
677
+ tree_html = gr.HTML(
678
+ value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet. Run analysis to see results.</div>"
679
+ )
680
  with gr.TabItem("📊 Detailed Report"):
681
+ report_html = gr.HTML(
682
+ label="Analysis Report",
683
+ value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet. Run analysis to see results.</div>"
684
+ )
685
+
686
+ # Event handlers
687
+ def handle_analysis_output(*outputs):
688
+ boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output, aligned_file, phy_file, _, _, tree_html_content, report_html_content, tree_html_path, report_html_path = outputs
689
+ return (
690
+ boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
691
+ gr.File.update(value=aligned_file, visible=aligned_file is not None),
692
+ gr.File.update(value=phy_file, visible=phy_file is not None),
693
+ gr.File.update(value=tree_html_path, visible=tree_html_path is not None),
694
+ gr.File.update(value=report_html_path, visible=report_html_path is not None),
695
+ tree_html_content,
696
+ report_html_content
697
+ )
698
 
699
  analyze_btn.click(
700
  fn=run_pipeline,
 
702
  outputs=[
703
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
704
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
705
+ ],
706
+ _js="""(outputs) => {
707
+ return outputs;
708
+ }"""
709
  )
710
 
711
  analyze_file_btn.click(
 
714
  outputs=[
715
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
716
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
717
+ ],
718
+ _js="""(outputs) => {
719
+ return outputs;
720
+ }"""
721
  )
722
 
723
+ # Examples
724
  gr.Examples(
725
  examples=[
726
+ ["ATCG" * 250, 85.0, False],
727
+ ["CGATCG" * 150, 90.0, True]
728
  ],
729
  inputs=[dna_input, similarity_score, build_ml_tree],
730
  label="Example Sequences"
731
  )
732
 
733
+ gr.Markdown("""
734
+ ## 📚 Instructions
735
+ 1. **Input**: Enter a DNA sequence (ATCG format) or upload a FASTA file
736
+ 2. **Parameters**:
737
+ - Set similarity threshold for phylogenetic analysis (1-99%)
738
+ - Choose whether to build ML tree (slower but more accurate)
739
+ 3. **Analysis**: Click analyze to run the complete pipeline
740
+ 4. **Results**: View results in different tabs - summary, tree visualization, and detailed report
741
+ 5. **Downloads**: Download alignment, tree, simplified tree HTML, and detailed report HTML files
742
+ ### 🔬 Pipeline Components:
743
+ - **Boundary Detection**: Identifies F gene regions
744
+ - **F Gene Validation**: Validates F gene using ML
745
+ - **Phylogenetic Placement**: Places sequence in reference tree (optional)
746
+ - **Tree Analysis**: Builds phylogenetic tree with similar sequences
747
+ """)
748
+
749
  return iface
750
  except Exception as e:
751
  logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
 
762
  gradio_app = create_gradio_interface()
763
  gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
764
  logger.info("🚀 Starting Gene Analysis Pipeline...")
765
+ logger.info("📊 FastAPI docs available at: http://localhost:7860/docs")
766
+ logger.info("🧬 Gradio interface available at: http://localhost:7860/gradio")
767
  uvicorn.run(
768
  app,
769
  host="0.0.0.0",