re-type committed on
Commit
8c277d8
·
verified ·
1 Parent(s): 291422a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -139
app.py CHANGED
@@ -17,7 +17,6 @@ from tensorflow.keras.models import load_model
17
  from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
20
- import sys
21
  import uuid
22
  from pathlib import Path
23
  from huggingface_hub import hf_hub_download
@@ -28,7 +27,7 @@ import stat
28
  import time
29
  import asyncio
30
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
31
- from fastapi.responses import HTMLResponse, FileResponse
32
  from pydantic import BaseModel
33
  from typing import Optional
34
  import uvicorn
@@ -44,11 +43,10 @@ try:
44
  except Exception as e:
45
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
46
  logging.warning(f"Failed to set up file logging: {e}")
47
-
48
  logger = logging.getLogger(__name__)
49
  logger.info(f"Gradio version: {gr.__version__}")
50
 
51
- # Set event loop policy for compatibility with Gradio Spaces
52
  try:
53
  asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
54
  except Exception as e:
@@ -63,52 +61,39 @@ TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
63
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
64
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
65
 
66
- # Model repository and file paths
67
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
68
  CSV_PATH = "f cleaned.csv"
69
 
70
- # Initialize models as None
71
  boundary_model = None
72
  keras_model = None
73
  kmer_to_index = None
74
  analyzer = None
75
 
76
- # --- Model Loading ---
77
  def load_models_safely():
78
  global boundary_model, keras_model, kmer_to_index, analyzer
79
  logger.info("🔍 Loading models...")
80
  try:
81
- boundary_path = hf_hub_download(
82
- repo_id=MODEL_REPO,
83
- filename="best_boundary_aware_model.pth",
84
- token=None
85
- )
86
  if os.path.exists(boundary_path):
87
  boundary_model = EnhancedGenePredictor(boundary_path)
88
- logger.info("✅ Boundary model loaded successfully.")
89
  else:
90
- logger.error(f"❌ Boundary model file not found after download.")
91
  except Exception as e:
92
  logger.error(f"❌ Failed to load boundary model: {e}")
93
  boundary_model = None
94
  try:
95
- keras_path = hf_hub_download(
96
- repo_id=MODEL_REPO,
97
- filename="best_model.keras",
98
- token=None
99
- )
100
- kmer_path = hf_hub_download(
101
- repo_id=MODEL_REPO,
102
- filename="kmer_to_index.pkl",
103
- token=None
104
- )
105
  if os.path.exists(keras_path) and os.path.exists(kmer_path):
106
  keras_model = load_model(keras_path)
107
  with open(kmer_path, "rb") as f:
108
  kmer_to_index = pickle.load(f)
109
- logger.info("✅ Keras model and k-mer index loaded successfully.")
110
  else:
111
- logger.error(f"❌ Keras model or k-mer files not found.")
112
  except Exception as e:
113
  logger.error(f"❌ Failed to load Keras model: {e}")
114
  keras_model = None
@@ -117,12 +102,8 @@ def load_models_safely():
117
  logger.info("🌳 Initializing tree analyzer...")
118
  analyzer = PhylogeneticTreeAnalyzer()
119
  csv_candidates = [
120
- CSV_PATH,
121
- os.path.join(BASE_DIR, CSV_PATH),
122
- os.path.join(BASE_DIR, "app", CSV_PATH),
123
- os.path.join(os.path.dirname(__file__), CSV_PATH),
124
- "f_cleaned.csv",
125
- os.path.join(BASE_DIR, "f_cleaned.csv")
126
  ]
127
  csv_loaded = False
128
  for csv_candidate in csv_candidates:
@@ -135,26 +116,24 @@ def load_models_safely():
135
  break
136
  except Exception as e:
137
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
138
- continue
139
  if not csv_loaded:
140
- logger.error("❌ Failed to load CSV data from any candidate location.")
141
  analyzer = None
142
  else:
143
  try:
144
  if analyzer.train_ai_model():
145
- logger.info("✅ AI model training completed successfully")
146
  else:
147
- logger.warning("⚠️ AI model training failed; proceeding with basic analysis.")
148
  except Exception as e:
149
  logger.warning(f"⚠️ AI model training failed: {e}")
150
  except Exception as e:
151
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
152
  analyzer = None
153
 
154
- # Load models at startup
155
  load_models_safely()
156
 
157
- # --- Tool Detection ---
158
  def setup_binary_permissions():
159
  for binary in [MAFFT_PATH, IQTREE_PATH]:
160
  if os.path.exists(binary):
@@ -172,12 +151,7 @@ def check_tool_availability():
172
  for candidate in mafft_candidates:
173
  if shutil.which(candidate) or os.path.exists(candidate):
174
  try:
175
- result = subprocess.run(
176
- [candidate, "--help"],
177
- capture_output=True,
178
- text=True,
179
- timeout=5
180
- )
181
  if result.returncode == 0 or "mafft" in result.stderr.lower():
182
  mafft_available = True
183
  mafft_cmd = candidate
@@ -191,12 +165,7 @@ def check_tool_availability():
191
  for candidate in iqtree_candidates:
192
  if shutil.which(candidate) or os.path.exists(candidate):
193
  try:
194
- result = subprocess.run(
195
- [candidate, "--help"],
196
- capture_output=True,
197
- text=True,
198
- timeout=5
199
- )
200
  if result.returncode == 0 or "iqtree" in result.stderr.lower():
201
  iqtree_available = True
202
  iqtree_cmd = candidate
@@ -241,8 +210,8 @@ def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
241
  if 'query_fasta' in locals() and os.path.exists(query_fasta):
242
  try:
243
  os.unlink(query_fasta)
244
- except Exception as cleanup_error:
245
- logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
246
 
247
  def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
248
  try:
@@ -402,7 +371,7 @@ Tree Analysis: {'✅ OK' if 'Found' in simplified_ml_output else '❌ Failed'}
402
  error_msg = f"❌ Pipeline Error: {str(e)}"
403
  return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
404
 
405
- async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree):
406
  temp_file_path = None
407
  try:
408
  if fasta_file_obj is None:
@@ -597,8 +566,7 @@ def create_gradio_interface():
597
  dna_input = gr.Textbox(
598
  label="🧬 DNA Sequence",
599
  placeholder="Enter DNA sequence (ATCG format)...",
600
- lines=5,
601
- description="Paste your DNA sequence here"
602
  )
603
  with gr.Column(scale=1):
604
  similarity_score = gr.Slider(
@@ -606,13 +574,11 @@ def create_gradio_interface():
606
  maximum=99,
607
  value=95.0,
608
  step=1.0,
609
- label="🎯 Similarity Threshold (%)",
610
- description="Minimum similarity for tree analysis"
611
  )
612
  build_ml_tree = gr.Checkbox(
613
  label="🌲 Build ML Tree",
614
- value=False,
615
- description="Generate phylogenetic placement (slower)"
616
  )
617
  analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
618
  with gr.TabItem("📁 File Upload"):
@@ -620,8 +586,7 @@ def create_gradio_interface():
620
  with gr.Column(scale=2):
621
  file_input = gr.File(
622
  label="📄 Upload FASTA File",
623
- file_types=[".fasta", ".fa", ".fas", ".txt"],
624
- description="Upload a FASTA file containing your sequence"
625
  )
626
  with gr.Column(scale=1):
627
  file_similarity_score = gr.Slider(
@@ -629,44 +594,22 @@ def create_gradio_interface():
629
  maximum=99,
630
  value=95.0,
631
  step=1.0,
632
- label="🎯 Similarity Threshold (%)",
633
- description="Minimum similarity for tree analysis"
634
  )
635
  file_build_ml_tree = gr.Checkbox(
636
  label="🌲 Build ML Tree",
637
- value=False,
638
- description="Generate phylogenetic placement (slower)"
639
  )
640
  analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
641
  gr.Markdown("## 📊 Analysis Results")
642
  with gr.Row():
643
  with gr.Column():
644
- boundary_output = gr.Textbox(
645
- label="🎯 Boundary Detection",
646
- interactive=False,
647
- lines=2
648
- )
649
- keras_output = gr.Textbox(
650
- label="🧠 F Gene Validation",
651
- interactive=False,
652
- lines=2
653
- )
654
  with gr.Column():
655
- ml_tree_output = gr.Textbox(
656
- label="🌲 Phylogenetic Placement",
657
- interactive=False,
658
- lines=2
659
- )
660
- tree_analysis_output = gr.Textbox(
661
- label="🌳 Tree Analysis",
662
- interactive=False,
663
- lines=2
664
- )
665
- summary_output = gr.Textbox(
666
- label="📋 Summary",
667
- interactive=False,
668
- lines=8
669
- )
670
  with gr.Row():
671
  aligned_file = gr.File(label="📄 Alignment File", visible=False)
672
  tree_file = gr.File(label="🌲 Tree File", visible=False)
@@ -674,27 +617,9 @@ def create_gradio_interface():
674
  report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
675
  with gr.Tabs():
676
  with gr.TabItem("🌳 Interactive Tree"):
677
- tree_html = gr.HTML(
678
- value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet. Run analysis to see results.</div>"
679
- )
680
  with gr.TabItem("📊 Detailed Report"):
681
- report_html = gr.HTML(
682
- label="Analysis Report",
683
- value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet. Run analysis to see results.</div>"
684
- )
685
-
686
- # Event handlers
687
- def handle_analysis_output(*outputs):
688
- boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output, aligned_file, phy_file, _, _, tree_html_content, report_html_content, tree_html_path, report_html_path = outputs
689
- return (
690
- boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
691
- gr.File.update(value=aligned_file, visible=aligned_file is not None),
692
- gr.File.update(value=phy_file, visible=phy_file is not None),
693
- gr.File.update(value=tree_html_path, visible=tree_html_path is not None),
694
- gr.File.update(value=report_html_path, visible=report_html_path is not None),
695
- tree_html_content,
696
- report_html_content
697
- )
698
 
699
  analyze_btn.click(
700
  fn=run_pipeline,
@@ -702,10 +627,7 @@ def create_gradio_interface():
702
  outputs=[
703
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
704
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
705
- ],
706
- _js="""(outputs) => {
707
- return outputs;
708
- }"""
709
  )
710
 
711
  analyze_file_btn.click(
@@ -714,38 +636,18 @@ def create_gradio_interface():
714
  outputs=[
715
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
716
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
717
- ],
718
- _js="""(outputs) => {
719
- return outputs;
720
- }"""
721
  )
722
 
723
- # Examples
724
  gr.Examples(
725
  examples=[
726
- ["ATCG" * 250, 85.0, False],
727
- ["CGATCG" * 150, 90.0, True]
728
  ],
729
  inputs=[dna_input, similarity_score, build_ml_tree],
730
  label="Example Sequences"
731
  )
732
 
733
- gr.Markdown("""
734
- ## 📚 Instructions
735
- 1. **Input**: Enter a DNA sequence (ATCG format) or upload a FASTA file
736
- 2. **Parameters**:
737
- - Set similarity threshold for phylogenetic analysis (1-99%)
738
- - Choose whether to build ML tree (slower but more accurate)
739
- 3. **Analysis**: Click analyze to run the complete pipeline
740
- 4. **Results**: View results in different tabs - summary, tree visualization, and detailed report
741
- 5. **Downloads**: Download alignment, tree, simplified tree HTML, and detailed report HTML files
742
- ### 🔬 Pipeline Components:
743
- - **Boundary Detection**: Identifies F gene regions
744
- - **F Gene Validation**: Validates F gene using ML
745
- - **Phylogenetic Placement**: Places sequence in reference tree (optional)
746
- - **Tree Analysis**: Builds phylogenetic tree with similar sequences
747
- """)
748
-
749
  return iface
750
  except Exception as e:
751
  logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
@@ -762,8 +664,6 @@ def run_application():
762
  gradio_app = create_gradio_interface()
763
  gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
764
  logger.info("🚀 Starting Gene Analysis Pipeline...")
765
- logger.info("📊 FastAPI docs available at: http://localhost:7860/docs")
766
- logger.info("🧬 Gradio interface available at: http://localhost:7860/gradio")
767
  uvicorn.run(
768
  app,
769
  host="0.0.0.0",
 
17
  from analyzer import PhylogeneticTreeAnalyzer
18
  import tempfile
19
  import shutil
 
20
  import uuid
21
  from pathlib import Path
22
  from huggingface_hub import hf_hub_download
 
27
  import time
28
  import asyncio
29
  from fastapi import FastAPI, File, UploadFile, Form, HTTPException
30
+ from fastapi.responses import FileResponse
31
  from pydantic import BaseModel
32
  from typing import Optional
33
  import uvicorn
 
43
  except Exception as e:
44
  logging.basicConfig(level=logging.INFO, handlers=[log_handler])
45
  logging.warning(f"Failed to set up file logging: {e}")
 
46
  logger = logging.getLogger(__name__)
47
  logger.info(f"Gradio version: {gr.__version__}")
48
 
49
+ # Set event loop policy
50
  try:
51
  asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy())
52
  except Exception as e:
 
61
  QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
62
  os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
63
 
 
64
  MODEL_REPO = "GGproject10/best_boundary_aware_model"
65
  CSV_PATH = "f cleaned.csv"
66
 
67
+ # Initialize models
68
  boundary_model = None
69
  keras_model = None
70
  kmer_to_index = None
71
  analyzer = None
72
 
73
+ # --- Model Loading (from Script 2) ---
74
  def load_models_safely():
75
  global boundary_model, keras_model, kmer_to_index, analyzer
76
  logger.info("🔍 Loading models...")
77
  try:
78
+ boundary_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_boundary_aware_model.pth", token=None)
 
 
 
 
79
  if os.path.exists(boundary_path):
80
  boundary_model = EnhancedGenePredictor(boundary_path)
81
+ logger.info("✅ Boundary model loaded.")
82
  else:
83
+ logger.error(f"❌ Boundary model file not found.")
84
  except Exception as e:
85
  logger.error(f"❌ Failed to load boundary model: {e}")
86
  boundary_model = None
87
  try:
88
+ keras_path = hf_hub_download(repo_id=MODEL_REPO, filename="best_model.keras", token=None)
89
+ kmer_path = hf_hub_download(repo_id=MODEL_REPO, filename="kmer_to_index.pkl", token=None)
 
 
 
 
 
 
 
 
90
  if os.path.exists(keras_path) and os.path.exists(kmer_path):
91
  keras_model = load_model(keras_path)
92
  with open(kmer_path, "rb") as f:
93
  kmer_to_index = pickle.load(f)
94
+ logger.info("✅ Keras model loaded.")
95
  else:
96
+ logger.error(f"❌ Keras model files not found.")
97
  except Exception as e:
98
  logger.error(f"❌ Failed to load Keras model: {e}")
99
  keras_model = None
 
102
  logger.info("🌳 Initializing tree analyzer...")
103
  analyzer = PhylogeneticTreeAnalyzer()
104
  csv_candidates = [
105
+ CSV_PATH, os.path.join(BASE_DIR, CSV_PATH), os.path.join(BASE_DIR, "app", CSV_PATH),
106
+ os.path.join(os.path.dirname(__file__), CSV_PATH), "f_cleaned.csv", os.path.join(BASE_DIR, "f_cleaned.csv")
 
 
 
 
107
  ]
108
  csv_loaded = False
109
  for csv_candidate in csv_candidates:
 
116
  break
117
  except Exception as e:
118
  logger.warning(f"CSV load failed for {csv_candidate}: {e}")
 
119
  if not csv_loaded:
120
+ logger.error("❌ Failed to load CSV data.")
121
  analyzer = None
122
  else:
123
  try:
124
  if analyzer.train_ai_model():
125
+ logger.info("✅ AI model training completed.")
126
  else:
127
+ logger.warning("⚠️ AI model training failed.")
128
  except Exception as e:
129
  logger.warning(f"⚠️ AI model training failed: {e}")
130
  except Exception as e:
131
  logger.error(f"❌ Tree analyzer initialization failed: {e}")
132
  analyzer = None
133
 
 
134
  load_models_safely()
135
 
136
+ # --- Tool Detection (from Script 2) ---
137
  def setup_binary_permissions():
138
  for binary in [MAFFT_PATH, IQTREE_PATH]:
139
  if os.path.exists(binary):
 
151
  for candidate in mafft_candidates:
152
  if shutil.which(candidate) or os.path.exists(candidate):
153
  try:
154
+ result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
 
 
 
 
 
155
  if result.returncode == 0 or "mafft" in result.stderr.lower():
156
  mafft_available = True
157
  mafft_cmd = candidate
 
165
  for candidate in iqtree_candidates:
166
  if shutil.which(candidate) or os.path.exists(candidate):
167
  try:
168
+ result = subprocess.run([candidate, "--help"], capture_output=True, text=True, timeout=5)
 
 
 
 
 
169
  if result.returncode == 0 or "iqtree" in result.stderr.lower():
170
  iqtree_available = True
171
  iqtree_cmd = candidate
 
210
  if 'query_fasta' in locals() and os.path.exists(query_fasta):
211
  try:
212
  os.unlink(query_fasta)
213
+ except Exception as e: # Best-effort cleanup: log the failure and continue
214
+ logger.warning(f"Failed to clean up {query_fasta}: {e}")
215
 
216
  def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
217
  try:
 
371
  error_msg = f"❌ Pipeline Error: {str(e)}"
372
  return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
373
 
374
+ async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_file):
375
  temp_file_path = None
376
  try:
377
  if fasta_file_obj is None:
 
566
  dna_input = gr.Textbox(
567
  label="🧬 DNA Sequence",
568
  placeholder="Enter DNA sequence (ATCG format)...",
569
+ lines=5
 
570
  )
571
  with gr.Column(scale=1):
572
  similarity_score = gr.Slider(
 
574
  maximum=99,
575
  value=95.0,
576
  step=1.0,
577
+ label="🎯 Similarity Threshold (%)"
 
578
  )
579
  build_ml_tree = gr.Checkbox(
580
  label="🌲 Build ML Tree",
581
+ value=False
 
582
  )
583
  analyze_btn = gr.Button("🔬 Analyze Sequence", variant="primary")
584
  with gr.TabItem("📁 File Upload"):
 
586
  with gr.Column(scale=2):
587
  file_input = gr.File(
588
  label="📄 Upload FASTA File",
589
+ file_types=[".fasta", ".fa", ".fas", ".txt"]
 
590
  )
591
  with gr.Column(scale=1):
592
  file_similarity_score = gr.Slider(
 
594
  maximum=99,
595
  value=95.0,
596
  step=1.0,
597
+ label="🎯 Similarity Threshold (%)"
 
598
  )
599
  file_build_ml_tree = gr.Checkbox(
600
  label="🌲 Build ML Tree",
601
+ value=False
 
602
  )
603
  analyze_file_btn = gr.Button("🔬 Analyze File", variant="primary")
604
  gr.Markdown("## 📊 Analysis Results")
605
  with gr.Row():
606
  with gr.Column():
607
+ boundary_output = gr.Textbox(label="🎯 Boundary Detection", interactive=False, lines=2)
608
+ keras_output = gr.Textbox(label="🧠 F Gene Validation", interactive=False, lines=2)
 
 
 
 
 
 
 
 
609
  with gr.Column():
610
+ ml_tree_output = gr.Textbox(label="🌲 Phylogenetic Placement", interactive=False, lines=2)
611
+ tree_analysis_output = gr.Textbox(label="🌳 Tree Analysis", interactive=False, lines=2)
612
+ summary_output = gr.Textbox(label="📋 Summary", interactive=False, lines=8)
 
 
 
 
 
 
 
 
 
 
 
 
613
  with gr.Row():
614
  aligned_file = gr.File(label="📄 Alignment File", visible=False)
615
  tree_file = gr.File(label="🌲 Tree File", visible=False)
 
617
  report_html_file = gr.File(label="📊 Detailed Report HTML", visible=False)
618
  with gr.Tabs():
619
  with gr.TabItem("🌳 Interactive Tree"):
620
+ tree_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No tree generated yet.</div>")
 
 
621
  with gr.TabItem("📊 Detailed Report"):
622
+ report_html = gr.HTML(value="<div style='text-align: center; color: #666; padding: 20px;'>No report generated yet.</div>")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
623
 
624
  analyze_btn.click(
625
  fn=run_pipeline,
 
627
  outputs=[
628
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
629
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
630
+ ]
 
 
 
631
  )
632
 
633
  analyze_file_btn.click(
 
636
  outputs=[
637
  boundary_output, keras_output, ml_tree_output, tree_analysis_output, summary_output,
638
  aligned_file, tree_file, tree_html_file, report_html_file, tree_html, report_html
639
+ ]
 
 
 
640
  )
641
 
 
642
  gr.Examples(
643
  examples=[
644
+ ["ATCG" * 100, 85.0, False],
645
+ ["CGAT" * 100, 90.0, True]
646
  ],
647
  inputs=[dna_input, similarity_score, build_ml_tree],
648
  label="Example Sequences"
649
  )
650
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  return iface
652
  except Exception as e:
653
  logger.error(f"Gradio interface creation failed: {e}", exc_info=True)
 
664
  gradio_app = create_gradio_interface()
665
  gradio_app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
666
  logger.info("🚀 Starting Gene Analysis Pipeline...")
 
 
667
  uvicorn.run(
668
  app,
669
  host="0.0.0.0",