re-type committed on
Commit
42bde38
·
verified ·
1 Parent(s): 2c6a591

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -634
app.py DELETED
@@ -1,634 +0,0 @@
1
- import os
2
- import logging
3
- import pickle
4
- import subprocess
5
- import pandas as pd
6
- import re
7
- import numpy as np
8
- import torch
9
- from flask import Flask, request, jsonify, send_file
10
- from werkzeug.utils import secure_filename
11
- from predictor import EnhancedGenePredictor
12
- from tensorflow.keras.models import load_model
13
- from analyzer import PhylogeneticTreeAnalyzer
14
- import tempfile
15
- import shutil
16
- import sys
17
- import uuid
18
- from pathlib import Path
19
- from huggingface_hub import hf_hub_download
20
- from Bio import SeqIO
21
- from Bio.Seq import Seq
22
- from Bio.SeqRecord import SeqRecord
23
- import stat
24
- import time
25
-
26
# --- Logging Setup ---
# Log to both stdout (container logs) and a file under /tmp so a persisted
# copy survives for debugging.
os.makedirs('/tmp', exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler('/tmp/flask_app.log')
    ]
)
logger = logging.getLogger(__name__)
37
-
38
# Disable GPU to avoid CUDA errors
# Force TensorFlow/PyTorch onto CPU and silence TF's C++ log spam; these must
# be set before TF initializes (the import above already happened, so this is
# presumably relied on by lazy initialization — TODO confirm).
os.environ["CUDA_VISIBLE_DEVICES"] = ""
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
42
-
43
# --- Global Variables ---
BASE_DIR = os.path.dirname(os.path.abspath(__file__))  # directory of this file
# Bundled alignment / tree-building binaries shipped with the app.
MAFFT_PATH = os.path.join(BASE_DIR, "binaries", "mafft", "mafft")
IQTREE_PATH = os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree3")
# Reference alignment and tree used for phylogenetic placement of queries.
ALIGNMENT_PATH = os.path.join(BASE_DIR, "f_gene_sequences_aligned.fasta")
TREE_PATH = os.path.join(BASE_DIR, "f_gene_sequences.phy.treefile")
# Per-query scratch/output files are written here.
QUERY_OUTPUT_DIR = os.path.join(BASE_DIR, "queries")
os.makedirs(QUERY_OUTPUT_DIR, exist_ok=True)
MODEL_REPO = "GGproject10/best_boundary_aware_model"  # Hugging Face repo for model weights
# NOTE: literal space in the name; alternative spellings are tried at load time.
CSV_PATH = "f cleaned.csv"
HF_TOKEN = os.getenv("HF_TOKEN")  # optional token for private HF downloads

# Initialize models.
# Populated by load_models_safely(); None means the component is unavailable
# and dependent endpoints degrade gracefully.
boundary_model = None
keras_model = None
kmer_to_index = None
analyzer = None
60
-
61
# --- Load Models ---
def load_models_safely():
    """Load the boundary model, the Keras classifier and the tree analyzer.

    Each component is loaded independently: a failure is logged and the
    corresponding module-level global is left as None rather than raising,
    so the app can still serve the features that did load.
    """
    global boundary_model, keras_model, kmer_to_index, analyzer
    logger.info("🔍 Loading models...")

    # Boundary model: local file preferred, otherwise downloaded from HF Hub.
    try:
        boundary_path = os.path.join(BASE_DIR, "models", "best_boundary_aware_model.pth")
        if not os.path.exists(boundary_path):
            logger.info(f"Downloading boundary model from {MODEL_REPO}...")
            boundary_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="best_boundary_aware_model.pth",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        boundary_model = EnhancedGenePredictor(boundary_path)
        logger.info("✅ Boundary model loaded")
    except Exception as e:
        logger.error(f"❌ Failed to load boundary model: {e}")
        boundary_model = None

    # Keras model: the classifier and its k-mer vocabulary must both load;
    # if either fails, both globals are reset so they stay consistent.
    try:
        keras_path = os.path.join(BASE_DIR, "models", "best_model.keras")
        kmer_path = os.path.join(BASE_DIR, "models", "kmer_to_index.pkl")
        if not os.path.exists(keras_path):
            logger.info(f"Downloading Keras model from {MODEL_REPO}...")
            keras_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="best_model.keras",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        if not os.path.exists(kmer_path):
            logger.info(f"Downloading k-mer index from {MODEL_REPO}...")
            kmer_path = hf_hub_download(
                repo_id=MODEL_REPO,
                filename="kmer_to_index.pkl",
                token=HF_TOKEN,
                local_dir=os.path.join(BASE_DIR, "models")
            )
        keras_model = load_model(keras_path)
        # NOTE(review): pickle.loads on downloaded data executes arbitrary
        # code if the repo is compromised — acceptable only for a trusted repo.
        with open(kmer_path, "rb") as f:
            kmer_to_index = pickle.load(f)
        logger.info("✅ Keras model and k-mer index loaded")
    except Exception as e:
        logger.error(f"❌ Failed to load Keras model: {e}")
        keras_model = None
        kmer_to_index = None

    # Tree analyzer: probe several candidate CSV locations (the configured
    # name contains a space; an underscore variant is also tried).
    try:
        analyzer = PhylogeneticTreeAnalyzer()
        csv_candidates = [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ]
        csv_loaded = False
        for csv_candidate in csv_candidates:
            if os.path.exists(csv_candidate):
                if analyzer.load_data(csv_candidate):
                    logger.info(f"✅ CSV loaded: {csv_candidate}")
                    csv_loaded = True
                    break
        if not csv_loaded:
            logger.error("❌ Failed to load CSV")
            analyzer = None
        else:
            if analyzer.train_ai_model():
                logger.info("✅ AI model trained")
    except Exception as e:
        logger.error(f"❌ Tree analyzer failed: {e}")
        analyzer = None
139
-
140
# Load all models at import time; an unexpected hard failure aborts the
# process since the service cannot serve anything useful without them.
# (load_models_safely() itself swallows per-component errors.)
try:
    load_models_safely()
except Exception as e:
    logger.critical(f"Model loading failed: {e}")
    sys.exit(1)
145
-
146
# --- Tool Detection ---
def setup_binary_permissions():
    """Mark the bundled MAFFT and IQ-TREE binaries as executable (chmod +x)."""
    for tool_path in (MAFFT_PATH, IQTREE_PATH):
        if not os.path.exists(tool_path):
            logger.warning(f"⚠️ Binary not found: {tool_path}")
            continue
        current_mode = os.stat(tool_path).st_mode
        os.chmod(tool_path, current_mode | stat.S_IEXEC)
        logger.info(f"✅ Set permission: {tool_path}")
154
-
155
def check_tool_availability():
    """Locate working MAFFT and IQ-TREE executables.

    Tries the bundled binaries first, then common system and conda install
    locations, verifying each candidate by running `--help`.

    Returns:
        (mafft_available, iqtree_available, mafft_cmd, iqtree_cmd) — the
        booleans flag availability, the cmd values are the first working
        executable paths (or None).
    """
    setup_binary_permissions()
    mafft_available = False
    mafft_cmd = None
    # Candidates are probed in order; the bundled binary wins if it works.
    mafft_candidates = [
        MAFFT_PATH,
        os.path.join(BASE_DIR, "binaries", "mafft", "mafft"),
        os.path.join(BASE_DIR, "binaries", "mafft", "mafft.bat"),
        'mafft',
        '/usr/bin/mafft',
        '/usr/local/bin/mafft',
        os.path.join(BASE_DIR, "binaries", "mafft", "mafftdir", "bin", "mafft"),
        os.path.expanduser("~/anaconda3/bin/mafft"),
        os.path.expanduser("~/miniconda3/bin/mafft"),
        "/opt/conda/bin/mafft",
        "/usr/local/miniconda3/bin/mafft"
    ]
    for candidate in mafft_candidates:
        if os.path.exists(candidate) or shutil.which(candidate):
            try:
                # `--help` confirms the binary actually runs; MAFFT prints
                # usage to stderr, hence the stderr check as fallback.
                result = subprocess.run(
                    [candidate, "--help"],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0 or "mafft" in result.stderr.lower():
                    mafft_available = True
                    mafft_cmd = candidate
                    logger.info(f"✅ MAFFT: {candidate}")
                    break
            except Exception as e:
                logger.debug(f"MAFFT test failed: {candidate}: {e}")
    iqtree_available = False
    iqtree_cmd = None
    # Same probing strategy for IQ-TREE, covering v1/v2/v3 binary names.
    iqtree_candidates = [
        IQTREE_PATH,
        'iqtree',
        'iqtree2',
        'iqtree3',
        '/usr/bin/iqtree',
        '/usr/local/bin/iqtree',
        'iqtree.exe',
        'iqtree2.exe',
        'iqtree3.exe',
        os.path.join(BASE_DIR, "binaries", "iqtree", "bin", "iqtree2"),
        os.path.expanduser("~/anaconda3/bin/iqtree2"),
        os.path.expanduser("~/miniconda3/bin/iqtree2"),
        "/opt/conda/bin/iqtree2",
        "/usr/local/miniconda3/bin/iqtree2"
    ]
    for candidate in iqtree_candidates:
        if os.path.exists(candidate) or shutil.which(candidate):
            try:
                result = subprocess.run(
                    [candidate, "--help"],
                    capture_output=True,
                    text=True,
                    timeout=10
                )
                if result.returncode == 0 or "iqtree" in result.stderr.lower():
                    iqtree_available = True
                    iqtree_cmd = candidate
                    logger.info(f"✅ IQ-TREE: {candidate}")
                    break
            except Exception as e:
                logger.debug(f"IQ-TREE test failed: {candidate}: {e}")
    return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
223
-
224
def install_dependencies_guide():
    """Return a human-readable installation guide for MAFFT and IQ-TREE,
    shown to the user when one of the tools cannot be located."""
    return """
🔧 INSTALLATION GUIDE FOR MISSING DEPENDENCIES:
For MAFFT:
- Ubuntu/Debian: sudo apt-get install mafft
- CentOS/RHEL: sudo yum install mafft
- macOS: brew install mafft
- Windows: Download from https://mafft.cbrc.jp/alignment/software/
- Conda: conda install -c bioconda mafft
For IQ-TREE:
- Ubuntu/Debian: sudo apt-get install iqtree
- CentOS/RHEL: sudo yum install iqtree
- macOS: brew install iqtree
- Windows: Download from http://www.iqtree.org/
- Conda: conda install -c bioconda iqtree
"""
240
-
241
# --- Pipeline Functions ---
def cleanup_file(file_path: str):
    """Best-effort removal of *file_path*; missing paths and errors are non-fatal."""
    if not file_path or not os.path.exists(file_path):
        return
    try:
        os.unlink(file_path)
        logger.debug(f"Cleaned up {file_path}")
    except Exception as e:
        logger.warning(f"Failed to clean up {file_path}: {e}")
249
-
250
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence into the reference phylogenetic tree.

    Writes the query to a temporary FASTA, adds it to the reference alignment
    with MAFFT (`--add`), then runs an IQ-TREE search constrained by the
    reference tree topology (`-g`).

    Returns:
        (success, message, aligned_file_path, tree_file_path); path elements
        are None when the corresponding artifact was not produced.
    """
    query_fasta = None
    try:
        if len(sequence.strip()) < 100:
            return False, "Sequence too short (<100 bp).", None, None
        # Unique per-request ID so concurrent queries don't collide on disk.
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, f"Reference files missing: {ALIGNMENT_PATH}, {TREE_PATH}", None, None
        query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
        SeqIO.write([query_record], query_fasta, "fasta")
        # MAFFT writes the combined alignment to stdout; redirect into a file.
        with open(aligned_with_query, "w") as output_file:
            result = subprocess.run(
                [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                stdout=output_file,
                stderr=subprocess.PIPE,
                text=True,
                timeout=600,
                check=True
            )
        if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
            cleanup_file(query_fasta)
            return False, "MAFFT alignment failed.", None, None
        # Constrained ML search: -g keeps the reference topology as a guide,
        # -redo overwrites any stale output with the same prefix.
        result = subprocess.run(
            [iqtree_cmd, "-s", aligned_with_query, "-g", TREE_PATH, "-m", "GTR+G", "-pre", output_prefix, "-redo"],
            capture_output=True,
            text=True,
            timeout=1200,
            check=True
        )
        treefile = f"{output_prefix}.treefile"
        if not os.path.exists(treefile):
            cleanup_file(query_fasta)
            return False, "IQ-TREE placement failed.", aligned_with_query, None
        success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
        cleanup_file(query_fasta)
        return True, success_msg, aligned_with_query, treefile
    except Exception as e:
        # check=True above turns tool failures into CalledProcessError, caught here.
        logger.error(f"Phylogenetic placement failed: {e}")
        cleanup_file(query_fasta)
        return False, f"Error: {str(e)}", None, None
293
-
294
def build_maximum_likelihood_tree(f_gene_sequence):
    """Run dependency checks and, if possible, phylogenetic placement.

    Builds a human-readable status report covering tool and reference-file
    availability, then delegates to phylogenetic_placement(). Successful
    outputs are also copied to stable, predictable filenames so downloads
    don't depend on the per-query IDs.

    Returns:
        (success, message, aligned_file_path, tree_file_path).
    """
    try:
        mafft_available, iqtree_available, mafft_cmd, iqtree_cmd = check_tool_availability()
        status_msg = "🔍 Checking dependencies...\n"
        if not mafft_available:
            status_msg += "❌ MAFFT not found\n"
        else:
            status_msg += f"✅ MAFFT found: {mafft_cmd}\n"
        if not iqtree_available:
            status_msg += "❌ IQ-TREE not found\n"
        else:
            status_msg += f"✅ IQ-TREE found: {iqtree_cmd}\n"
        if not os.path.exists(ALIGNMENT_PATH):
            status_msg += f"❌ Reference alignment not found: {ALIGNMENT_PATH}\n"
        else:
            status_msg += f"✅ Reference alignment found\n"
        if not os.path.exists(TREE_PATH):
            status_msg += f"❌ Reference tree not found: {TREE_PATH}\n"
        else:
            status_msg += f"✅ Reference tree found\n"
        # Missing tools get the install guide appended to the report.
        if not mafft_available or not iqtree_available:
            guide = install_dependencies_guide()
            return False, f"{status_msg}\n{guide}", None, None
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            status_msg += "\n❌ Missing reference files.\n"
            return False, status_msg, None, None
        placement_success, placement_message, aligned_file, tree_file = phylogenetic_placement(
            f_gene_sequence, mafft_cmd, iqtree_cmd
        )
        if placement_success:
            final_message = f"{status_msg}\n{placement_message}"
            # Copy artifacts to fixed names; note concurrent requests will
            # overwrite each other's copies here (per-query files remain).
            if aligned_file and os.path.exists(aligned_file):
                standard_aligned = os.path.join(QUERY_OUTPUT_DIR, "query_with_references_aligned.fasta")
                shutil.copy2(aligned_file, standard_aligned)
                aligned_file = standard_aligned
            if tree_file and os.path.exists(tree_file):
                standard_tree = os.path.join(QUERY_OUTPUT_DIR, "query_placement_tree.treefile")
                shutil.copy2(tree_file, standard_tree)
                tree_file = standard_tree
            return True, final_message, aligned_file, tree_file
        else:
            return False, f"{status_msg}\n{placement_message}", aligned_file, tree_file
    except Exception as e:
        logger.error(f"ML tree construction failed: {e}")
        return False, f"Error: {str(e)}", None, None
339
-
340
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
    """Find reference sequences similar to *sequence* and render a tree.

    Uses the global PhylogeneticTreeAnalyzer to select sequences above the
    similarity threshold, builds an interactive Plotly tree, and writes it
    to an HTML file under BASE_DIR/output.

    Returns:
        (message, html_path); html_path is None on failure.
    """
    try:
        if not analyzer:
            return "❌ Tree analyzer not initialized.", None
        if not sequence or len(sequence.strip()) < 10:
            return "❌ Invalid sequence.", None
        if not (1 <= matching_percentage <= 99):
            return "❌ Matching percentage must be 1-99.", None
        if not analyzer.find_query_sequence(sequence):
            return "❌ Sequence not accepted.", None
        matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
        if not matched_ids:
            return f"❌ No similar sequences at {matching_percentage}% threshold.", None
        analyzer.build_tree_structure_with_ml_safe(matched_ids)
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
        # Fall back to a timestamp-based ID if the analyzer didn't set one.
        query_id = analyzer.query_id or f"query_{int(time.time())}"
        output_dir = os.path.join(BASE_DIR, "output")
        os.makedirs(output_dir, exist_ok=True)
        html_filename = f"tree_{query_id}.html"
        html_path = os.path.join(output_dir, html_filename)
        fig.write_html(html_path)
        success_msg = f"✅ Found {len(matched_ids)} sequences at {actual_percentage:.2f}% similarity."
        return success_msg, html_path
    except Exception as e:
        logger.error(f"Tree analysis failed: {e}")
        return f"❌ Error: {str(e)}", None
366
-
367
def predict_with_keras(sequence):
    """Score *sequence* with the Keras F-gene classifier.

    Encodes the sequence as overlapping 6-mers looked up in the global
    k-mer vocabulary and returns a human-readable confidence string.
    Errors are reported in the returned message rather than raised.
    """
    try:
        # Compare against None explicitly: Keras models don't define reliable
        # truthiness, and an empty-but-loaded k-mer dict must not be treated
        # as "model unavailable".
        if keras_model is None or kmer_to_index is None:
            return "❌ Keras model not available."
        if len(sequence) < 6:
            return "❌ Sequence too short (<6 bp)."
        # Sliding 6-mer window; unknown k-mers map to index 0.
        kmers = [sequence[i:i + 6] for i in range(len(sequence) - 5)]
        indices = [kmer_to_index.get(kmer, 0) for kmer in kmers]
        input_arr = np.array([indices])
        prediction = keras_model.predict(input_arr, verbose=0)[0]
        # Last output unit is taken as the F-gene probability.
        f_gene_prob = prediction[-1]
        # +5 bias then clamp to [0, 100] — keeps the reported confidence
        # slightly optimistic by design.
        percentage = min(100, max(0, int(f_gene_prob * 100 + 5)))
        return f"✅ {percentage}% F gene confidence"
    except Exception as e:
        logger.error(f"Keras prediction failed: {e}")
        return f"❌ Error: {str(e)}"
383
-
384
def read_fasta_file(file_path):
    """Read a FASTA file and return the concatenated sequence text.

    Header lines (starting with '>') are dropped and the sequences of all
    records are joined into one string. Returns "" for an empty path or on
    any read failure.
    """
    try:
        if not file_path:
            return ""
        with open(file_path, "r") as handle:
            raw = handle.read()
        pieces = []
        for line in raw.strip().split("\n"):
            if line.startswith(">"):
                continue
            pieces.append(line.strip())
        return ''.join(pieces)
    except Exception as e:
        logger.error(f"Failed to read FASTA file: {e}")
        return ""
396
-
397
def run_pipeline_from_file(fasta_file_path, similarity_score, build_ml_tree):
    """Run the full analysis pipeline on a FASTA file.

    Reads the sequence from *fasta_file_path* and delegates to run_pipeline().
    Returns the same 9-tuple as run_pipeline(); on failure the first and last
    elements carry the error message.
    """
    try:
        sequence = read_fasta_file(fasta_file_path)
        if sequence:
            return run_pipeline(sequence, similarity_score, build_ml_tree)
        return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, "No input"
    except Exception as e:
        logger.error(f"Pipeline from file error: {e}")
        return f"❌ Error: {str(e)}", "", "", "", "", None, None, None, f"❌ Error: {str(e)}"
406
-
407
def run_pipeline(dna_input, similarity_score=95.0, build_ml_tree=False):
    """Run the complete analysis pipeline on a raw DNA string.

    Stages: sanitize input → boundary-model F-gene extraction → Keras
    confidence scoring → optional phylogenetic placement → similarity-tree
    analysis → summary. Each stage degrades gracefully when its model/tool
    is unavailable.

    Returns a 9-tuple:
        (boundary_output, keras_output, ml_tree_output, simplified_ml_output,
         summary_output, aligned_file, phy_file, html_file, tree_html_content)
    """
    try:
        dna_input = dna_input.upper().strip()
        if not dna_input:
            return "❌ Empty input", "", "", "", "", None, None, None, "No input"
        # Sanitize rather than reject: any non-ACTGN character becomes N.
        if not re.match('^[ACTGN]+$', dna_input):
            dna_input = ''.join(c if c in 'ACTGN' else 'N' for c in dna_input)
        processed_sequence = dna_input
        boundary_output = ""
        # Stage 1: extract the F-gene region; fall back to the full input.
        if boundary_model:
            try:
                predictions, probs, confidence = boundary_model.predict(dna_input)
                regions = boundary_model.extract_gene_regions(predictions, dna_input)
                if regions:
                    # Only the first detected region is used downstream.
                    processed_sequence = regions[0]["sequence"]
                    boundary_output = processed_sequence
                    logger.info(f"F gene extracted: {len(processed_sequence)} bp")
                else:
                    boundary_output = "⚠️ No F gene regions found."
                    processed_sequence = dna_input
            except Exception as e:
                boundary_output = f"❌ Boundary error: {str(e)}"
                processed_sequence = dna_input
        else:
            boundary_output = f"⚠️ Boundary model not available. Using full input: {len(dna_input)} bp"
        # Stage 2: Keras confidence (needs at least one 6-mer).
        keras_output = predict_with_keras(processed_sequence) if processed_sequence and len(processed_sequence) >= 6 else "❌ Sequence too short."
        aligned_file = None
        phy_file = None
        ml_tree_output = ""
        # Stage 3: optional phylogenetic placement (needs >= 100 bp).
        if build_ml_tree and processed_sequence and len(processed_sequence) >= 100:
            ml_success, ml_message, ml_aligned, ml_tree = build_maximum_likelihood_tree(processed_sequence)
            ml_tree_output = ml_message
            aligned_file = ml_aligned
            phy_file = ml_tree
        elif build_ml_tree:
            ml_tree_output = "❌ Sequence too short for placement (<100 bp)."
        else:
            ml_tree_output = "⚠️ Phylogenetic placement skipped."
        html_file = None
        tree_html_content = "No tree generated."
        simplified_ml_output = ""
        # Stage 4: similarity-tree analysis; embed the HTML inline on success.
        if analyzer and processed_sequence and len(processed_sequence) >= 10:
            tree_result, html_path = analyze_sequence_for_tree(processed_sequence, similarity_score)
            simplified_ml_output = tree_result
            html_file = html_path
            if html_path and os.path.exists(html_path):
                with open(html_path, 'r', encoding='utf-8') as f:
                    tree_html_content = f.read()
            else:
                tree_html_content = f"<div style='color: red;'>{tree_result}</div>"
        else:
            simplified_ml_output = "❌ Tree analyzer not available." if not analyzer else "❌ Sequence too short (<10 bp)."
            tree_html_content = f"<div style='color: orange;'>{simplified_ml_output}</div>"
        # Stage 5: human-readable summary assembled from stage messages.
        summary_output = f"""
📊 ANALYSIS SUMMARY:
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Input: {len(dna_input)} bp
F Gene: {len(processed_sequence)} bp
Validation: {keras_output.split(':')[-1].strip() if ':' in keras_output else keras_output}
Placement: {'✅ OK' if '✅' in ml_tree_output else '⚠️ Skipped' if 'skipped' in ml_tree_output else '❌ Failed'}
Tree Analysis: {'✅ OK' if '✅' in simplified_ml_output else '❌ Failed'}
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
"""
        return (
            boundary_output, keras_output, ml_tree_output, simplified_ml_output, summary_output,
            aligned_file, phy_file, html_file, tree_html_content
        )
    except Exception as e:
        logger.error(f"Pipeline error: {e}")
        error_msg = f"❌ Pipeline Error: {str(e)}"
        return error_msg, "", "", "", "", None, None, None, error_msg
478
-
479
# --- Flask App ---
# WSGI application object; routes are registered on it below.
app = Flask(__name__)
481
-
482
@app.route("/health", methods=["GET"])
def health_check():
    """Report component and reference-file availability.

    Returns 200 with "healthy" only when every model, external tool and
    reference file is present; otherwise "unhealthy" (still 200) or 500 if
    the check itself fails.
    """
    try:
        mafft_available, iqtree_available, _, _ = check_tool_availability()
        files_exist = {
            "alignment": os.path.exists(ALIGNMENT_PATH),
            "tree": os.path.exists(TREE_PATH),
            # Same CSV candidate list as load_models_safely().
            "csv": any(os.path.exists(c) for c in [
                CSV_PATH,
                os.path.join(BASE_DIR, CSV_PATH),
                os.path.join(BASE_DIR, "app", CSV_PATH),
                os.path.join(os.path.dirname(__file__), CSV_PATH),
                "f_cleaned.csv",
                os.path.join(BASE_DIR, "f_cleaned.csv")
            ])
        }
        return jsonify({
            "status": "healthy" if all([boundary_model, keras_model, analyzer, mafft_available, iqtree_available, files_exist["alignment"], files_exist["tree"], files_exist["csv"]]) else "unhealthy",
            "components": {
                "boundary_model": boundary_model is not None,
                "keras_model": keras_model is not None,
                "kmer_index": kmer_to_index is not None,
                "tree_analyzer": analyzer is not None,
                "mafft_available": mafft_available,
                "iqtree_available": iqtree_available,
                "files": files_exist
            },
            "paths": {
                "base_dir": BASE_DIR,
                "query_output_dir": QUERY_OUTPUT_DIR,
                "alignment_path": ALIGNMENT_PATH,
                "tree_path": TREE_PATH
            }
        }), 200
    except Exception as e:
        logger.error(f"Health check failed: {e}")
        return jsonify({"status": "unhealthy", "error": str(e)}), 500
519
-
520
@app.route("/analyze", methods=["POST"])
def analyze_sequence():
    """Analyze a DNA sequence posted as JSON.

    Body: {"sequence": str, "similarity_score": float (30-99, default 95),
           "build_ml_tree": bool (default false)}.
    Returns the pipeline outputs as JSON, or 400 on validation failure.
    """
    try:
        data = request.get_json()
        if not data or "sequence" not in data:
            return jsonify({"error": "Missing 'sequence' in JSON body"}), 400
        sequence = data["sequence"].upper().strip()
        similarity_score = float(data.get("similarity_score", 95.0))
        build_ml_tree = data.get("build_ml_tree", False)
        if not sequence:
            return jsonify({"error": "Empty sequence"}), 400
        # Unlike run_pipeline() (which sanitizes), the API rejects bad chars.
        if not re.match('^[ACTGN]+$', sequence):
            return jsonify({"error": "Invalid sequence (use A, T, C, G, N)"}), 400
        if not 30.0 <= similarity_score <= 99.0:
            return jsonify({"error": "Similarity score must be between 30 and 99"}), 400
        result = run_pipeline(sequence, similarity_score, build_ml_tree)
        # result is the 9-tuple documented on run_pipeline().
        return jsonify({
            "status": "success",
            "boundary_output": result[0],
            "keras_output": result[1],
            "ml_tree_output": result[2],
            "tree_analysis_output": result[3],
            "summary_output": result[4],
            "aligned_file": os.path.basename(result[5]) if result[5] else None,
            "tree_file": os.path.basename(result[6]) if result[6] else None,
            "html_tree_file": os.path.basename(result[7]) if result[7] else None,
            "tree_html_content": result[8]
        }), 200
    except Exception as e:
        logger.error(f"Analyze error: {e}")
        return jsonify({"error": str(e)}), 500
551
-
552
@app.route("/analyze-file", methods=["POST"])
def analyze_file():
    """Analyze a DNA sequence uploaded as a FASTA file (multipart form).

    Form fields: file (.fasta/.fa/.fas/.txt), similarity_score (30-99,
    default 95), build_ml_tree ("true"/"false"). Returns the same JSON
    payload as /analyze.
    """
    temp_file_path = None
    try:
        if 'file' not in request.files:
            return jsonify({"error": "No file provided"}), 400
        file = request.files['file']
        if file.filename == '':
            return jsonify({"error": "Empty filename"}), 400
        if not file.filename.endswith(('.fasta', '.fa', '.fas', '.txt')):
            return jsonify({"error": "Invalid file type (use .fasta, .fa, .fas, .txt)"}), 400
        similarity_score = float(request.form.get("similarity_score", 95.0))
        build_ml_tree = request.form.get("build_ml_tree", "false").lower() == "true"
        if not 30.0 <= similarity_score <= 99.0:
            return jsonify({"error": "Similarity score must be between 30 and 99"}), 400
        # Persist the upload to a temp file so the pipeline can re-read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".fasta", dir="/tmp") as temp_file:
            file.save(temp_file.name)
            temp_file_path = temp_file.name
        result = run_pipeline_from_file(temp_file_path, similarity_score, build_ml_tree)
        return jsonify({
            "status": "success",
            "boundary_output": result[0],
            "keras_output": result[1],
            "ml_tree_output": result[2],
            "tree_analysis_output": result[3],
            "summary_output": result[4],
            "aligned_file": os.path.basename(result[5]) if result[5] else None,
            "tree_file": os.path.basename(result[6]) if result[6] else None,
            "html_tree_file": os.path.basename(result[7]) if result[7] else None,
            "tree_html_content": result[8]
        }), 200
    except Exception as e:
        logger.error(f"Analyze-file error: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Always remove the temp upload — previously a `'temp_file_path' in
        # locals()` hack only ran on the error path and leaked the file when
        # serialization failed after a successful pipeline run.
        cleanup_file(temp_file_path)
587
-
588
@app.route("/download/<file_type>/<filename>", methods=["GET"])
def download_file(file_type, filename):
    """Serve a generated artifact (alignment, tree, or HTML) for download.

    file_type selects the directory and the allowed filename pattern;
    the filename is sanitized before being joined onto any path.
    """
    try:
        if file_type not in ["alignment", "tree", "html"]:
            return jsonify({"error": "Invalid file type (use alignment, tree, html)"}), 400
        # Defence-in-depth against path traversal: never join a raw,
        # client-supplied name onto a directory. secure_filename() was
        # imported but previously unused.
        filename = secure_filename(filename)
        if not filename:
            return jsonify({"error": "Invalid filename"}), 400
        if file_type == "html":
            if not filename.startswith("tree_") or not filename.endswith(".html"):
                return jsonify({"error": "Invalid HTML filename"}), 400
            file_path = os.path.join(BASE_DIR, "output", filename)
        else:
            if file_type == "alignment" and not filename.endswith((".fasta", ".fa")):
                return jsonify({"error": "Invalid alignment filename"}), 400
            if file_type == "tree" and not filename.endswith(".treefile"):
                return jsonify({"error": "Invalid tree filename"}), 400
            file_path = os.path.join(QUERY_OUTPUT_DIR, filename)
        if not os.path.exists(file_path):
            return jsonify({"error": "File not found"}), 404
        return send_file(file_path, as_attachment=True, download_name=filename)
    except Exception as e:
        logger.error(f"Download error: {e}")
        return jsonify({"error": str(e)}), 500
609
-
610
if __name__ == "__main__":
    # Startup banner: log component/tool status, verify reference files,
    # then serve on all interfaces (container entry point).
    logger.info("🧬 Starting Flask Gene Analysis API...")
    mafft_available, iqtree_available, _, _ = check_tool_availability()
    logger.info(f"🤖 Boundary Model: {'✅ Loaded' if boundary_model else '❌ Missing'}")
    logger.info(f"🧠 Keras Model: {'✅ Loaded' if keras_model else '❌ Missing'}")
    logger.info(f"🌳 Tree Analyzer: {'✅ Loaded' if analyzer else '❌ Missing'}")
    logger.info(f"🧬 MAFFT: {'✅ Available' if mafft_available else '❌ Missing'}")
    logger.info(f"🌲 IQ-TREE: {'✅ Available' if iqtree_available else '❌ Missing'}")
    files_exist = {
        "alignment": os.path.exists(ALIGNMENT_PATH),
        "tree": os.path.exists(TREE_PATH),
        # Same CSV candidate list as load_models_safely()/health_check().
        "csv": any(os.path.exists(c) for c in [
            CSV_PATH,
            os.path.join(BASE_DIR, CSV_PATH),
            os.path.join(BASE_DIR, "app", CSV_PATH),
            os.path.join(os.path.dirname(__file__), CSV_PATH),
            "f_cleaned.csv",
            os.path.join(BASE_DIR, "f_cleaned.csv")
        ])
    }
    logger.info(f"📂 Files: Alignment={'✅' if files_exist['alignment'] else '❌'}, Tree={'✅' if files_exist['tree'] else '❌'}, CSV={'✅' if files_exist['csv'] else '❌'}")
    # Reference files are mandatory; abort rather than serve broken endpoints.
    if not all(files_exist.values()):
        logger.critical("Missing required reference files")
        sys.exit(1)
    app.run(host="0.0.0.0", port=8080, debug=False)