Spaces:
No application file
No application file
Update app.py
Browse files
app.py
CHANGED
|
@@ -207,62 +207,73 @@ def check_tool_availability():
|
|
| 207 |
return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
|
| 208 |
|
| 209 |
# --- Pipeline Functions ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
|
| 211 |
query_fasta = None # Predefine to avoid scoping issues
|
| 212 |
try:
|
|
|
|
| 213 |
if len(sequence.strip()) < 100:
|
| 214 |
return False, "Sequence too short (<100 bp).", None, None
|
|
|
|
|
|
|
| 215 |
query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
|
| 216 |
query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
|
| 217 |
aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
|
| 218 |
output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")
|
|
|
|
|
|
|
| 219 |
if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
|
| 220 |
-
|
| 221 |
-
try:
|
| 222 |
-
os.unlink(query_fasta)
|
| 223 |
-
except Exception as cleanup_error:
|
| 224 |
-
logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
|
| 225 |
return False, "Reference alignment or tree not found.", None, None
|
|
|
|
|
|
|
| 226 |
query_record = SeqRecord(Seq(sequence.upper()), id=query_id, description="")
|
| 227 |
SeqIO.write([query_record], query_fasta, "fasta")
|
|
|
|
|
|
|
| 228 |
with open(aligned_with_query, "w") as output_file:
|
| 229 |
-
subprocess.run(
|
| 230 |
-
mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
|
| 233 |
-
|
| 234 |
-
try:
|
| 235 |
-
os.unlink(query_fasta)
|
| 236 |
-
except Exception as cleanup_error:
|
| 237 |
-
logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
|
| 238 |
return False, "MAFFT alignment failed.", None, None
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
treefile = f"{output_prefix}.treefile"
|
| 244 |
if not os.path.exists(treefile):
|
| 245 |
-
|
| 246 |
-
try:
|
| 247 |
-
os.unlink(query_fasta)
|
| 248 |
-
except Exception as cleanup_error:
|
| 249 |
-
logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
|
| 250 |
return False, "IQ-TREE placement failed.", aligned_with_query, None
|
|
|
|
|
|
|
| 251 |
success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
|
| 252 |
-
|
| 253 |
-
try:
|
| 254 |
-
os.unlink(query_fasta)
|
| 255 |
-
except Exception as cleanup_error:
|
| 256 |
-
logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
|
| 257 |
return True, success_msg, aligned_with_query, treefile
|
| 258 |
-
|
|
|
|
| 259 |
logger.error(f"Phylogenetic placement failed: {main_error}", exc_info=True)
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
os.unlink(query_fasta)
|
| 263 |
-
except Exception as cleanup_error:
|
| 264 |
-
logger.warning(f"Failed to clean up {query_fasta}: {cleanup_error}")
|
| 265 |
-
return False, f"Error: {str(main_error)}", None, None
|
| 266 |
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
|
| 267 |
try:
|
| 268 |
logger.debug("Starting tree analysis...")
|
|
@@ -437,28 +448,16 @@ async def run_pipeline_from_file(fasta_file_obj, similarity_score, build_ml_tree
|
|
| 437 |
temp_file_path = temp_file.name
|
| 438 |
dna_input = read_fasta_file(temp_file_path)
|
| 439 |
if not dna_input:
|
| 440 |
-
|
| 441 |
-
try:
|
| 442 |
-
os.unlink(temp_file_path)
|
| 443 |
-
except Exception as cleanup_error:
|
| 444 |
-
logger.warning(f"Failed to delete temp file {temp_file_path}: {cleanup_error}")
|
| 445 |
return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
|
| 446 |
result = run_pipeline(dna_input, similarity_score, build_ml_tree)
|
| 447 |
-
|
| 448 |
-
try:
|
| 449 |
-
os.unlink(temp_file_path)
|
| 450 |
-
except Exception as cleanup_error:
|
| 451 |
-
logger.warning(f"Failed to delete temp file {temp_file_path}: {cleanup_error}")
|
| 452 |
return result
|
| 453 |
except Exception as main_error: # Renamed from 'e'
|
| 454 |
logger.error(f"Pipeline from file error: {main_error}", exc_info=True)
|
| 455 |
-
|
| 456 |
-
try:
|
| 457 |
-
os.unlink(temp_file_path)
|
| 458 |
-
except Exception as cleanup_error:
|
| 459 |
-
logger.warning(f"Failed to delete temp file {temp_file_path}: {cleanup_error}")
|
| 460 |
error_msg = f"❌ Error: {str(main_error)}"
|
| 461 |
-
return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
|
| 462 |
class AnalysisRequest(BaseModel):
|
| 463 |
sequence: str
|
| 464 |
similarity_score: float = 95.0
|
|
|
|
| 207 |
return mafft_available, iqtree_available, mafft_cmd, iqtree_cmd
|
| 208 |
|
| 209 |
# --- Pipeline Functions ---
|
| 210 |
+
def cleanup_file(file_path: str) -> None:
    """Best-effort removal of a file; problems are logged, never raised.

    Nothing happens when ``file_path`` is falsy (``None`` or an empty
    string) or when no file exists at that path. A successful removal is
    logged at debug level; any exception raised while removing the file is
    logged as a warning and swallowed.
    """
    # Guard clause: nothing to delete.
    if not file_path or not os.path.exists(file_path):
        return

    try:
        os.unlink(file_path)
        logger.debug(f"Cleaned up {file_path}")
    except Exception as unlink_error:
        logger.warning(f"Failed to clean up {file_path}: {unlink_error}")
|
| 218 |
+
|
| 219 |
def phylogenetic_placement(sequence: str, mafft_cmd: str, iqtree_cmd: str):
    """Place a query sequence onto the reference tree using MAFFT and IQ-TREE.

    The query is written to a temporary FASTA file, added to the reference
    alignment at ``ALIGNMENT_PATH`` with ``mafft --add``, and then placed by
    IQ-TREE using ``TREE_PATH`` as the constraint tree (``-g``).

    Args:
        sequence: Query nucleotide sequence. Surrounding whitespace is
            ignored; at least 100 characters must remain.
        mafft_cmd: Executable (name or path) used to invoke MAFFT.
        iqtree_cmd: Executable (name or path) used to invoke IQ-TREE.

    Returns:
        A 4-tuple ``(success, message, aligned_path, treefile_path)``.
        ``aligned_path`` is only set when the alignment step succeeded and
        the alignment file is kept on disk; ``treefile_path`` is only set on
        full success. Every failure is reported through the tuple; no
        exception escapes this function.
    """
    query_fasta = None  # Predefine so cleanup is safe on every exit path
    aligned_with_query = None
    keep_alignment = False  # Set once the alignment path is handed to the caller
    try:
        # Input validation. The stripped sequence is reused below so the
        # text that is validated is the same text written to the FASTA file.
        cleaned_sequence = sequence.strip().upper()
        if len(cleaned_sequence) < 100:
            return False, "Sequence too short (<100 bp).", None, None

        # Check reference files before creating anything on disk
        if not os.path.exists(ALIGNMENT_PATH) or not os.path.exists(TREE_PATH):
            return False, "Reference alignment or tree not found.", None, None

        # Setup file paths
        query_id = f"QUERY_{uuid.uuid4().hex[:8]}"
        query_fasta = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}.fa")
        aligned_with_query = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_aligned.fa")
        output_prefix = os.path.join(QUERY_OUTPUT_DIR, f"{query_id}_placed_tree")

        try:
            # Write query FASTA
            query_record = SeqRecord(Seq(cleaned_sequence), id=query_id, description="")
            SeqIO.write([query_record], query_fasta, "fasta")

            # Run MAFFT; the alignment is streamed straight into the output file
            with open(aligned_with_query, "w") as output_file:
                subprocess.run(
                    [mafft_cmd, "--add", query_fasta, "--reorder", ALIGNMENT_PATH],
                    stdout=output_file,
                    stderr=subprocess.PIPE,
                    text=True,
                    timeout=600,
                    check=True,
                )
            if not os.path.exists(aligned_with_query) or os.path.getsize(aligned_with_query) == 0:
                return False, "MAFFT alignment failed.", None, None

            # Run IQ-TREE with the reference tree as a topological constraint
            subprocess.run(
                [
                    iqtree_cmd,
                    "-s", aligned_with_query,
                    "-g", TREE_PATH,
                    "-m", "GTR+G",
                    "-pre", output_prefix,
                    "-redo",
                ],
                capture_output=True,
                text=True,
                timeout=1200,
                check=True,
            )

            # From here on the alignment path is returned to the caller,
            # so the file must survive the cleanup below.
            keep_alignment = True
            treefile = f"{output_prefix}.treefile"
            if not os.path.exists(treefile):
                return False, "IQ-TREE placement failed.", aligned_with_query, None

            # Success
            success_msg = f"Placement completed!\nQuery ID: {query_id}\nAlignment: {os.path.basename(aligned_with_query)}\nTree: {os.path.basename(treefile)}"
            return True, success_msg, aligned_with_query, treefile
        finally:
            # Single cleanup point for every exit path (return or exception).
            cleanup_file(query_fasta)
            if not keep_alignment:
                # The caller receives None for the alignment, so do not
                # leave an orphaned or partial file behind.
                cleanup_file(aligned_with_query)

    except subprocess.CalledProcessError as proc_error:
        # str(CalledProcessError) omits the tool's own output; log it so the
        # failure can be diagnosed.
        tool_output = (proc_error.stderr or proc_error.stdout or "").strip()
        logger.error(
            f"Phylogenetic placement failed: {proc_error}; tool output: {tool_output}",
            exc_info=True,
        )
        return False, f"Error: {str(proc_error)}", None, None
    except Exception as main_error:
        logger.error(f"Phylogenetic placement failed: {main_error}", exc_info=True)
        return False, f"Error: {str(main_error)}", None, None
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
def analyze_sequence_for_tree(sequence: str, matching_percentage: float):
|
| 278 |
try:
|
| 279 |
logger.debug("Starting tree analysis...")
|
|
|
|
| 448 |
temp_file_path = temp_file.name
|
| 449 |
dna_input = read_fasta_file(temp_file_path)
|
| 450 |
if not dna_input:
|
| 451 |
+
cleanup_file(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 452 |
return "❌ Failed to read FASTA file", "", "", "", "", None, None, None, None, "No input", "No input", None, None
|
| 453 |
result = run_pipeline(dna_input, similarity_score, build_ml_tree)
|
| 454 |
+
cleanup_file(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
return result
|
| 456 |
except Exception as main_error: # Renamed from 'e'
|
| 457 |
logger.error(f"Pipeline from file error: {main_error}", exc_info=True)
|
| 458 |
+
cleanup_file(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 459 |
error_msg = f"❌ Error: {str(main_error)}"
|
| 460 |
+
return error_msg, "", "", "", "", None, None, None, None, error_msg, error_msg, None, None
|
| 461 |
class AnalysisRequest(BaseModel):
|
| 462 |
sequence: str
|
| 463 |
similarity_score: float = 95.0
|