Spaces:
No application file
No application file
Update app.py
Browse files
app.py
CHANGED
|
@@ -204,51 +204,91 @@ def run_pipeline(dna_input):
|
|
| 204 |
logging.info("F gene sequence found in dataset")
|
| 205 |
matched_ids, perc = analyzer.find_similar_sequences(analyzer.matching_percentage)
|
| 206 |
|
| 207 |
-
#
|
| 208 |
try:
|
| 209 |
-
|
| 210 |
|
| 211 |
-
#
|
|
|
|
|
|
|
|
|
|
| 212 |
possible_html_files = [
|
| 213 |
"phylogenetic_tree_normalized_horizontal.html",
|
|
|
|
| 214 |
"phylogenetic_tree.html",
|
| 215 |
-
"tree.html"
|
|
|
|
| 216 |
]
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
break
|
| 226 |
|
| 227 |
if not html_file:
|
| 228 |
-
#
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
|
| 233 |
except Exception as tree_error:
|
| 234 |
-
ml_output = f"Tree creation failed: {str(tree_error)}"
|
|
|
|
|
|
|
| 235 |
logging.error(f"Tree creation error: {tree_error}")
|
|
|
|
|
|
|
| 236 |
|
| 237 |
else:
|
| 238 |
-
ml_output = f"F gene sequence not found in dataset
|
| 239 |
logging.warning(f"F gene sequence not found. Length: {len(processed_sequence)}")
|
| 240 |
else:
|
| 241 |
-
ml_output = "Failed to load CSV dataset"
|
| 242 |
logging.error("Failed to load CSV dataset")
|
| 243 |
except Exception as e:
|
| 244 |
-
ml_output = f"ML Tree analysis failed: {str(e)}"
|
| 245 |
logging.error(f"ML Tree failed: {e}")
|
|
|
|
|
|
|
| 246 |
elif not os.path.exists(csv_path):
|
| 247 |
-
ml_output = f"CSV dataset not found at {csv_path}"
|
| 248 |
elif not processed_sequence or len(processed_sequence) < 10:
|
| 249 |
-
ml_output = f"F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
|
| 250 |
else:
|
| 251 |
-
ml_output = "Skipped due to previous step errors"
|
| 252 |
|
| 253 |
return (
|
| 254 |
boundary_output,
|
|
@@ -264,45 +304,48 @@ def run_pipeline(dna_input):
|
|
| 264 |
except Exception as e:
|
| 265 |
error_msg = f"Pipeline failed: {str(e)}"
|
| 266 |
logging.error(error_msg)
|
|
|
|
|
|
|
| 267 |
return error_msg, "", "", "", None, None, None, error_msg
|
| 268 |
|
| 269 |
# --- Gradio UI ---
|
| 270 |
-
with gr.Blocks(title="Viral Gene Phylogenetic Pipeline") as demo:
|
| 271 |
-
gr.Markdown("# Viral Gene Phylogenetic Inference Pipeline")
|
| 272 |
gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction.")
|
| 273 |
|
| 274 |
-
with gr.Tab("Paste DNA Sequence"):
|
| 275 |
inp = gr.Textbox(
|
| 276 |
label="DNA Input",
|
| 277 |
placeholder="Paste your DNA sequence here (ACTG format)",
|
| 278 |
lines=5
|
| 279 |
)
|
| 280 |
-
btn1 = gr.Button("Run Pipeline", variant="primary")
|
| 281 |
|
| 282 |
-
with gr.Tab("Upload FASTA File"):
|
| 283 |
file_input = gr.File(
|
| 284 |
label="FASTA File",
|
| 285 |
file_types=['.fasta', '.fa', '.txt']
|
| 286 |
)
|
| 287 |
-
btn2 = gr.Button("Run on FASTA", variant="primary")
|
| 288 |
|
| 289 |
# Outputs
|
| 290 |
-
gr.Markdown("## Pipeline Results")
|
| 291 |
|
| 292 |
with gr.Row():
|
| 293 |
with gr.Column():
|
| 294 |
-
out1 = gr.Textbox(label="Step 1: Extracted F Gene Sequence", lines=8)
|
| 295 |
-
out2 = gr.Textbox(label="Step 2: F Gene Validation (Keras)", lines=3)
|
| 296 |
with gr.Column():
|
| 297 |
-
out3 = gr.Textbox(label="Dataset Used")
|
| 298 |
-
out4 = gr.Textbox(label="Step 3: Phylogenetic Tree Status", lines=
|
| 299 |
|
| 300 |
with gr.Row():
|
| 301 |
-
html = gr.File(label="Download Tree (HTML)")
|
| 302 |
-
fasta = gr.File(label="Download Aligned FASTA")
|
| 303 |
-
phy = gr.File(label="Download IQ-TREE .phy File")
|
| 304 |
|
| 305 |
-
|
|
|
|
| 306 |
|
| 307 |
# Event handlers
|
| 308 |
btn1.click(
|
|
|
|
| 204 |
logging.info("F gene sequence found in dataset")
|
| 205 |
matched_ids, perc = analyzer.find_similar_sequences(analyzer.matching_percentage)
|
| 206 |
|
| 207 |
+
# Enhanced tree creation with better error handling
|
| 208 |
try:
|
| 209 |
+
logging.info(f"Creating tree with {len(matched_ids)} sequences at {perc:.1f}% similarity")
|
| 210 |
|
| 211 |
+
# Call the tree creation method
|
| 212 |
+
tree_result = analyzer.create_interactive_tree(matched_ids, perc)
|
| 213 |
+
|
| 214 |
+
# Check for multiple possible HTML file names and locations
|
| 215 |
possible_html_files = [
|
| 216 |
"phylogenetic_tree_normalized_horizontal.html",
|
| 217 |
+
"phylogenetic_tree_horizontal.html",
|
| 218 |
"phylogenetic_tree.html",
|
| 219 |
+
"tree.html",
|
| 220 |
+
"interactive_tree.html"
|
| 221 |
]
|
| 222 |
|
| 223 |
+
# Also check in common subdirectories
|
| 224 |
+
search_dirs = [".", "output", "results", "trees"]
|
| 225 |
+
|
| 226 |
+
for search_dir in search_dirs:
|
| 227 |
+
if not os.path.exists(search_dir):
|
| 228 |
+
continue
|
| 229 |
+
|
| 230 |
+
for html_filename in possible_html_files:
|
| 231 |
+
full_path = os.path.join(search_dir, html_filename)
|
| 232 |
+
if os.path.exists(full_path):
|
| 233 |
+
html_file = full_path
|
| 234 |
+
try:
|
| 235 |
+
with open(full_path, "r", encoding='utf-8') as f:
|
| 236 |
+
tree_html_content = f.read()
|
| 237 |
+
ml_output = f"✅ Phylogenetic tree generated successfully!\n- {len(matched_ids)} sequences analyzed\n- Similarity threshold: {perc:.1f}%\n- Tree file: {html_filename}"
|
| 238 |
+
logging.info(f"Tree HTML file found: {full_path}")
|
| 239 |
+
break
|
| 240 |
+
except Exception as read_error:
|
| 241 |
+
logging.error(f"Error reading HTML file {full_path}: {read_error}")
|
| 242 |
+
continue
|
| 243 |
+
|
| 244 |
+
if html_file:
|
| 245 |
break
|
| 246 |
|
| 247 |
if not html_file:
|
| 248 |
+
# Enhanced debugging - list all files in current directory
|
| 249 |
+
all_files = []
|
| 250 |
+
for root, dirs, files in os.walk("."):
|
| 251 |
+
for file in files:
|
| 252 |
+
if file.endswith(('.html', '.htm')):
|
| 253 |
+
all_files.append(os.path.join(root, file))
|
| 254 |
+
|
| 255 |
+
ml_output = f"⚠️ Tree analysis completed but HTML file not found.\n"
|
| 256 |
+
ml_output += f"- Analyzed {len(matched_ids)} sequences at {perc:.1f}% similarity\n"
|
| 257 |
+
ml_output += f"- Available HTML files in directory: {all_files}\n"
|
| 258 |
+
ml_output += f"- Tree creation result: {tree_result if 'tree_result' in locals() else 'Unknown'}"
|
| 259 |
+
|
| 260 |
+
logging.warning(f"HTML files found in directory tree: {all_files}")
|
| 261 |
+
|
| 262 |
+
# Try to get any HTML content that might have been generated
|
| 263 |
+
if hasattr(analyzer, 'last_generated_html') and analyzer.last_generated_html:
|
| 264 |
+
tree_html_content = analyzer.last_generated_html
|
| 265 |
+
ml_output += "\n- Using HTML content from analyzer object"
|
| 266 |
|
| 267 |
except Exception as tree_error:
|
| 268 |
+
ml_output = f"❌ Tree creation failed: {str(tree_error)}\n"
|
| 269 |
+
ml_output += f"- Sequences available: {len(matched_ids)}\n"
|
| 270 |
+
ml_output += f"- Similarity threshold: {perc:.1f}%"
|
| 271 |
logging.error(f"Tree creation error: {tree_error}")
|
| 272 |
+
import traceback
|
| 273 |
+
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 274 |
|
| 275 |
else:
|
| 276 |
+
ml_output = f"❌ F gene sequence not found in dataset\n- Query length: {len(processed_sequence)} bp\n- Dataset contains {len(analyzer.data) if hasattr(analyzer, 'data') else 'unknown'} sequences"
|
| 277 |
logging.warning(f"F gene sequence not found. Length: {len(processed_sequence)}")
|
| 278 |
else:
|
| 279 |
+
ml_output = "❌ Failed to load CSV dataset"
|
| 280 |
logging.error("Failed to load CSV dataset")
|
| 281 |
except Exception as e:
|
| 282 |
+
ml_output = f"❌ ML Tree analysis failed: {str(e)}"
|
| 283 |
logging.error(f"ML Tree failed: {e}")
|
| 284 |
+
import traceback
|
| 285 |
+
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 286 |
elif not os.path.exists(csv_path):
|
| 287 |
+
ml_output = f"❌ CSV dataset not found at {csv_path}"
|
| 288 |
elif not processed_sequence or len(processed_sequence) < 10:
|
| 289 |
+
ml_output = f"❌ F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
|
| 290 |
else:
|
| 291 |
+
ml_output = "❌ Skipped due to previous step errors"
|
| 292 |
|
| 293 |
return (
|
| 294 |
boundary_output,
|
|
|
|
| 304 |
except Exception as e:
|
| 305 |
error_msg = f"Pipeline failed: {str(e)}"
|
| 306 |
logging.error(error_msg)
|
| 307 |
+
import traceback
|
| 308 |
+
logging.error(f"Full traceback: {traceback.format_exc()}")
|
| 309 |
return error_msg, "", "", "", None, None, None, error_msg
|
| 310 |
|
| 311 |
# --- Gradio UI ---
|
| 312 |
+
with gr.Blocks(title="Viral Gene Phylogenetic Pipeline", theme=gr.themes.Soft()) as demo:
|
| 313 |
+
gr.Markdown("# 🧬 Viral Gene Phylogenetic Inference Pipeline")
|
| 314 |
gr.Markdown("This pipeline processes DNA sequences through boundary detection, k-mer analysis, and phylogenetic tree construction.")
|
| 315 |
|
| 316 |
+
with gr.Tab("📝 Paste DNA Sequence"):
|
| 317 |
inp = gr.Textbox(
|
| 318 |
label="DNA Input",
|
| 319 |
placeholder="Paste your DNA sequence here (ACTG format)",
|
| 320 |
lines=5
|
| 321 |
)
|
| 322 |
+
btn1 = gr.Button("🚀 Run Pipeline", variant="primary", size="lg")
|
| 323 |
|
| 324 |
+
with gr.Tab("📁 Upload FASTA File"):
|
| 325 |
file_input = gr.File(
|
| 326 |
label="FASTA File",
|
| 327 |
file_types=['.fasta', '.fa', '.txt']
|
| 328 |
)
|
| 329 |
+
btn2 = gr.Button("🚀 Run on FASTA", variant="primary", size="lg")
|
| 330 |
|
| 331 |
# Outputs
|
| 332 |
+
gr.Markdown("## 📊 Pipeline Results")
|
| 333 |
|
| 334 |
with gr.Row():
|
| 335 |
with gr.Column():
|
| 336 |
+
out1 = gr.Textbox(label="🎯 Step 1: Extracted F Gene Sequence", lines=8)
|
| 337 |
+
out2 = gr.Textbox(label="🔍 Step 2: F Gene Validation (Keras)", lines=3)
|
| 338 |
with gr.Column():
|
| 339 |
+
out3 = gr.Textbox(label="📋 Dataset Used")
|
| 340 |
+
out4 = gr.Textbox(label="🌳 Step 3: Phylogenetic Tree Status", lines=5)
|
| 341 |
|
| 342 |
with gr.Row():
|
| 343 |
+
html = gr.File(label="📥 Download Tree (HTML)")
|
| 344 |
+
fasta = gr.File(label="📥 Download Aligned FASTA")
|
| 345 |
+
phy = gr.File(label="📥 Download IQ-TREE .phy File")
|
| 346 |
|
| 347 |
+
with gr.Row():
|
| 348 |
+
tree_html = gr.HTML(label="🌳 Interactive Tree Preview", height=600)
|
| 349 |
|
| 350 |
# Event handlers
|
| 351 |
btn1.click(
|