re-type committed on
Commit
7d8c0b6
·
verified ·
1 Parent(s): 61b9191

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -250
app.py CHANGED
@@ -1,4 +1,4 @@
1
- # app.py - Enhanced version with improved tree visualization
2
  import gradio as gr
3
  import torch
4
  import pickle
@@ -72,230 +72,93 @@ try:
72
  except Exception as e:
73
  logging.error(f"Failed to load Keras model from HF Hub: {e}")
74
 
75
- # --- Enhanced Tree Visualization Functions ---
76
- def create_fallback_tree_html(matched_ids, similarities, query_id="QUERY"):
77
- """Create a simple fallback tree visualization using HTML/CSS/JS"""
78
-
79
- # Generate tree data structure
80
- tree_data = []
81
- for i, (seq_id, similarity) in enumerate(zip(matched_ids, similarities)):
82
- tree_data.append({
83
- 'id': seq_id,
84
- 'similarity': similarity,
85
- 'level': 1 if similarity > 90 else 2
86
- })
87
-
88
- # Add query sequence
89
- tree_data.insert(0, {'id': query_id, 'similarity': 100.0, 'level': 0})
90
-
91
- html_content = f"""
92
- <!DOCTYPE html>
93
- <html>
94
- <head>
95
- <title>Phylogenetic Tree Visualization</title>
96
- <style>
97
- body {{
98
- font-family: Arial, sans-serif;
99
- margin: 20px;
100
- background-color: #f5f5f5;
101
- }}
102
- .tree-container {{
103
- background: white;
104
- border-radius: 8px;
105
- padding: 20px;
106
- box-shadow: 0 2px 10px rgba(0,0,0,0.1);
107
- }}
108
- .tree-title {{
109
- font-size: 24px;
110
- font-weight: bold;
111
- margin-bottom: 20px;
112
- text-align: center;
113
- color: #2c3e50;
114
- }}
115
- .tree-node {{
116
- display: flex;
117
- align-items: center;
118
- margin: 10px 0;
119
- padding: 8px;
120
- border-radius: 4px;
121
- transition: background-color 0.3s;
122
- }}
123
- .tree-node:hover {{
124
- background-color: #ecf0f1;
125
- }}
126
- .node-query {{
127
- background-color: #e74c3c;
128
- color: white;
129
- font-weight: bold;
130
- }}
131
- .node-high {{
132
- background-color: #27ae60;
133
- color: white;
134
- }}
135
- .node-medium {{
136
- background-color: #f39c12;
137
- color: white;
138
- }}
139
- .node-line {{
140
- width: 20px;
141
- height: 2px;
142
- background-color: #34495e;
143
- margin-right: 10px;
144
- }}
145
- .node-info {{
146
- display: flex;
147
- justify-content: space-between;
148
- width: 100%;
149
- }}
150
- .similarity-bar {{
151
- width: 100px;
152
- height: 20px;
153
- background-color: #ecf0f1;
154
- border-radius: 10px;
155
- overflow: hidden;
156
- margin-left: 10px;
157
- }}
158
- .similarity-fill {{
159
- height: 100%;
160
- background: linear-gradient(90deg, #e74c3c 0%, #f39c12 50%, #27ae60 100%);
161
- transition: width 0.5s ease;
162
- }}
163
- .stats {{
164
- margin-top: 20px;
165
- padding: 15px;
166
- background-color: #ecf0f1;
167
- border-radius: 4px;
168
- }}
169
- </style>
170
- </head>
171
- <body>
172
- <div class="tree-container">
173
- <div class="tree-title">🌳 Phylogenetic Tree Analysis</div>
174
-
175
- <div class="stats">
176
- <strong>Analysis Summary:</strong><br>
177
- • Total sequences analyzed: {len(tree_data)}<br>
178
- • Similarity range: {min([d['similarity'] for d in tree_data[1:]]):.1f}% - {max([d['similarity'] for d in tree_data[1:]]):.1f}%<br>
179
- • Average similarity: {np.mean([d['similarity'] for d in tree_data[1:]]):.1f}%
180
- </div>
181
-
182
- <div style="margin-top: 20px;">
183
- """
184
-
185
- for node in tree_data:
186
- indent = " " * node['level'] * 2
187
- if node['id'] == query_id:
188
- node_class = "node-query"
189
- elif node['similarity'] > 90:
190
- node_class = "node-high"
191
  else:
192
- node_class = "node-medium"
193
-
194
- html_content += f"""
195
- <div class="tree-node {node_class}">
196
- <div style="margin-left: {node['level'] * 20}px;">
197
- <div class="node-line"></div>
198
- </div>
199
- <div class="node-info">
200
- <span>{node['id']}</span>
201
- <div style="display: flex; align-items: center;">
202
- <span>{node['similarity']:.1f}%</span>
203
- <div class="similarity-bar">
204
- <div class="similarity-fill" style="width: {node['similarity']}%;"></div>
205
- </div>
206
- </div>
207
- </div>
208
- </div>
209
- """
210
-
211
- html_content += """
212
- </div>
213
- </div>
214
-
215
- <script>
216
- // Add some interactivity
217
- document.querySelectorAll('.tree-node').forEach(node => {
218
- node.addEventListener('click', function() {
219
- this.style.transform = this.style.transform === 'scale(1.02)' ? 'scale(1)' : 'scale(1.02)';
220
- });
221
- });
222
-
223
- // Animate similarity bars on load
224
- window.addEventListener('load', function() {
225
- document.querySelectorAll('.similarity-fill').forEach(fill => {
226
- const width = fill.style.width;
227
- fill.style.width = '0%';
228
- setTimeout(() => {
229
- fill.style.width = width;
230
- }, 100);
231
- });
232
- });
233
- </script>
234
- </body>
235
- </html>
236
- """
237
-
238
- return html_content
239
 
240
- def safe_tree_creation(analyzer, matched_ids, perc):
241
- """Safely create a tree with multiple fallback options"""
242
-
 
 
243
  try:
244
- # First, try the original method
245
- logging.info("Attempting original tree creation method...")
246
- result = analyzer.create_interactive_tree(matched_ids, perc)
 
 
 
 
 
247
 
248
- # Check if HTML file was created
249
- possible_files = [
250
- "phylogenetic_tree_normalized_horizontal.html",
251
- "phylogenetic_tree_horizontal.html",
252
- "phylogenetic_tree.html",
253
- "tree.html",
254
- "interactive_tree.html"
255
- ]
256
 
257
- for filename in possible_files:
258
- if os.path.exists(filename):
259
- with open(filename, "r", encoding='utf-8') as f:
260
- html_content = f.read()
261
- return filename, html_content, f"✅ Tree created successfully using {filename}"
262
 
263
- # If no file found, check if analyzer has HTML content
264
- if hasattr(analyzer, 'last_generated_html') and analyzer.last_generated_html:
265
- return None, analyzer.last_generated_html, "✅ Tree created (using analyzer HTML content)"
266
-
267
- except Exception as e:
268
- logging.error(f"Original tree creation failed: {e}")
269
-
270
- try:
271
- # Fallback: Create simple tree visualization
272
- logging.info("Creating fallback tree visualization...")
273
 
274
- # Get similarity data if available
275
- similarities = []
276
- if hasattr(analyzer, 'data') and analyzer.data is not None:
277
- for seq_id in matched_ids:
278
- if seq_id in analyzer.data.index:
279
- # Calculate similarity based on sequence comparison (simplified)
280
- similarities.append(np.random.uniform(perc-5, perc+5)) # Placeholder
281
- else:
282
- similarities.append(perc)
283
- else:
284
- similarities = [perc] * len(matched_ids)
 
 
 
 
 
 
285
 
286
- # Create fallback HTML
287
- fallback_html = create_fallback_tree_html(matched_ids, similarities)
 
288
 
289
- # Save fallback HTML
290
- fallback_filename = "fallback_phylogenetic_tree.html"
291
- with open(fallback_filename, "w", encoding='utf-8') as f:
292
- f.write(fallback_html)
293
 
294
- return fallback_filename, fallback_html, f"✅ Fallback tree created with {len(matched_ids)} sequences"
 
 
 
 
 
295
 
296
  except Exception as e:
297
- logging.error(f"Fallback tree creation also failed: {e}")
298
- return None, "<p>Tree visualization failed. Please check the logs for details.</p>", f"❌ Tree creation failed: {str(e)}"
 
 
 
299
 
300
  # --- Keras Prediction ---
301
  def predict_with_keras(sequence):
@@ -411,58 +274,48 @@ def run_pipeline(dna_input):
411
  # Skip MAFFT due to configuration issues in the container
412
  logging.info("Skipping MAFFT/IQ-TREE due to container configuration issues")
413
 
414
- # Step 4: ML Simplified Tree (using the extracted F gene sequence) - ENHANCED
415
  html_file = None
416
  tree_html_content = "No tree generated"
417
  ml_output = ""
418
 
419
- if os.path.exists(csv_path) and processed_sequence and len(processed_sequence) >= 10:
420
  try:
421
  logging.info(f"Starting ML tree analysis with F gene sequence length: {len(processed_sequence)}")
422
- analyzer = ml_simplified_tree.PhylogeneticTreeAnalyzer()
423
 
424
- if analyzer.load_data(csv_path):
425
- logging.info("CSV data loaded successfully")
 
 
 
 
 
 
 
 
 
 
 
 
 
426
 
427
- # Use the extracted F gene sequence from boundary model
428
  if analyzer.find_query_sequence(processed_sequence):
429
- logging.info("F gene sequence found in dataset")
430
- matched_ids, perc = analyzer.find_similar_sequences(analyzer.matching_percentage)
431
-
432
- # Enhanced tree creation with robust error handling
433
- try:
434
- logging.info(f"Creating tree with {len(matched_ids)} sequences at {perc:.1f}% similarity")
435
-
436
- # Use the safe tree creation function
437
- html_file, tree_html_content, status_message = safe_tree_creation(analyzer, matched_ids, perc)
438
- ml_output = status_message
439
-
440
- if html_file:
441
- logging.info(f"Tree HTML file created: {html_file}")
442
- else:
443
- logging.info("Tree HTML content generated in memory")
444
-
445
- except Exception as tree_error:
446
- ml_output = f"❌ Tree creation failed: {str(tree_error)}\n"
447
- ml_output += f"- Sequences available: {len(matched_ids)}\n"
448
- ml_output += f"- Similarity threshold: {perc:.1f}%"
449
- logging.error(f"Tree creation error: {tree_error}")
450
- import traceback
451
- logging.error(f"Full traceback: {traceback.format_exc()}")
452
-
453
- else:
454
- ml_output = f"❌ F gene sequence not found in dataset\n- Query length: {len(processed_sequence)} bp\n- Dataset contains {len(analyzer.data) if hasattr(analyzer, 'data') else 'unknown'} sequences"
455
- logging.warning(f"F gene sequence not found. Length: {len(processed_sequence)}")
456
  else:
457
- ml_output = "❌ Failed to load CSV dataset"
458
- logging.error("Failed to load CSV dataset")
 
 
459
  except Exception as e:
460
  ml_output = f"❌ ML Tree analysis failed: {str(e)}"
461
  logging.error(f"ML Tree failed: {e}")
462
  import traceback
463
  logging.error(f"Full traceback: {traceback.format_exc()}")
464
- elif not os.path.exists(csv_path):
465
- ml_output = f"❌ CSV dataset not found at {csv_path}"
466
  elif not processed_sequence or len(processed_sequence) < 10:
467
  ml_output = f"❌ F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
468
  else:
 
1
+ # app.py
2
  import gradio as gr
3
  import torch
4
  import pickle
 
72
  except Exception as e:
73
  logging.error(f"Failed to load Keras model from HF Hub: {e}")
74
 
75
+ # --- Initialize Tree Analyzer ---
76
+ analyzer = None
77
+ try:
78
+ analyzer = ml_simplified_tree.PhylogeneticTreeAnalyzer()
79
+ if os.path.exists(csv_path):
80
+ if analyzer.load_data(csv_path):
81
+ logging.info("Tree analyzer initialized successfully")
82
+ # Try to train AI model (optional)
83
+ try:
84
+ if not analyzer.train_ai_model():
85
+ logging.warning("AI model training failed; proceeding with basic analysis.")
86
+ except Exception as e:
87
+ logging.warning(f"AI model training failed: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  else:
89
+ logging.error("Failed to load CSV data for tree analyzer")
90
+ analyzer = None
91
+ else:
92
+ logging.error(f"CSV file not found: {csv_path}")
93
+ analyzer = None
94
+ except Exception as e:
95
+ logging.error(f"Failed to initialize tree analyzer: {e}")
96
+ analyzer = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
 
98
+ # --- Tree Analysis Function (Based on old Gradio API) ---
99
+ def analyze_sequence_for_tree(sequence: str, matching_percentage: float = 95.0) -> str:
100
+ """
101
+ Analyze sequence and create phylogenetic tree using the working Gradio API pattern
102
+ """
103
  try:
104
+ if not analyzer:
105
+ return "Error: Tree analyzer not initialized."
106
+
107
+ if not sequence:
108
+ return "Error: Please provide a sequence."
109
+
110
+ if not (1 <= matching_percentage <= 99):
111
+ return "Error: Matching percentage must be between 1 and 99."
112
 
113
+ # Find query sequence
114
+ if not analyzer.find_query_sequence(sequence):
115
+ return "Error: Invalid query sequence or sequence not found in dataset."
 
 
 
 
 
116
 
117
+ # Set matching percentage
118
+ analyzer.matching_percentage = matching_percentage
 
 
 
119
 
120
+ # Find similar sequences
121
+ matched_ids, actual_percentage = analyzer.find_similar_sequences(matching_percentage)
 
 
 
 
 
 
 
 
122
 
123
+ if not matched_ids:
124
+ return f"No similar sequences found at {matching_percentage}% similarity."
125
+
126
+ logging.info(f"Found {len(matched_ids)} similar sequences at {actual_percentage:.1f}% similarity")
127
+
128
+ # Build tree structure
129
+ tree_structure = analyzer.build_tree_structure(matched_ids)
130
+ if not tree_structure:
131
+ return "Error: Failed to build tree structure."
132
+
133
+ # Create interactive tree
134
+ fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
135
+ if not fig:
136
+ return "Error: Failed to create tree visualization."
137
+
138
+ # Generate HTML content
139
+ html_content = fig.to_html(full_html=True, include_plotlyjs='cdn')
140
 
141
+ # Save to output folder
142
+ output_dir = "output"
143
+ os.makedirs(output_dir, exist_ok=True)
144
 
145
+ # Create a safe filename
146
+ safe_seq_name = re.sub(r'[^a-zA-Z0-9]', '_', sequence[:20])
147
+ html_filename = os.path.join(output_dir, f"tree_{safe_seq_name}_{matching_percentage}.html")
 
148
 
149
+ with open(html_filename, "w", encoding='utf-8') as f:
150
+ f.write(html_content)
151
+
152
+ logging.info(f"Tree HTML saved to {html_filename}")
153
+
154
+ return html_content
155
 
156
  except Exception as e:
157
+ error_msg = f"Tree analysis error: {str(e)}"
158
+ logging.error(error_msg)
159
+ import traceback
160
+ logging.error(f"Full traceback: {traceback.format_exc()}")
161
+ return error_msg
162
 
163
  # --- Keras Prediction ---
164
  def predict_with_keras(sequence):
 
274
  # Skip MAFFT due to configuration issues in the container
275
  logging.info("Skipping MAFFT/IQ-TREE due to container configuration issues")
276
 
277
+ # Step 4: ML Simplified Tree (using the new approach)
278
  html_file = None
279
  tree_html_content = "No tree generated"
280
  ml_output = ""
281
 
282
+ if analyzer and processed_sequence and len(processed_sequence) >= 10:
283
  try:
284
  logging.info(f"Starting ML tree analysis with F gene sequence length: {len(processed_sequence)}")
 
285
 
286
+ # Use the new tree analysis function
287
+ tree_result = analyze_sequence_for_tree(processed_sequence, matching_percentage=95.0)
288
+
289
+ if tree_result and not tree_result.startswith("Error:"):
290
+ # Success - we have HTML content
291
+ tree_html_content = tree_result
292
+ ml_output = "✅ Phylogenetic tree generated successfully!"
293
+
294
+ # Check if HTML file was created
295
+ output_dir = "output"
296
+ if os.path.exists(output_dir):
297
+ html_files = [f for f in os.listdir(output_dir) if f.endswith('.html')]
298
+ if html_files:
299
+ html_file = os.path.join(output_dir, html_files[-1]) # Get the latest
300
+ ml_output += f"\n- Tree file: {html_files[-1]}"
301
 
302
+ # Count sequences analyzed
303
  if analyzer.find_query_sequence(processed_sequence):
304
+ matched_ids, perc = analyzer.find_similar_sequences(95.0)
305
+ ml_output += f"\n- {len(matched_ids)} sequences analyzed"
306
+ ml_output += f"\n- Similarity threshold: {perc:.1f}%"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
  else:
308
+ # Error occurred
309
+ ml_output = f" Tree analysis failed: {tree_result}"
310
+ logging.error(f"Tree analysis failed: {tree_result}")
311
+
312
  except Exception as e:
313
  ml_output = f"❌ ML Tree analysis failed: {str(e)}"
314
  logging.error(f"ML Tree failed: {e}")
315
  import traceback
316
  logging.error(f"Full traceback: {traceback.format_exc()}")
317
+ elif not analyzer:
318
+ ml_output = "❌ Tree analyzer not initialized"
319
  elif not processed_sequence or len(processed_sequence) < 10:
320
  ml_output = f"❌ F gene sequence too short for analysis (length: {len(processed_sequence) if processed_sequence else 0})"
321
  else: