Spaces:
No application file
No application file
Delete analyzer.py
Browse files- analyzer.py +0 -1577
analyzer.py
DELETED
|
@@ -1,1577 +0,0 @@
|
|
| 1 |
-
import pandas as pd
|
| 2 |
-
import numpy as np
|
| 3 |
-
import plotly.graph_objects as go
|
| 4 |
-
from Bio import SeqIO, AlignIO
|
| 5 |
-
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor, DistanceMatrix
|
| 6 |
-
from Bio.Phylo.BaseTree import Tree
|
| 7 |
-
from Bio.Align import MultipleSeqAlignment
|
| 8 |
-
from Bio.Seq import Seq
|
| 9 |
-
from Bio.SeqRecord import SeqRecord
|
| 10 |
-
from sklearn.ensemble import RandomForestClassifier
|
| 11 |
-
from sklearn.model_selection import train_test_split
|
| 12 |
-
from sklearn.preprocessing import LabelEncoder
|
| 13 |
-
import warnings
|
| 14 |
-
import os
|
| 15 |
-
import sys
|
| 16 |
-
import re
|
| 17 |
-
import time
|
| 18 |
-
from pathlib import Path
|
| 19 |
-
from typing import Dict, List, Tuple, Optional
|
| 20 |
-
import itertools
|
| 21 |
-
import argparse
|
| 22 |
-
|
| 23 |
-
warnings.filterwarnings('ignore')
|
| 24 |
-
|
| 25 |
-
class PhylogeneticTreeAnalyzer:
|
| 26 |
-
"""Analyzes phylogenetic relationships using ML-based sequence similarity and tree construction."""
|
| 27 |
-
|
| 28 |
-
def __init__(self):
    """Create an analyzer with no data loaded and no models trained."""
    # Loaded dataset and the query currently being analysed.
    self.data = None
    self.query_id = None
    self.query_sequence = None

    # Similarity search: requested threshold, achieved threshold, results.
    self.matching_percentage = 95.0
    self.actual_percentage = None
    self.similarity_scores = {}
    self.matched_sequences = []

    # Classifiers, their label encoders and their held-out accuracies.
    self.ai_model = None  # ML model for sequence classification
    self.ml_model_accuracy = None  # Accuracy of ML model
    self.label_encoder = LabelEncoder()  # Encoder for ML labels
    self.genotype_model = None  # Model for genotype prediction
    self.genotype_model_accuracy = None  # Accuracy of genotype model
    self.genotype_label_encoder = LabelEncoder()  # Encoder for genotype labels

    # Results of the ML analysis step.
    self.ml_results = {}
    self.ml_tree = None
    self.ml_alignment = None

    # Tree layout state.
    self.tree_structure = {}
    self.query_ml_group = None
    self.base_horizontal_length = 1.2
    self.horizontal_line_tracker = []
|
| 49 |
-
|
| 50 |
-
# --- Data Loading ---
|
| 51 |
-
def load_data(self, data_file: str) -> bool:
    """Load sequence records from a CSV file into ``self.data``.

    The file must provide the ``ML`` and ``Genotype`` columns, which are
    summarised in the success message.  ``self.data`` is only replaced once
    the file has been read and those columns were found, so a failed load
    never leaves an unvalidated frame installed on the analyzer.

    Args:
        data_file: Path of the CSV file to read.

    Returns:
        True when the data was loaded, False on any failure (the reason is
        printed).
    """
    try:
        frame = pd.read_csv(data_file)
        missing = [col for col in ('ML', 'Genotype') if col not in frame.columns]
        if missing:
            print(f"Error loading data: missing required column(s): {', '.join(missing)}")
            return False
        ml_group_count = frame['ML'].nunique()
        genotype_count = frame['Genotype'].nunique()
    except Exception as e:
        # Deliberate catch-all boundary: any read/parse problem is reported
        # to the user and turned into a False return value.
        print(f"Error loading data: {e}")
        return False

    self.data = frame
    print(f"✓ Data loaded: {len(self.data)} sequences, "
          f"{ml_group_count} ML groups, "
          f"{genotype_count} genotypes")
    return True
|
| 62 |
-
|
| 63 |
-
# --- Model Training ---
|
| 64 |
-
def train_ai_model(self) -> bool:
    """Train RandomForest models for ML group and genotype prediction.

    Every ``F-gene`` sequence is reduced to A/T/G/C and encoded as a
    100-element vector: counts of the first 50 3-mers followed by counts of
    the first 50 4-mers, both in ``itertools.product('ATGC', ...)`` order.
    Sequences with fewer than 3 usable bases become an all-zero vector.

    The ML-group model is mandatory; the genotype model is only trained when
    at least two genotype classes exist.  Both use an 80/20 split with
    ``random_state=42`` and store the held-out accuracy on the instance.

    Returns:
        True when the ML-group model was trained, False when there is no
        data, fewer than 10 rows, fewer than 2 ML classes, or an error.
    """
    from collections import Counter

    try:
        if self.data is None or len(self.data) < 10:
            print("⚠️ Insufficient data for training (minimum 10 samples)")
            return False

        print("🤖 Training AI models...")

        # The k-mer vocabulary does not depend on the sequence: build it once.
        vocab_3 = [''.join(p) for p in itertools.product('ATGC', repeat=3)][:50]
        vocab_4 = [''.join(p) for p in itertools.product('ATGC', repeat=4)][:50]

        features = []
        for seq in self.data['F-gene'].fillna('').astype(str):
            seq_clean = re.sub(r'[^ATGC]', '', seq.upper())
            if len(seq_clean) < 3:
                features.append([0] * 100)
                continue
            # Counter gives every k-mer frequency in one linear pass.
            counts_3 = Counter(seq_clean[i:i + 3] for i in range(len(seq_clean) - 2))
            counts_4 = Counter(seq_clean[i:i + 4] for i in range(len(seq_clean) - 3))
            features.append([counts_3[kmer] for kmer in vocab_3]
                            + [counts_4[kmer] for kmer in vocab_4])

        X = np.array(features)

        # Train ML model
        ml_targets = self.label_encoder.fit_transform(self.data['ML'].fillna('Unknown'))
        if len(np.unique(ml_targets)) < 2:
            print("⚠️ Need at least 2 ML classes for training")
            return False
        X_train, X_test, y_train, y_test = train_test_split(
            X, ml_targets, test_size=0.2, random_state=42)
        self.ai_model = RandomForestClassifier(n_estimators=100, random_state=42)
        self.ai_model.fit(X_train, y_train)
        self.ml_model_accuracy = self.ai_model.score(X_test, y_test)
        print(f"✓ ML model trained with accuracy: {self.ml_model_accuracy:.2%}")

        # Train genotype model
        genotype_targets = self.genotype_label_encoder.fit_transform(
            self.data['Genotype'].fillna('Unknown'))
        if len(np.unique(genotype_targets)) >= 2:
            X_train, X_test, y_train, y_test = train_test_split(
                X, genotype_targets, test_size=0.2, random_state=42)
            self.genotype_model = RandomForestClassifier(n_estimators=100, random_state=42)
            self.genotype_model.fit(X_train, y_train)
            self.genotype_model_accuracy = self.genotype_model.score(X_test, y_test)
            print(f"✓ Genotype model trained with accuracy: {self.genotype_model_accuracy:.2%}")

        return True
    except Exception as e:
        print(f"Error training models: {e}")
        return False
|
| 116 |
-
|
| 117 |
-
def predict_ml_group(self, sequence: str) -> str:
    """Predict the ML group of a sequence with the trained ML-group model.

    The sequence is upper-cased, stripped to A/T/G/C and encoded as a
    100-element vector: counts of the first 50 3-mers followed by counts of
    the first 50 4-mers in ``itertools.product('ATGC', ...)`` order.

    Args:
        sequence: Nucleotide sequence to classify.

    Returns:
        The decoded label, or "Unknown" when no model is available, the
        cleaned sequence has fewer than 3 bases, or prediction fails.
    """
    from collections import Counter

    try:
        if self.ai_model is None:
            return "Unknown"
        seq_clean = re.sub(r'[^ATGC]', '', sequence.upper())
        if len(seq_clean) < 3:
            return "Unknown"
        # One linear pass per k instead of list.count() per distinct k-mer.
        counts_3 = Counter(seq_clean[i:i + 3] for i in range(len(seq_clean) - 2))
        counts_4 = Counter(seq_clean[i:i + 4] for i in range(len(seq_clean) - 3))
        vocab_3 = [''.join(p) for p in itertools.product('ATGC', repeat=3)][:50]
        vocab_4 = [''.join(p) for p in itertools.product('ATGC', repeat=4)][:50]
        feature_vector = ([counts_3[kmer] for kmer in vocab_3]
                          + [counts_4[kmer] for kmer in vocab_4])
        X = np.array([feature_vector])
        return self.label_encoder.inverse_transform(self.ai_model.predict(X))[0]
    except Exception as e:
        print(f"Error predicting ML group: {e}")
        return "Unknown"
|
| 140 |
-
|
| 141 |
-
def predict_genotype(self, sequence: str) -> str:
    """Predict the genotype of a sequence with the trained genotype model.

    The sequence is upper-cased, stripped to A/T/G/C and encoded as a
    100-element vector: counts of the first 50 3-mers followed by counts of
    the first 50 4-mers in ``itertools.product('ATGC', ...)`` order.

    Args:
        sequence: Nucleotide sequence to classify.

    Returns:
        The decoded label, or "Unknown" when no model is available, the
        cleaned sequence has fewer than 3 bases, or prediction fails.
    """
    from collections import Counter

    try:
        if self.genotype_model is None:
            return "Unknown"
        seq_clean = re.sub(r'[^ATGC]', '', sequence.upper())
        if len(seq_clean) < 3:
            return "Unknown"
        # One linear pass per k instead of list.count() per distinct k-mer.
        counts_3 = Counter(seq_clean[i:i + 3] for i in range(len(seq_clean) - 2))
        counts_4 = Counter(seq_clean[i:i + 4] for i in range(len(seq_clean) - 3))
        vocab_3 = [''.join(p) for p in itertools.product('ATGC', repeat=3)][:50]
        vocab_4 = [''.join(p) for p in itertools.product('ATGC', repeat=4)][:50]
        feature_vector = ([counts_3[kmer] for kmer in vocab_3]
                          + [counts_4[kmer] for kmer in vocab_4])
        X = np.array([feature_vector])
        return self.genotype_label_encoder.inverse_transform(
            self.genotype_model.predict(X))[0]
    except Exception as e:
        print(f"Error predicting genotype: {e}")
        return "Unknown"
|
| 164 |
-
|
| 165 |
-
# --- Sequence Processing ---
|
| 166 |
-
def find_query_sequence(self, query_input: str) -> bool:
    """Resolve the query by accession number, F-gene, or as a novel sequence.

    Resolution order:
      1. ``query_input`` (stripped) equals an ``Accession Number`` in the
         data: that row's id and ``F-gene`` become the query.
      2. The input, reduced to upper-case A/T/G/C, equals an ``F-gene``
         value: the first such row supplies the query id.
      3. The cleaned input has at least 10 bases: it is accepted as a novel
         query under a generated ``QUERY_#####`` id and its ML group and
         genotype are predicted for the log message.

    The generated id is derived from a CRC32 of the sequence so the same
    sequence gets the same id in every process; the built-in ``hash()`` of
    a string is salted per interpreter run.

    Args:
        query_input: Accession number or nucleotide sequence.

    Returns:
        True when ``self.query_id`` / ``self.query_sequence`` were set,
        False when the input is too short, not found, or an error occurred.
    """
    import zlib

    try:
        query_input = query_input.strip()
        if query_input in self.data['Accession Number'].values:
            self.query_id = query_input
            query_row = self.data[self.data['Accession Number'] == query_input].iloc[0]
            self.query_sequence = query_row['F-gene']
            print(f"✓ Query found by accession: {query_input}, ML: {query_row['ML']}, Genotype: {query_row['Genotype']}")
            return True

        query_clean = re.sub(r'[^ATGC]', '', str(query_input).upper())
        if query_clean in self.data['F-gene'].values:
            query_row = self.data[self.data['F-gene'] == query_clean].iloc[0]
            self.query_id = query_row['Accession Number']
            self.query_sequence = query_clean
            print(f"✓ Query matched to accession: {self.query_id}, ML: {query_row['ML']}, Genotype: {query_row['Genotype']}")
            return True

        if len(query_clean) >= 10:
            # query_clean only contains A/T/G/C, so ASCII encoding is safe.
            digest = zlib.crc32(query_clean.encode('ascii'))
            self.query_id = f"QUERY_{digest % 100000:05d}"
            self.query_sequence = query_clean
            predicted_ml = self.predict_ml_group(query_clean)
            predicted_genotype = self.predict_genotype(query_clean)
            print(f"✓ Novel query accepted: {self.query_id}, Length: {len(query_clean)}, "
                  f"Predicted ML: {predicted_ml}, Predicted Genotype: {predicted_genotype}")
            return True

        print(f"✗ Invalid query: Too short (<10) or not found")
        return False
    except Exception as e:
        print(f"Error processing query: {e}")
        return False
|
| 196 |
-
|
| 197 |
-
def calculate_f_gene_similarity(self, seq1: str, seq2: str) -> float:
    """Return the percentage similarity of two sequences from shared 5-mers.

    Both inputs are converted to upper-case strings and stripped to
    A/T/G/C.  The score is the Jaccard index of their 5-mer sets
    (shared / total distinct), scaled to 0-100 and rounded to 2 decimals.

    When both cleaned sequences are shorter than 5 bases no 5-mer exists;
    they then score 100.0 only if they are identical, so unrelated short
    fragments are no longer reported as perfect matches.

    Args:
        seq1: First sequence.
        seq2: Second sequence.

    Returns:
        Similarity in percent; 0.0 for empty input, when exactly one
        sequence is too short to yield a 5-mer, or on error.
    """
    try:
        if not seq1 or not seq2:
            return 0.0
        seq1 = re.sub(r'[^ATGC]', '', str(seq1).upper())
        seq2 = re.sub(r'[^ATGC]', '', str(seq2).upper())
        if not seq1 or not seq2:
            return 0.0

        k = 5
        kmers1 = {seq1[i:i + k] for i in range(len(seq1) - k + 1)}
        kmers2 = {seq2[i:i + k] for i in range(len(seq2) - k + 1)}
        if not kmers1 and not kmers2:
            # Too short for k-mer comparison: fall back to exact equality.
            return 100.0 if seq1 == seq2 else 0.0
        if not kmers1 or not kmers2:
            return 0.0

        shared = len(kmers1 & kmers2)
        total = len(kmers1 | kmers2)
        return round((shared / total) * 100, 2)
    except Exception as e:
        print(f"Error calculating similarity: {e}")
        return 0.0
|
| 219 |
-
|
| 220 |
-
def find_similar_sequences(self, target_percentage: float) -> Tuple[List[str], float]:
    """Select dataset sequences whose similarity to the query is near a target.

    Every row except the query itself is scored with
    ``calculate_f_gene_similarity``.  Rows within 2.0 points of
    ``target_percentage`` are selected; when none qualify, the score closest
    to the target becomes the effective percentage and rows within 1.0 point
    of it are selected instead.  At most 50 rows (highest similarity first)
    are kept, and their scores are stored in ``self.similarity_scores``.

    Args:
        target_percentage: Desired similarity in percent.

    Returns:
        ``(matched_ids, actual_percentage)``; ``([], target_percentage)``
        when nothing could be compared or an error occurred.
    """
    try:
        print(f"🔍 Finding sequences with {target_percentage}% similarity...")

        scored = []
        for _, record in self.data.iterrows():
            accession = record['Accession Number']
            if accession == self.query_id:
                continue  # never compare the query against itself
            score = self.calculate_f_gene_similarity(self.query_sequence, record['F-gene'])
            scored.append({
                'id': accession,
                'similarity': score,
                'ml': record.get('ML', 'Unknown'),
                'genotype': record.get('Genotype', 'Unknown'),
            })

        if not scored:
            print("❌ No valid sequences for comparison")
            return [], target_percentage

        scored.sort(key=lambda entry: entry['similarity'], reverse=True)

        def within(center, width):
            return [entry for entry in scored if abs(entry['similarity'] - center) <= width]

        tolerance = 2.0
        hits = within(target_percentage, tolerance)
        if hits:
            actual_percentage = target_percentage
        else:
            nearest = min(scored, key=lambda entry: abs(entry['similarity'] - target_percentage))
            actual_percentage = nearest['similarity']
            hits = within(actual_percentage, 1.0)
            print(f"⚠ No sequences at {target_percentage}%. Using closest: {actual_percentage:.1f}%")

        limit = 50
        if len(hits) > limit:
            hits = hits[:limit]
            print(f"⚠ Limited to top {limit} matches")

        self.similarity_scores = {hit['id']: hit['similarity'] for hit in hits}
        matched_ids = [hit['id'] for hit in hits]

        # Summary statistics cover every scored row, not only the hits.
        all_scores = [entry['similarity'] for entry in scored]
        max_sim = max(all_scores)
        min_sim = min(all_scores)
        avg_sim = sum(all_scores) / len(scored)
        print(f"✓ Found {len(matched_ids)} sequences at ~{actual_percentage:.1f}% similarity, "
              f"Range: {min_sim:.1f}% - {max_sim:.1f}% (avg: {avg_sim:.1f}%)")
        return matched_ids, actual_percentage
    except Exception as e:
        print(f"Error finding similar sequences: {e}")
        return [], target_percentage
|
| 264 |
-
|
| 265 |
-
# --- Tree Construction ---
|
| 266 |
-
def build_tree_structure(self, matched_ids: List[str]) -> Dict:
    """Build the ML group -> genotype -> sequence hierarchy for the tree.

    All rows of ``self.data`` are bucketed by ``ML`` and ``Genotype``; each
    entry records whether it is the query, whether it is in ``matched_ids``
    and its stored similarity (0.0 when absent).  A query whose id starts
    with ``QUERY_`` is added under its predicted ML group and genotype with
    similarity 100.0.  The buckets are then normalised and laid out by
    ``_normalize_ml_groups`` and ``_build_normalized_ml_nodes``, and the
    result is stored in ``self.tree_structure``.

    Args:
        matched_ids: Accession numbers selected by the similarity search.

    Returns:
        The tree dictionary, or ``{}`` when construction raised an error.
    """
    try:
        print("🌳 Building normalized tree structure...")
        root_node = {
            'name': 'Root', 'type': 'root', 'children': {}, 'x': 0, 'y': 0,
            'has_vertical_attachment': False, 'extension_level': 0,
        }
        tree_structure = {'root': root_node}

        ml_groups = {}
        for _, record in self.data.iterrows():
            accession = record['Accession Number']
            bucket = ml_groups.setdefault(record['ML'], {}).setdefault(record['Genotype'], [])
            bucket.append({
                'id': accession,
                'data': record.to_dict(),
                'is_query': accession == self.query_id,
                'is_matched': accession in matched_ids,
                'similarity': self.similarity_scores.get(accession, 0.0),
            })

        if self.query_id.startswith("QUERY_"):
            # Novel query: it has no row in the data, so place it by prediction.
            predicted_ml = self.predict_ml_group(self.query_sequence)
            predicted_genotype = self.predict_genotype(self.query_sequence)
            query_record = {
                'F-gene': self.query_sequence, 'ML': predicted_ml,
                'Genotype': predicted_genotype, 'Accession Number': self.query_id,
            }
            ml_groups.setdefault(predicted_ml, {}).setdefault(predicted_genotype, []).append({
                'id': self.query_id, 'data': query_record,
                'is_query': True, 'is_matched': False, 'similarity': 100.0,
            })

        normalized_ml_groups = self._normalize_ml_groups(ml_groups)
        self._build_normalized_ml_nodes(tree_structure, normalized_ml_groups, matched_ids)
        self.tree_structure = tree_structure
        print("✓ Tree structure built")
        return tree_structure
    except Exception as e:
        print(f"Error building tree structure: {e}")
        return {}
|
| 308 |
-
|
| 309 |
-
def build_tree_structure_with_ml_safe(self, matched_ids: List[str]) -> Dict:
    """Build the tree and attach a summary of the ML analysis to it.

    ``perform_ml_analysis_safe`` runs first, then ``build_tree_structure``.
    When the analysis result contains a ``'tree'`` entry, its statistics are
    stored under ``'ml_analysis'`` and the ML tree/alignment are kept on the
    instance; otherwise ``'ml_analysis'`` records the failure.  If anything
    raises, the plain tree is built as a fallback.

    Args:
        matched_ids: Accession numbers selected by the similarity search.

    Returns:
        The tree dictionary, or ``{'error': 'Tree construction failed'}``
        when the fallback fails as well.
    """
    try:
        print("🌳 Building ML-enhanced tree structure...")
        analysis = self.perform_ml_analysis_safe(matched_ids)
        tree = self.build_tree_structure(matched_ids)

        if not analysis or 'tree' not in analysis:
            tree['ml_analysis'] = {'ml_tree_available': False, 'error': 'ML analysis failed'}
            print("⚠ ML analysis failed, using standard tree")
            return tree

        tree['ml_analysis'] = {
            'log_likelihood': analysis['log_likelihood'],
            'sequence_count': analysis['sequence_count'],
            'alignment_length': analysis['alignment_length'],
            'ml_tree_available': True,
        }
        self.ml_tree = analysis['tree']
        self.ml_alignment = analysis.get('alignment')
        print("✓ Tree enhanced with ML analysis")
        return tree
    except Exception as e:
        print(f"Error building ML-enhanced tree: {e}")

    # Only reached after a failure above: retry without the ML analysis.
    try:
        return self.build_tree_structure(matched_ids)
    except Exception as e2:
        print(f"Fallback failed: {e2}")
        return {'error': 'Tree construction failed'}
|
| 336 |
-
|
| 337 |
-
def _normalize_ml_groups(self, ml_groups: Dict) -> Dict:
|
| 338 |
-
"""Normalizes ML group names for hierarchical organization."""
|
| 339 |
-
try:
|
| 340 |
-
normalized_groups = {}
|
| 341 |
-
for ml_name, genotypes in ml_groups.items():
|
| 342 |
-
base_ml = 'UNCL' if ml_name.startswith('UNCL') else ml_name.split('.')[0] if '.' in ml_name and any(c.isdigit() for c in ml_name) else ml_name
|
| 343 |
-
if base_ml not in normalized_groups:
|
| 344 |
-
normalized_groups[base_ml] = {'full_ml_groups': {}, 'representative_sequences': [], 'has_special_sequences': False}
|
| 345 |
-
has_special = any(any(seq['is_query'] or seq['is_matched'] for seq in seqs) for seqs in genotypes.values())
|
| 346 |
-
if has_special:
|
| 347 |
-
normalized_groups[base_ml]['has_special_sequences'] = True
|
| 348 |
-
normalized_groups[base_ml]['full_ml_groups'][ml_name] = genotypes
|
| 349 |
-
elif len(normalized_groups[base_ml]['representative_sequences']) < 2:
|
| 350 |
-
for genotype, sequences in list(genotypes.items())[:2]:
|
| 351 |
-
if len(normalized_groups[base_ml]['representative_sequences']) < 2:
|
| 352 |
-
normalized_groups[base_ml]['representative_sequences'].extend(sequences[:1])
|
| 353 |
-
return normalized_groups
|
| 354 |
-
except Exception as e:
|
| 355 |
-
print(f"Error normalizing ML groups: {e}")
|
| 356 |
-
return {}
|
| 357 |
-
|
| 358 |
-
def _build_normalized_ml_nodes(self, tree_structure: Dict, normalized_ml_groups: Dict, matched_ids: List[str]):
|
| 359 |
-
"""Builds normalized ML nodes with equal spacing."""
|
| 360 |
-
try:
|
| 361 |
-
self.horizontal_line_tracker = []
|
| 362 |
-
self._identify_query_ml_group(normalized_ml_groups)
|
| 363 |
-
ml_positions = self._calculate_dynamic_ml_positions(normalized_ml_groups)
|
| 364 |
-
tree_structure['root']['has_vertical_attachment'] = len(normalized_ml_groups) > 1
|
| 365 |
-
for ml_idx, (base_ml, ml_data) in enumerate(normalized_ml_groups.items()):
|
| 366 |
-
y_pos = ml_positions[ml_idx]
|
| 367 |
-
has_vertical = ml_data['has_special_sequences'] and len(ml_data['full_ml_groups']) > 1
|
| 368 |
-
contains_query = base_ml == self.query_ml_group
|
| 369 |
-
horizontal_length = self._determine_horizontal_line_length('normalized_ml_group', has_vertical, contains_query)
|
| 370 |
-
x_pos = horizontal_length
|
| 371 |
-
tree_structure['root']['children'][base_ml] = {
|
| 372 |
-
'name': base_ml, 'type': 'normalized_ml_group', 'children': {}, 'x': x_pos, 'y': y_pos,
|
| 373 |
-
'has_special_sequences': ml_data['has_special_sequences'], 'has_vertical_attachment': has_vertical,
|
| 374 |
-
'horizontal_line_length': horizontal_length, 'contains_query': contains_query
|
| 375 |
-
}
|
| 376 |
-
if ml_data['has_special_sequences']:
|
| 377 |
-
self._build_full_ml_nodes(tree_structure['root']['children'][base_ml], ml_data['full_ml_groups'],
|
| 378 |
-
y_pos, matched_ids, x_pos)
|
| 379 |
-
else:
|
| 380 |
-
self._add_representative_sequences(tree_structure['root']['children'][base_ml],
|
| 381 |
-
ml_data['representative_sequences'], y_pos, x_pos)
|
| 382 |
-
except Exception as e:
|
| 383 |
-
print(f"Error building normalized ML nodes: {e}")
|
| 384 |
-
|
| 385 |
-
def _build_full_ml_nodes(self, normalized_ml_node: Dict, full_ml_groups: Dict, base_y: float, matched_ids: List[str], parent_x: float):
|
| 386 |
-
"""Builds full ML nodes with genotypes."""
|
| 387 |
-
try:
|
| 388 |
-
full_ml_positions = self._calculate_full_ml_positions(full_ml_groups, base_y)
|
| 389 |
-
for ml_idx, (full_ml_name, genotypes) in enumerate(full_ml_groups.items()):
|
| 390 |
-
y_pos = full_ml_positions[ml_idx]
|
| 391 |
-
special_genotypes_count = sum(1 for g, seqs in genotypes.items() if any(s['is_query'] or s['is_matched'] for s in seqs))
|
| 392 |
-
has_vertical = special_genotypes_count > 1
|
| 393 |
-
contains_query = any(any(seq['is_query'] for seq in seqs) for seqs in genotypes.values())
|
| 394 |
-
horizontal_length = self._determine_horizontal_line_length('full_ml_group', has_vertical, contains_query)
|
| 395 |
-
x_pos = parent_x + horizontal_length
|
| 396 |
-
normalized_ml_node['children'][full_ml_name] = {
|
| 397 |
-
'name': full_ml_name, 'type': 'full_ml_group', 'children': {}, 'x': x_pos, 'y': y_pos,
|
| 398 |
-
'sequences_count': sum(len(seqs) for seqs in genotypes.values()), 'has_vertical_attachment': has_vertical,
|
| 399 |
-
'horizontal_line_length': horizontal_length, 'contains_query': contains_query
|
| 400 |
-
}
|
| 401 |
-
self._build_genotype_nodes(normalized_ml_node['children'][full_ml_name], genotypes, y_pos, matched_ids, x_pos)
|
| 402 |
-
except Exception as e:
|
| 403 |
-
print(f"Error building full ML nodes: {e}")
|
| 404 |
-
|
| 405 |
-
def _build_genotype_nodes(self, full_ml_node: Dict, genotypes: Dict, base_y: float, matched_ids: List[str], parent_x: float):
|
| 406 |
-
"""Builds genotype nodes with sequences."""
|
| 407 |
-
try:
|
| 408 |
-
special_genotypes = [(g, seqs) for g, seqs in genotypes.items() if any(s['is_query'] or s['is_matched'] for s in seqs)]
|
| 409 |
-
if not special_genotypes:
|
| 410 |
-
return
|
| 411 |
-
genotype_positions = self._calculate_genotype_positions(special_genotypes, base_y)
|
| 412 |
-
genotype_sequence_counts = [(g, seqs, len([s for s in seqs if s['is_query'] or s['is_matched']])) for g, seqs in special_genotypes]
|
| 413 |
-
for gt_idx, (genotype, sequences, sequence_count) in enumerate(genotype_sequence_counts):
|
| 414 |
-
y_pos = genotype_positions[gt_idx]
|
| 415 |
-
special_sequences = [s for s in sequences if s['is_query'] or s['is_matched']]
|
| 416 |
-
has_vertical = len(special_sequences) > 1
|
| 417 |
-
contains_query = any(s['is_query'] for s in sequences)
|
| 418 |
-
horizontal_length = self._determine_genotype_horizontal_line_length(sequence_count, has_vertical, contains_query)
|
| 419 |
-
x_pos = parent_x + horizontal_length
|
| 420 |
-
full_ml_node['children'][genotype] = {
|
| 421 |
-
'name': genotype, 'type': 'genotype', 'children': {}, 'x': x_pos, 'y': y_pos,
|
| 422 |
-
'sequences': sequences, 'has_vertical_attachment': has_vertical,
|
| 423 |
-
'horizontal_line_length': horizontal_length, 'contains_query': contains_query,
|
| 424 |
-
'sequence_count': sequence_count
|
| 425 |
-
}
|
| 426 |
-
self._add_sequences_horizontal(full_ml_node['children'][genotype], sequences, y_pos, x_pos)
|
| 427 |
-
except Exception as e:
|
| 428 |
-
print(f"Error building genotype nodes: {e}")
|
| 429 |
-
|
| 430 |
-
def _add_representative_sequences(self, normalized_ml_node: Dict, representative_sequences: List[Dict], base_y: float, parent_x: float):
|
| 431 |
-
"""Adds representative sequences to normalized ML nodes."""
|
| 432 |
-
try:
|
| 433 |
-
if not representative_sequences:
|
| 434 |
-
return
|
| 435 |
-
has_vertical = len(representative_sequences) > 1
|
| 436 |
-
horizontal_length = self._determine_horizontal_line_length('representative', has_vertical)
|
| 437 |
-
x_pos = parent_x + horizontal_length
|
| 438 |
-
if len(representative_sequences) == 1:
|
| 439 |
-
seq = representative_sequences[0]
|
| 440 |
-
normalized_ml_node['children'][f"{seq['id']}_rep"] = {
|
| 441 |
-
'name': f"{seq['id']} (Rep)", 'type': 'representative_sequence', 'data': seq,
|
| 442 |
-
'x': x_pos, 'y': base_y, 'has_vertical_attachment': False, 'horizontal_line_length': horizontal_length
|
| 443 |
-
}
|
| 444 |
-
else:
|
| 445 |
-
positions = self._calculate_sequence_positions(representative_sequences, base_y)
|
| 446 |
-
for idx, seq in enumerate(representative_sequences):
|
| 447 |
-
normalized_ml_node['children'][f"{seq['id']}_rep"] = {
|
| 448 |
-
'name': f"{seq['id']} (Rep)", 'type': 'representative_sequence', 'data': seq,
|
| 449 |
-
'x': x_pos, 'y': positions[idx], 'has_vertical_attachment': False, 'horizontal_line_length': horizontal_length
|
| 450 |
-
}
|
| 451 |
-
except Exception as e:
|
| 452 |
-
print(f"Error adding representative sequences: {e}")
|
| 453 |
-
|
| 454 |
-
def _add_sequences_horizontal(self, genotype_node: Dict, sequences: List[Dict], base_y: float, parent_x: float):
|
| 455 |
-
"""Adds sequences with similarity-based line lengths."""
|
| 456 |
-
try:
|
| 457 |
-
query_line_length = 3.0
|
| 458 |
-
query_sequences = [s for s in sequences if s['is_query']]
|
| 459 |
-
matched_sequences = [s for s in sequences if s['is_matched'] and not s['is_query']]
|
| 460 |
-
all_special_sequences = query_sequences + matched_sequences
|
| 461 |
-
if len(all_special_sequences) == 1:
|
| 462 |
-
sequence = all_special_sequences[0]
|
| 463 |
-
line_length = self._calculate_similarity_based_line_length(sequence, query_line_length)
|
| 464 |
-
x_pos = parent_x + line_length
|
| 465 |
-
genotype_node['children'][sequence['id']] = {
|
| 466 |
-
'name': f"{sequence['id']} ({sequence['similarity']}%)" if sequence['is_matched'] else sequence['id'],
|
| 467 |
-
'type': 'sequence', 'data': sequence, 'x': x_pos, 'y': base_y,
|
| 468 |
-
'has_vertical_attachment': False, 'similarity_line_length': line_length
|
| 469 |
-
}
|
| 470 |
-
else:
|
| 471 |
-
sequence_positions = self._calculate_sequence_positions(all_special_sequences, base_y)
|
| 472 |
-
for seq_idx, sequence in enumerate(all_special_sequences):
|
| 473 |
-
line_length = self._calculate_similarity_based_line_length(sequence, query_line_length)
|
| 474 |
-
x_pos = parent_x + line_length
|
| 475 |
-
genotype_node['children'][sequence['id']] = {
|
| 476 |
-
'name': f"{sequence['id']} ({sequence['similarity']}%)" if sequence['is_matched'] else sequence['id'],
|
| 477 |
-
'type': 'sequence', 'data': sequence, 'x': x_pos, 'y': sequence_positions[seq_idx],
|
| 478 |
-
'has_vertical_attachment': False, 'similarity_line_length': line_length
|
| 479 |
-
}
|
| 480 |
-
except Exception as e:
|
| 481 |
-
print(f"Error adding sequences: {e}")
|
| 482 |
-
|
| 483 |
-
def _identify_query_ml_group(self, normalized_ml_groups: Dict):
|
| 484 |
-
"""Identifies the ML group containing the query sequence."""
|
| 485 |
-
try:
|
| 486 |
-
for base_ml, ml_data in normalized_ml_groups.items():
|
| 487 |
-
if ml_data['has_special_sequences']:
|
| 488 |
-
for genotypes in ml_data['full_ml_groups'].values():
|
| 489 |
-
for sequences in genotypes.values():
|
| 490 |
-
if any(seq['is_query'] for seq in sequences):
|
| 491 |
-
self.query_ml_group = base_ml
|
| 492 |
-
return
|
| 493 |
-
except Exception as e:
|
| 494 |
-
print(f"Error identifying query ML group: {e}")
|
| 495 |
-
|
| 496 |
-
def _calculate_dynamic_ml_positions(self, normalized_ml_groups: Dict) -> List[float]:
|
| 497 |
-
"""Calculates equal Y positions for ML groups."""
|
| 498 |
-
try:
|
| 499 |
-
ml_count = len(normalized_ml_groups)
|
| 500 |
-
if ml_count == 0:
|
| 501 |
-
return []
|
| 502 |
-
if ml_count == 1:
|
| 503 |
-
return [0.0]
|
| 504 |
-
total_spacing = (ml_count - 1) * 2.0
|
| 505 |
-
start_y = -total_spacing / 2
|
| 506 |
-
return [start_y + i * 2.0 for i in range(ml_count)]
|
| 507 |
-
except Exception as e:
|
| 508 |
-
print(f"Error calculating ML positions: {e}")
|
| 509 |
-
return list(range(len(normalized_ml_groups)))
|
| 510 |
-
|
| 511 |
-
def _calculate_full_ml_positions(self, full_ml_groups: Dict, base_y: float) -> List[float]:
|
| 512 |
-
"""Calculates equal positions for full ML groups."""
|
| 513 |
-
try:
|
| 514 |
-
ml_count = len(full_ml_groups)
|
| 515 |
-
if ml_count <= 1:
|
| 516 |
-
return [base_y]
|
| 517 |
-
spacing = 1.5
|
| 518 |
-
start_y = base_y - (spacing * (ml_count - 1)) / 2
|
| 519 |
-
return [start_y + i * spacing for i in range(ml_count)]
|
| 520 |
-
except Exception as e:
|
| 521 |
-
print(f"Error calculating full ML positions: {e}")
|
| 522 |
-
return [base_y] * len(full_ml_groups)
|
| 523 |
-
|
| 524 |
-
def _calculate_genotype_positions(self, special_genotypes: List, base_y: float) -> List[float]:
|
| 525 |
-
"""Calculates equal positions for genotypes."""
|
| 526 |
-
try:
|
| 527 |
-
genotype_count = len(special_genotypes)
|
| 528 |
-
if genotype_count <= 1:
|
| 529 |
-
return [base_y]
|
| 530 |
-
spacing = 1.0
|
| 531 |
-
start_y = base_y - (spacing * (genotype_count - 1)) / 2
|
| 532 |
-
return [start_y + i * spacing for i in range(genotype_count)]
|
| 533 |
-
except Exception as e:
|
| 534 |
-
print(f"Error calculating genotype positions: {e}")
|
| 535 |
-
return [base_y] * len(special_genotypes)
|
| 536 |
-
|
| 537 |
-
def _calculate_sequence_positions(self, sequences: List[Dict], base_y: float) -> List[float]:
|
| 538 |
-
"""Calculates equal positions for sequences."""
|
| 539 |
-
try:
|
| 540 |
-
seq_count = len(sequences)
|
| 541 |
-
if seq_count <= 1:
|
| 542 |
-
return [base_y]
|
| 543 |
-
spacing = 0.8
|
| 544 |
-
start_y = base_y - (spacing * (seq_count - 1)) / 2
|
| 545 |
-
return [start_y + i * spacing for i in range(seq_count)]
|
| 546 |
-
except Exception as e:
|
| 547 |
-
print(f"Error calculating sequence positions: {e}")
|
| 548 |
-
return [base_y] * len(sequences)
|
| 549 |
-
|
| 550 |
-
def _calculate_similarity_based_line_length(self, sequence: Dict, query_line_length: float) -> float:
|
| 551 |
-
"""Calculates line length based on sequence similarity."""
|
| 552 |
-
try:
|
| 553 |
-
if sequence['is_query']:
|
| 554 |
-
return query_line_length
|
| 555 |
-
if sequence['is_matched']:
|
| 556 |
-
similarity = sequence['similarity']
|
| 557 |
-
proportional_length = (similarity / 100.0) * query_line_length
|
| 558 |
-
return max(proportional_length, query_line_length * 0.2)
|
| 559 |
-
return query_line_length * 0.5
|
| 560 |
-
except Exception as e:
|
| 561 |
-
print(f"Error calculating line length: {e}")
|
| 562 |
-
return query_line_length * 0.5
|
| 563 |
-
|
| 564 |
-
def _determine_horizontal_line_length(self, node_type: str, has_vertical: bool, contains_query: bool = False) -> float:
|
| 565 |
-
"""Determines horizontal line length based on node type."""
|
| 566 |
-
try:
|
| 567 |
-
base_length = self.base_horizontal_length
|
| 568 |
-
if contains_query and node_type == 'normalized_ml_group':
|
| 569 |
-
return base_length * 2.5
|
| 570 |
-
if has_vertical:
|
| 571 |
-
current_max = base_length
|
| 572 |
-
for length in self.horizontal_line_tracker:
|
| 573 |
-
if length > current_max:
|
| 574 |
-
current_max = length
|
| 575 |
-
new_length = current_max + 0.3
|
| 576 |
-
self.horizontal_line_tracker.append(new_length)
|
| 577 |
-
return new_length
|
| 578 |
-
return base_length
|
| 579 |
-
except Exception as e:
|
| 580 |
-
print(f"Error determining line length: {e}")
|
| 581 |
-
return self.base_horizontal_length
|
| 582 |
-
|
| 583 |
-
def _determine_genotype_horizontal_line_length(self, sequence_count: int, has_vertical: bool, contains_query: bool = False) -> float:
|
| 584 |
-
"""Determines horizontal line length for genotype nodes."""
|
| 585 |
-
try:
|
| 586 |
-
base_length = self.base_horizontal_length
|
| 587 |
-
query_bonus = 0.5 if contains_query else 0.0
|
| 588 |
-
if sequence_count <= 1:
|
| 589 |
-
length_multiplier = 1.0
|
| 590 |
-
elif sequence_count <= 3:
|
| 591 |
-
length_multiplier = 1.6
|
| 592 |
-
elif sequence_count <= 5:
|
| 593 |
-
length_multiplier = 2.3
|
| 594 |
-
else:
|
| 595 |
-
length_multiplier = 6.0
|
| 596 |
-
return base_length * length_multiplier + query_bonus
|
| 597 |
-
except Exception as e:
|
| 598 |
-
print(f"Error determining genotype line length: {e}")
|
| 599 |
-
return self.base_horizontal_length
|
| 600 |
-
|
| 601 |
-
# --- Visualization ---
|
| 602 |
-
def create_interactive_tree(self, matched_ids: List[str], actual_percentage: float) -> Optional[go.Figure]:
    """Build an interactive horizontal phylogenetic tree figure with Plotly.

    Recursively walks ``self.tree_structure['root']``.  Every node is a dict
    providing ``x``, ``y``, ``type`` and ``name`` plus an optional
    ``children`` dict of child nodes.  One scatter trace draws the
    connecting edges, a second draws the labelled node markers, and
    placeholder traces supply the legend entries.

    Args:
        matched_ids: IDs of the matched sequences; only their count is used
            (in the figure title).
        actual_percentage: Similarity percentage shown in the figure title.

    Returns:
        The figure, or ``None`` if building it raised.  A failure to
        *display* the figure only prints a warning; the figure is still
        returned.
    """
    try:
        print("🎨 Creating interactive tree visualization...")
        edge_x, edge_y = [], []
        node_x, node_y = [], []
        node_colors, node_text, node_hover, node_sizes = [], [], [], []
        colors = {
            'root': '#FF0000', 'normalized_ml_group': '#FFB6C1', 'full_ml_group': '#FF69B4',
            'genotype': '#FFD700', 'representative_sequence': '#FFA500', 'query_sequence': '#4B0082',
            'matched_sequence': '#6A5ACD', 'other_sequence': '#87CEEB'
        }

        def add_horizontal_edges(parent_x, parent_y, children_dict):
            """Append the edge segments joining a parent to its children.

            Each segment is terminated with ``None`` so Plotly breaks the line.
            """
            if not children_dict:
                return
            children_list = list(children_dict.values())
            if len(children_list) == 1:
                # Single child: one direct segment.
                child = children_list[0]
                edge_x.extend([parent_x, child['x'], None])
                edge_y.extend([parent_y, child['y'], None])
                return
            # Several children: a horizontal stub to a junction placed 80% of
            # the way to the nearest child, a vertical bar spanning the
            # children, then one horizontal segment per child.
            min_child_x = min(child['x'] for child in children_list)
            intermediate_x = parent_x + (min_child_x - parent_x) * 0.8
            edge_x.extend([parent_x, intermediate_x, None])
            edge_y.extend([parent_y, parent_y, None])
            child_y_positions = [child['y'] for child in children_list]
            edge_x.extend([intermediate_x, intermediate_x, None])
            edge_y.extend([min(child_y_positions), max(child_y_positions), None])
            for child in children_list:
                edge_x.extend([intermediate_x, child['x'], None])
                edge_y.extend([child['y'], child['y'], None])

        def get_node_color_and_size(node):
            """Return ``(color, marker size)`` for a node."""
            node_type = node['type']
            if node_type == 'sequence':
                if node['data']['is_query']:
                    return colors['query_sequence'], 10
                if node['data']['is_matched']:
                    return colors['matched_sequence'], 8
                return colors['other_sequence'], 6
            if node_type == 'representative_sequence':
                return colors['representative_sequence'], 7
            if node_type == 'normalized_ml_group':
                return colors['normalized_ml_group'], 9 if node.get('has_special_sequences', False) else 7
            if node_type == 'full_ml_group':
                return colors['full_ml_group'], 8
            if node_type == 'genotype':
                return colors['genotype'], 7
            return colors.get(node_type, '#000000'), 7

        def create_node_text(node):
            """Return the label drawn next to a node."""
            # Only normalized ML groups holding query/matched sequences get a marker suffix.
            if node['type'] == 'normalized_ml_group' and node.get('has_special_sequences', False):
                return f"{node['name']} *"
            return node['name']

        def create_hover_text(node):
            """Return the HTML hover text for a node."""
            if node['type'] == 'sequence':
                data = node['data']['data']
                hover_text = (
                    f"<b>{node['name']}</b><br>Type: {'Query' if node['data']['is_query'] else 'Matched' if node['data']['is_matched'] else 'Other'} Sequence<br>"
                    f"ML Group: {data.get('ML', 'N/A')}<br>Genotype: {data.get('Genotype', 'N/A')}<br>"
                    f"Host: {data.get('Host', 'N/A')}<br>Country: {data.get('Country', 'N/A')}<br>"
                    f"Isolate: {data.get('Isolate', 'N/A')}<br>Year: {data.get('Year', 'N/A')}"
                )
                if node['data']['is_matched']:
                    hover_text += f"<br><b>Similarity: {node['data']['similarity']}%</b>"
            elif node['type'] == 'representative_sequence':
                data = node['data']['data']
                hover_text = (
                    f"<b>{node['name']}</b><br>Type: Representative Sequence<br>"
                    f"ML Group: {data.get('ML', 'N/A')}<br>Genotype: {data.get('Genotype', 'N/A')}<br>"
                    f"Host: {data.get('Host', 'N/A')}<br>Country: {data.get('Country', 'N/A')}"
                )
            elif node['type'] == 'normalized_ml_group':
                hover_text = f"<b>{node['name']}</b><br>Type: Normalized ML Group"
                if node.get('has_special_sequences', False):
                    hover_text += "<br>Contains query/matched sequences"
                else:
                    hover_text += "<br>Representative sequences only"
            elif node['type'] == 'full_ml_group':
                hover_text = f"<b>{node['name']}</b><br>Type: Full ML Group"
                if 'sequences_count' in node:
                    hover_text += f"<br>Total Sequences: {node['sequences_count']}"
            elif node['type'] == 'genotype':
                hover_text = f"<b>{node['name']}</b><br>Type: Genotype"
                if 'sequences' in node:
                    special_count = sum(1 for seq in node['sequences'] if seq['is_query'] or seq['is_matched'])
                    hover_text += f"<br>Special Sequences: {special_count}/{len(node['sequences'])}"
            else:
                hover_text = f"<b>{node['name']}</b><br>Type: {node['type'].replace('_', ' ').title()}"
            return hover_text

        def add_node_and_edges(node):
            """Record a node's marker, then its outgoing edges and its subtree."""
            x, y = node['x'], node['y']
            node_x.append(x)
            node_y.append(y)
            color, size = get_node_color_and_size(node)
            node_colors.append(color)
            node_sizes.append(size)
            node_text.append(create_node_text(node))
            node_hover.append(create_hover_text(node))
            children = node.get('children')
            if children:
                add_horizontal_edges(x, y, children)
                for child in children.values():
                    add_node_and_edges(child)

        # The recursive walk already emits the root's own edges, so they are
        # not added a second time afterwards (that only duplicated segments).
        add_node_and_edges(self.tree_structure['root'])

        fig = go.Figure()
        fig.add_trace(go.Scatter(
            x=edge_x, y=edge_y, mode='lines', line=dict(width=1, color='gray'),
            hoverinfo='none', showlegend=False
        ))
        fig.add_trace(go.Scatter(
            x=node_x, y=node_y, mode='markers+text',
            marker=dict(size=node_sizes, color=node_colors, line=dict(width=1, color='black'), opacity=0.85),
            text=node_text, textposition="middle right", textfont=dict(size=9, color="black"),
            hoverinfo='text', hovertext=node_hover, showlegend=False
        ))

        # Parenthesised so the empty-list fallback guards min() as well as max().
        min_x, max_x = (min(node_x), max(node_x)) if node_x else (0, 1)
        min_y, max_y = (min(node_y), max(node_y)) if node_y else (0, 1)
        x_range = max_x - min_x
        y_range = max_y - min_y
        x_padding = x_range * 0.2 if x_range > 0 else 1
        y_padding = y_range * 0.2 if y_range > 0 else 1
        # Canvas size grows with the coordinate extent, clamped to fixed bounds.
        width = min(1400, max(800, int(x_range * 80 + 400)))
        height = min(900, max(500, int(y_range * 40 + 300)))

        fig.update_layout(
            title=dict(
                text=f"Horizontal Phylogenetic Tree<br>Query: {self.query_id} | Similarity: {actual_percentage}% | Matched: {len(matched_ids)}",
                x=0.5, font=dict(size=12)
            ),
            xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[min_x - x_padding, max_x + x_padding], automargin=True),
            yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[min_y - y_padding, max_y + y_padding], automargin=True),
            plot_bgcolor="white", paper_bgcolor="white", hovermode="closest",
            width=width, height=height, margin=dict(l=20, r=100, t=40, b=10),
            showlegend=True, legend=dict(x=1.02, y=1, xanchor='left', yanchor='top',
                                         bgcolor='rgba(255,255,255,0.8)', bordercolor='gray', borderwidth=1, font=dict(size=10))
        )

        # Legend entries are empty traces that exist only to carry a marker and a name.
        legend_elements = [
            dict(name="Root", marker=dict(color=colors['root'], size=8)),
            dict(name="Normalized ML Groups", marker=dict(color=colors['normalized_ml_group'], size=8)),
            dict(name="Full ML Groups", marker=dict(color=colors['full_ml_group'], size=8)),
            dict(name="Genotypes", marker=dict(color=colors['genotype'], size=8)),
            dict(name="Query Sequence", marker=dict(color=colors['query_sequence'], size=10)),
            dict(name="Similar Sequences", marker=dict(color=colors['matched_sequence'], size=9)),
            dict(name="Representative Sequences", marker=dict(color=colors['representative_sequence'], size=8)),
        ]
        for element in legend_elements:
            fig.add_trace(go.Scatter(x=[None], y=[None], mode='markers', marker=element['marker'], name=element['name'], showlegend=True))

        config = {
            'displayModeBar': True, 'displaylogo': False, 'modeBarButtonsToRemove': ['select2d', 'lasso2d'],
            'toImageButtonOptions': {'format': 'png', 'filename': 'phylogenetic_tree', 'height': height, 'width': width, 'scale': 2}
        }
        try:
            # Must be passed by keyword: the first positional argument of
            # Figure.show() is the renderer, not the config.
            fig.show(config=config)
        except Exception as e:
            print(f"Warning: Could not display figure: {e}")
        return fig
    except Exception as e:
        print(f"Error creating tree visualization: {e}")
        return None
|
| 777 |
-
|
| 778 |
-
# --- ML Analysis ---
|
| 779 |
-
def perform_ml_analysis_safe(self, matched_ids: List[str]) -> Dict:
    """Run the alignment / distance / tree / likelihood pipeline.

    The query ID is analysed together with the matched IDs (at most 20
    sequences in total).  The steps are delegated to
    ``create_sequence_alignment``, ``calculate_ml_distances``,
    ``construct_ml_tree`` and ``calculate_ml_likelihood_safe``.

    Args:
        matched_ids: IDs of sequences matched to the query.

    Returns:
        A dict with keys ``tree``, ``alignment``, ``distance_matrix``,
        ``log_likelihood``, ``sequence_count`` and ``alignment_length``,
        or an empty dict if any step fails or fewer than 3 sequences are
        available.
    """
    try:
        print("\n🧬 PERFORMING MAXIMUM LIKELIHOOD ANALYSIS")
        print("="*50)

        # Query first, then the matches.  dict.fromkeys keeps first-seen order
        # and drops repeated IDs, so no record name is passed on twice.
        all_sequences = list(dict.fromkeys([self.query_id, *matched_ids]))

        # Limit number of sequences to prevent memory issues
        if len(all_sequences) > 20:
            print(f"Warning: Limiting analysis to 20 sequences (had {len(all_sequences)})")
            all_sequences = all_sequences[:20]

        if len(all_sequences) < 3:
            print("❌ Need at least 3 sequences for ML analysis")
            return {}

        # Step 1: Create multiple sequence alignment
        alignment = self.create_sequence_alignment(all_sequences)
        if not alignment:
            return {}

        # The alignment step can skip IDs (unknown or too-short sequences), so
        # re-check the minimum and report the number actually analysed.
        analyzed_count = len(alignment)
        if analyzed_count < 3:
            print("❌ Need at least 3 sequences for ML analysis")
            return {}

        # Step 2: Calculate ML distances
        distance_matrix = self.calculate_ml_distances(alignment)
        if distance_matrix.size == 0:
            return {}

        # Step 3: Construct ML tree
        ml_tree = self.construct_ml_tree(alignment)
        if not ml_tree:
            return {}

        # Step 4: Calculate tree likelihood (safely)
        log_likelihood = self.calculate_ml_likelihood_safe(ml_tree, alignment)

        # Step 5: Prepare results
        ml_results = {
            'tree': ml_tree,
            'alignment': alignment,
            'distance_matrix': distance_matrix,
            'log_likelihood': log_likelihood,
            'sequence_count': analyzed_count,
            'alignment_length': len(alignment[0]),
        }

        print("✅ ML analysis completed successfully")
        print(f"   Sequences analyzed: {analyzed_count}")
        print(f"   Alignment length: {ml_results['alignment_length']}")
        print(f"   Log-likelihood: {log_likelihood:.2f}")

        return ml_results

    except Exception as e:
        print(f"❌ ML analysis failed: {e}")
        import traceback
        traceback.print_exc()
        return {}
|
| 837 |
-
|
| 838 |
-
|
| 839 |
-
def create_sequence_alignment(self, sequence_ids: List[str]) -> Optional[MultipleSeqAlignment]:
    """Collect the sequences for the given IDs and return them as an alignment.

    Each ID is looked up in ``self.data`` by its ``'Accession Number'``; the
    ``'F-gene'`` value of the first matching row is upper-cased and stripped
    of every character other than A, T, G, C, N and ``-``.  Only cleaned
    sequences longer than 10 characters are kept.  The kept records are
    length-equalised by ``_simple_alignment``.

    Args:
        sequence_ids: Accession numbers to include.

    Returns:
        The alignment, or ``None`` if fewer than 2 usable sequences were
        found or an error occurred.
    """
    try:
        print("🧬 Creating multiple sequence alignment...")

        records = []
        for accession in sequence_ids:
            try:
                hits = self.data[self.data['Accession Number'] == accession]
                if hits.empty:
                    continue
                raw_sequence = str(hits.iloc[0]['F-gene'])
                # Keep nucleotide letters, N and gap characters only.
                cleaned = re.sub(r'[^ATGCN-]', '', raw_sequence.upper())
                if len(cleaned) <= 10:
                    # Below the minimum sequence length.
                    continue
                records.append(SeqRecord(Seq(cleaned), id=accession, description=""))
            except Exception as exc:
                print(f"Warning: Skipping sequence {accession}: {exc}")

        if len(records) < 2:
            print("❌ Need at least 2 valid sequences for alignment")
            return None

        # Simple padding-based alignment (MUSCLE or CLUSTAL would give better results).
        padded_records = self._simple_alignment(records)

        print(f"✓ Alignment created with {len(padded_records)} sequences")
        return MultipleSeqAlignment(padded_records)

    except Exception as exc:
        print(f"Error creating alignment: {exc}")
        return None
|
| 873 |
-
|
| 874 |
-
def _simple_alignment(self, sequences: List[SeqRecord]) -> List[SeqRecord]:
    """Bring all records to a common length by truncating and gap-padding.

    The target length is the longest input sequence, capped at 10000.
    Longer sequences are cut to that length and shorter ones are
    right-padded with ``-``.  No residues are shifted or matched.

    Args:
        sequences: Records to equalise.

    Returns:
        New records of equal length, or the input list unchanged if an
        error occurred.
    """
    try:
        target_length = max(len(record.seq) for record in sequences)

        # Cap maximum length to prevent memory issues.
        if target_length > 10000:
            target_length = 10000
            print(f"Warning: Sequences truncated to {target_length} bp")

        return [
            SeqRecord(
                # Truncate first, then pad the tail with gaps up to the target.
                Seq(str(record.seq)[:target_length].ljust(target_length, '-')),
                id=record.id,
                description=record.description,
            )
            for record in sequences
        ]
    except Exception as exc:
        print(f"Error in simple alignment: {exc}")
        return sequences
|
| 902 |
-
|
| 903 |
-
def calculate_ml_distances(self, alignment: MultipleSeqAlignment) -> np.ndarray:
    """Compute the symmetric pairwise distance matrix for an alignment.

    The alignment is encoded with ``_alignment_to_matrix`` and every
    unordered pair of rows is scored with ``_calculate_ml_distance_pair``.
    A pair whose scoring raises is given the fallback distance 1.0.

    Args:
        alignment: The aligned sequences.

    Returns:
        An ``n x n`` array with zeros on the diagonal, or an empty array
        if the alignment is empty or an error occurred.
    """
    try:
        print("📊 Calculating ML distances...")

        encoded = self._alignment_to_matrix(alignment)
        count = len(alignment)
        if count == 0:
            return np.array([])

        distances = np.zeros((count, count))
        # combinations() yields (i, j) with i < j, in row-major order.
        for i, j in itertools.combinations(range(count), 2):
            try:
                pair_distance = self._calculate_ml_distance_pair(encoded[i], encoded[j])
                distances[i, j] = distances[j, i] = pair_distance
            except Exception as exc:
                print(f"Warning: Error calculating distance between sequences {i} and {j}: {exc}")
                # Use maximum distance as fallback.
                distances[i, j] = distances[j, i] = 1.0

        print("✓ ML distances calculated")
        return distances

    except Exception as exc:
        print(f"Error calculating ML distances: {exc}")
        return np.array([])
|
| 937 |
-
|
| 938 |
-
def _alignment_to_matrix(self, alignment: MultipleSeqAlignment) -> np.ndarray:
    """Encode each aligned sequence as a row of integer codes.

    Codes are A=0, T=1, G=2, C=3, N=4 and ``-``=5; any other character is
    encoded as 4.  Sequences are upper-cased before encoding.

    Args:
        alignment: Iterable of records exposing a ``seq`` attribute.

    Returns:
        An array with one row per record, or an empty array if an error
        occurred.
    """
    try:
        codes = {'A': 0, 'T': 1, 'G': 2, 'C': 3, 'N': 4, '-': 5}
        rows = [
            [codes.get(symbol, 4) for symbol in str(record.seq).upper()]
            for record in alignment
        ]
        return np.array(rows)
    except Exception as exc:
        print(f"Error converting alignment to matrix: {exc}")
        return np.array([])
|
| 953 |
-
|
| 954 |
-
|
| 955 |
-
def _calculate_ml_distance_pair(self, seq1: np.ndarray, seq2: np.ndarray) -> float:
|
| 956 |
-
|
| 957 |
-
try:
|
| 958 |
-
if len(seq1) == 0 or len(seq2) == 0:
|
| 959 |
-
return 1.0
|
| 960 |
-
|
| 961 |
-
# Count differences (excluding gaps and N's)
|
| 962 |
-
valid_positions = (seq1 < 4) & (seq2 < 4) # Exclude N's and gaps
|
| 963 |
-
|
| 964 |
-
if np.sum(valid_positions) == 0:
|
| 965 |
-
return 1.0 # Maximum distance if no valid comparisons
|
| 966 |
-
|
| 967 |
-
differences = np.sum(seq1[valid_positions] != seq2[valid_positions])
|
| 968 |
-
total_valid = np.sum(valid_positions)
|
| 969 |
-
|
| 970 |
-
if total_valid == 0:
|
| 971 |
-
return 1.0
|
| 972 |
-
|
| 973 |
-
# Calculate proportion of differences
|
| 974 |
-
p = differences / total_valid
|
| 975 |
-
|
| 976 |
-
# Jukes-Cantor correction
|
| 977 |
-
if p >= 0.75:
|
| 978 |
-
return 1.0 # Maximum distance
|
| 979 |
-
|
| 980 |
-
# JC distance formula: -3/4 * ln(1 - 4p/3)
|
| 981 |
-
try:
|
| 982 |
-
jc_distance = -0.75 * np.log(1 - (4 * p / 3))
|
| 983 |
-
return min(max(jc_distance, 0.0), 1.0) # Clamp between 0 and 1
|
| 984 |
-
except (ValueError, RuntimeWarning):
|
| 985 |
-
return 1.0 # Return maximum distance if log calculation fails
|
| 986 |
-
|
| 987 |
-
except Exception as e:
|
| 988 |
-
return 1.0
|
| 989 |
-
|
| 990 |
-
def construct_ml_tree(self, alignment: MultipleSeqAlignment) -> Optional[Tree]:
    """Build a tree for the alignment and adjust its branch lengths.

    Pairwise distances from ``calculate_ml_distances`` are handed to
    ``_build_nj_tree_from_distances``.  A successfully built tree is then
    passed through ``_optimize_branch_lengths_ml_safe``.

    Args:
        alignment: The aligned sequences; record IDs become the leaf names.

    Returns:
        The tree, or ``None`` if the distances could not be computed, the
        tree could not be built, or an error occurred.
    """
    try:
        print("🌳 Constructing ML tree...")
        distance_matrix = self.calculate_ml_distances(alignment)
        if distance_matrix.size == 0:
            return None
        sequence_names = [record.id for record in alignment]
        tree = self._build_nj_tree_from_distances(distance_matrix, sequence_names)
        if not tree:
            # Nothing was built, so no success message is printed.
            return tree
        tree = self._optimize_branch_lengths_ml_safe(tree, alignment)
        print("✓ ML tree constructed")
        return tree
    except Exception as e:
        print(f"Error constructing ML tree: {e}")
        return None
|
| 1006 |
-
|
| 1007 |
-
def _build_nj_tree_from_distances(self, distance_matrix: np.ndarray, sequence_names: List[str]) -> Optional[Tree]:
    """Build a neighbor-joining tree from a square distance matrix.

    The matrix is converted to the lower-triangular list-of-lists form
    (diagonal included, fixed at 0.0; negative distances raised to 0.0)
    and passed to ``DistanceTreeConstructor.nj``.

    Args:
        distance_matrix: Pairwise distances, one row per name.
        sequence_names: Leaf names, in matrix row order.

    Returns:
        The tree, or ``None`` if the sizes disagree, the resulting tree
        fails ``_validate_tree_structure``, or an error occurred.
    """
    try:
        taxa_count = len(sequence_names)
        if distance_matrix.shape[0] != taxa_count:
            print("Error: Distance matrix size mismatch")
            return None

        lower_triangle = []
        for row in range(taxa_count):
            cells = [max(0.0, float(distance_matrix[row][col])) for col in range(row)]
            cells.append(0.0)  # diagonal entry
            lower_triangle.append(cells)

        matrix = DistanceMatrix(names=sequence_names, matrix=lower_triangle)
        nj_tree = DistanceTreeConstructor().nj(matrix)
        if self._validate_tree_structure(nj_tree):
            return nj_tree
        return None
    except Exception as exc:
        print(f"Error building NJ tree: {exc}")
        return None
|
| 1021 |
-
|
| 1022 |
-
def _validate_tree_structure(self, tree: Tree, max_depth: int = 100) -> bool:
    """Check that the tree can be walked without revisits or excessive depth.

    Starting from ``tree.root`` (or ``tree`` itself if it has no ``root``
    attribute), follows each node's ``clades`` depth-first.

    Args:
        tree: The tree (or root node) to check.
        max_depth: Deepest level allowed; the start node is level 0.

    Returns:
        ``False`` if any node lies deeper than ``max_depth``, any node
        object is reached twice, or an error occurs; ``True`` otherwise.
    """
    try:
        seen_ids = set()

        def subtree_ok(clade, level=0):
            if level > max_depth:
                return False
            key = id(clade)
            if key in seen_ids:
                return False
            seen_ids.add(key)
            # Stop at the first failing child.
            for sub_clade in getattr(clade, 'clades', []):
                if not subtree_ok(sub_clade, level + 1):
                    return False
            return True

        start = tree.root if hasattr(tree, 'root') else tree
        return subtree_ok(start)
    except Exception:
        return False
|
| 1037 |
-
|
| 1038 |
-
def _optimize_branch_lengths_ml_safe(self, tree: Tree, alignment: MultipleSeqAlignment) -> Tree:
    """Adjust the branch length of every clade that already has one.

    Each clade returned by ``_get_clades_safe`` with a non-``None``
    ``branch_length`` is assigned the value from
    ``_calculate_optimal_branch_length``, floored at 0.001.  The tree is
    modified in place.

    Args:
        tree: The tree to adjust.
        alignment: The alignment the tree was built from.

    Returns:
        The same tree object; it is returned unchanged if the alignment
        cannot be encoded, and as-is at the point of failure if an error
        occurs.
    """
    try:
        print("🔧 Optimizing branch lengths...")
        previous_limit = sys.getrecursionlimit()
        # Only ever raise the recursion limit.  Forcing it to 1000 would lower
        # a higher limit configured elsewhere for the duration of the call.
        limit_raised = previous_limit < 1000
        if limit_raised:
            sys.setrecursionlimit(1000)
        try:
            seq_matrix = self._alignment_to_matrix(alignment)
            if seq_matrix.size == 0:
                return tree
            for clade in self._get_clades_safe(tree):
                if getattr(clade, 'branch_length', None) is not None:
                    optimal_length = self._calculate_optimal_branch_length(clade, seq_matrix)
                    clade.branch_length = max(optimal_length, 0.001)
        finally:
            if limit_raised:
                sys.setrecursionlimit(previous_limit)
        print("✓ Branch lengths optimized")
        return tree
    except Exception as e:
        print(f"Warning: Branch optimization failed: {e}")
        return tree
|
| 1060 |
-
|
| 1061 |
-
def _get_clades_safe(self, tree: Tree, max_depth: int = 50) -> List:
    """Collect the tree's nodes in depth-first pre-order.

    Starts at ``tree.root`` (or ``tree`` itself if it has no ``root``
    attribute) and follows each node's ``clades``.  Nodes deeper than
    ``max_depth`` and node objects already collected are skipped along
    with their subtrees.

    Args:
        tree: The tree (or root node) to walk.
        max_depth: Deepest level collected; the start node is level 0.

    Returns:
        The collected nodes; if the walk raises, whatever was collected
        up to that point.
    """
    collected = []
    seen_ids = set()

    def walk(clade, level=0):
        if level > max_depth:
            return
        key = id(clade)
        if key in seen_ids:
            return
        seen_ids.add(key)
        collected.append(clade)
        for sub_clade in getattr(clade, 'clades', []):
            walk(sub_clade, level + 1)

    try:
        start = tree.root if hasattr(tree, 'root') else tree
        walk(start)
    except Exception as exc:
        print(f"Warning: Tree traversal error: {exc}")
    return collected
|
| 1077 |
-
|
| 1078 |
-
def _calculate_optimal_branch_length(self, clade: float, seq_matrix: np.ndarray) -> float:
|
| 1079 |
-
"""Calculates optimal branch length for a clade."""
|
| 1080 |
-
try:
|
| 1081 |
-
if not hasattr(clade, 'branch_length') or clade.branch_length is None:
|
| 1082 |
-
return 0.1
|
| 1083 |
-
current_length = float(clade.branch_length)
|
| 1084 |
-
if np.isnan(current_length) or np.isinf(current_length) or current_length <= 0:
|
| 1085 |
-
return 0.1
|
| 1086 |
-
return min(max(current_length * (0.9 if hasattr(clade, 'name') and clade.name else 1.1), 0.001), 1.0)
|
| 1087 |
-
except Exception:
|
| 1088 |
-
return 0.1
|
| 1089 |
-
|
| 1090 |
-
def calculate_ml_likelihood_safe(self, tree: Tree, alignment: MultipleSeqAlignment) -> float:
    """Sum per-site log-likelihoods over a sample of alignment columns.

    Only the first 1000 columns are considered, and they are sampled with
    a stride of ``max(1, n_sites // 100)``.  Columns with fewer than two
    entries coded below 4 are skipped.  Each remaining column is scored by
    ``_calculate_site_likelihood_safe``.

    Args:
        tree: Passed through to the per-site scorer.
        alignment: The aligned sequences.

    Returns:
        The summed log-likelihood, or ``-inf`` if the alignment cannot be
        encoded or an error occurred.
    """
    try:
        print("Trying to calculate tree likelihood...")
        encoded = self._alignment_to_matrix(alignment)
        if encoded.size == 0:
            return -np.inf

        log_likelihood = 0.0
        site_limit = min(encoded.shape[1], 1000)
        stride = max(1, site_limit // 100)
        for column in range(0, site_limit, stride):
            pattern = encoded[:, column]
            if np.sum(pattern < 4) < 2:
                continue
            site_value = self._calculate_site_likelihood_safe(tree, pattern)
            if site_value > 0:
                log_likelihood += np.log(site_value)

        print(f"Likelihood: {log_likelihood:.2f}")
        return log_likelihood
    except Exception as exc:
        print(f"Error calculating likelihood: {exc}")
        return -np.inf
|
| 1112 |
-
|
| 1113 |
-
def _calculate_site_likelihood_safe(self, tree: np.ndarray, site_pattern: np.ndarray) -> float:
|
| 1114 |
-
"""Calculates likelihood for a single site."""
|
| 1115 |
-
try:
|
| 1116 |
-
valid_nucs = site_pattern[site_pattern < 4]
|
| 1117 |
-
if len(valid_nucs) == 0:
|
| 1118 |
-
return 1.0
|
| 1119 |
-
unique_nucs = len(np.unique(valid_nucs))
|
| 1120 |
-
total_nucs = len(valid_nucs)
|
| 1121 |
-
diversity_factor = unique_nucs / 4.0
|
| 1122 |
-
likelihood = np.exp(-diversity_factor * total_nucs * 0.1)
|
| 1123 |
-
return max(likelihood, 1e-10)
|
| 1124 |
-
except Exception:
|
| 1125 |
-
return 1e-10
|
| 1126 |
-
|
| 1127 |
-
# --- Reporting ---
|
| 1128 |
-
def generate_detailed_report(self, matched_ids: List[str], actual_percentage: float) -> bool:
|
| 1129 |
-
"""
|
| 1130 |
-
Generate a detailed HTML report for virologists/scientists with query details, matched sequences,
|
| 1131 |
-
model performance, phylogenetic tree insights, and ML analysis results in tabular format.
|
| 1132 |
-
Outputs a styled HTML file.
|
| 1133 |
-
Returns True if successful, False otherwise.
|
| 1134 |
-
"""
|
| 1135 |
-
try:
|
| 1136 |
-
print("📝 Generating detailed HTML analysis report...")
|
| 1137 |
-
|
| 1138 |
-
# --- HTML Template with Inline CSS ---
|
| 1139 |
-
html_content = """
|
| 1140 |
-
<!DOCTYPE html>
|
| 1141 |
-
<html lang="en">
|
| 1142 |
-
<head>
|
| 1143 |
-
<meta charset="UTF-8">
|
| 1144 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 1145 |
-
<title>Phylogenetic Analysis Report - {query_id}</title>
|
| 1146 |
-
<style>
|
| 1147 |
-
body {{
|
| 1148 |
-
font-family: Arial, sans-serif;
|
| 1149 |
-
margin: 20px;
|
| 1150 |
-
background-color: #f9f9f9;
|
| 1151 |
-
color: #333;
|
| 1152 |
-
}}
|
| 1153 |
-
h1 {{
|
| 1154 |
-
text-align: center;
|
| 1155 |
-
color: #2c3e50;
|
| 1156 |
-
}}
|
| 1157 |
-
h2 {{
|
| 1158 |
-
color: #34495e;
|
| 1159 |
-
margin-top: 20px;
|
| 1160 |
-
}}
|
| 1161 |
-
table {{
|
| 1162 |
-
width: 100%;
|
| 1163 |
-
border-collapse: collapse;
|
| 1164 |
-
margin-bottom: 20px;
|
| 1165 |
-
background-color: #fff;
|
| 1166 |
-
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
|
| 1167 |
-
}}
|
| 1168 |
-
th, td {{
|
| 1169 |
-
padding: 10px;
|
| 1170 |
-
text-align: left;
|
| 1171 |
-
border: 1px solid #ddd;
|
| 1172 |
-
}}
|
| 1173 |
-
th {{
|
| 1174 |
-
background-color: #3498db;
|
| 1175 |
-
color: #fff;
|
| 1176 |
-
}}
|
| 1177 |
-
tr:nth-child(even) {{
|
| 1178 |
-
background-color: #f2f2f2;
|
| 1179 |
-
}}
|
| 1180 |
-
tr:hover {{
|
| 1181 |
-
background-color: #e0f7fa;
|
| 1182 |
-
}}
|
| 1183 |
-
.metadata {{
|
| 1184 |
-
margin-left: 20px;
|
| 1185 |
-
font-size: 0.9em;
|
| 1186 |
-
}}
|
| 1187 |
-
.metadata p {{
|
| 1188 |
-
margin: 5px 0;
|
| 1189 |
-
}}
|
| 1190 |
-
@media (max-width: 600px) {{
|
| 1191 |
-
table {{
|
| 1192 |
-
font-size: 0.85em;
|
| 1193 |
-
}}
|
| 1194 |
-
th, td {{
|
| 1195 |
-
padding: 8px;
|
| 1196 |
-
}}
|
| 1197 |
-
}}
|
| 1198 |
-
</style>
|
| 1199 |
-
</head>
|
| 1200 |
-
<body>
|
| 1201 |
-
<h1>Phylogenetic Analysis Report</h1>
|
| 1202 |
-
<p style="text-align: center;">Generated on: {timestamp}</p>
|
| 1203 |
-
<p style="text-align: center;">Query ID: {query_id}</p>
|
| 1204 |
-
"""
|
| 1205 |
-
|
| 1206 |
-
# Add timestamp and query ID to HTML
|
| 1207 |
-
timestamp = time.strftime("%Y-%m-%d %H:%M:%S %Z")
|
| 1208 |
-
html_content = html_content.format(query_id=self.query_id, timestamp=timestamp)
|
| 1209 |
-
|
| 1210 |
-
# --- Query Information ---
|
| 1211 |
-
query_type = (
|
| 1212 |
-
"Accession Number" if self.query_id in self.data['Accession Number'].values else
|
| 1213 |
-
"Dataset Sequence" if self.query_sequence in self.data['F-gene'].values else
|
| 1214 |
-
"Novel Sequence"
|
| 1215 |
-
)
|
| 1216 |
-
query_ml = "Unknown"
|
| 1217 |
-
query_genotype = "Unknown"
|
| 1218 |
-
query_metadata = {}
|
| 1219 |
-
|
| 1220 |
-
if query_type == "Novel Sequence":
|
| 1221 |
-
query_ml = self.predict_ml_group(self.query_sequence)
|
| 1222 |
-
query_genotype = self.predict_genotype(self.query_sequence)
|
| 1223 |
-
query_metadata = {"F-gene": self.query_sequence[:50] + "..." if len(self.query_sequence) > 50 else self.query_sequence}
|
| 1224 |
-
else:
|
| 1225 |
-
query_row = self.data[
|
| 1226 |
-
(self.data['Accession Number'] == self.query_id) |
|
| 1227 |
-
(self.data['F-gene'] == re.sub(r'[^ATGC]', '', self.query_sequence.upper()))
|
| 1228 |
-
].iloc[0]
|
| 1229 |
-
query_ml = query_row['ML']
|
| 1230 |
-
query_genotype = query_row['Genotype']
|
| 1231 |
-
query_metadata = query_row.to_dict()
|
| 1232 |
-
query_metadata['F-gene'] = query_metadata['F-gene'][:50] + "..." if len(query_metadata['F-gene']) > 50 else query_metadata['F-gene']
|
| 1233 |
-
|
| 1234 |
-
query_info_table = [
|
| 1235 |
-
["Query ID", self.query_id],
|
| 1236 |
-
["Query Type", query_type],
|
| 1237 |
-
["Sequence Length", f"{len(self.query_sequence)} nucleotides"],
|
| 1238 |
-
["ML Group", query_ml],
|
| 1239 |
-
["Genotype", query_genotype],
|
| 1240 |
-
["Target Similarity", f"{self.matching_percentage}%"],
|
| 1241 |
-
["Actual Similarity", f"{actual_percentage:.1f}%"]
|
| 1242 |
-
]
|
| 1243 |
-
|
| 1244 |
-
# Add Query Information section
|
| 1245 |
-
html_content += """
|
| 1246 |
-
<h2>Query Information</h2>
|
| 1247 |
-
<table>
|
| 1248 |
-
<tr><th>Field</th><th>Value</th></tr>
|
| 1249 |
-
"""
|
| 1250 |
-
for row in query_info_table:
|
| 1251 |
-
html_content += f"""
|
| 1252 |
-
<tr><td>{row[0]}</td><td>{row[1]}</td></tr>
|
| 1253 |
-
"""
|
| 1254 |
-
html_content += """
|
| 1255 |
-
</table>
|
| 1256 |
-
<div class="metadata">
|
| 1257 |
-
<h3>Metadata</h3>
|
| 1258 |
-
"""
|
| 1259 |
-
for key, value in query_metadata.items():
|
| 1260 |
-
html_content += f"""
|
| 1261 |
-
<p><strong>{key}:</strong> {value}</p>
|
| 1262 |
-
"""
|
| 1263 |
-
html_content += """
|
| 1264 |
-
</div>
|
| 1265 |
-
"""
|
| 1266 |
-
|
| 1267 |
-
# --- Matched Sequences ---
|
| 1268 |
-
matched_sequences_table = []
|
| 1269 |
-
headers = ["Accession Number", "Similarity (%)", "ML Group", "Genotype", "Host", "Country", "Isolate", "Year"]
|
| 1270 |
-
|
| 1271 |
-
for seq_id in matched_ids:
|
| 1272 |
-
row = self.data[self.data['Accession Number'] == seq_id].iloc[0]
|
| 1273 |
-
matched_sequences_table.append([
|
| 1274 |
-
seq_id,
|
| 1275 |
-
f"{self.similarity_scores.get(seq_id, 0.0):.1f}",
|
| 1276 |
-
row.get('ML', 'N/A'),
|
| 1277 |
-
row.get('Genotype', 'N/A'),
|
| 1278 |
-
row.get('Host', 'N/A'),
|
| 1279 |
-
row.get('Country', 'N/A'),
|
| 1280 |
-
row.get('Isolate', 'N/A'),
|
| 1281 |
-
row.get('Year', 'N/A')
|
| 1282 |
-
])
|
| 1283 |
-
|
| 1284 |
-
# Add Matched Sequences section
|
| 1285 |
-
html_content += f"""
|
| 1286 |
-
<h2>Matched Sequences</h2>
|
| 1287 |
-
<p>Total Matched Sequences: {len(matched_ids)}</p>
|
| 1288 |
-
"""
|
| 1289 |
-
if matched_sequences_table:
|
| 1290 |
-
html_content += """
|
| 1291 |
-
<table>
|
| 1292 |
-
<tr>
|
| 1293 |
-
"""
|
| 1294 |
-
for header in headers:
|
| 1295 |
-
html_content += f"<th>{header}</th>"
|
| 1296 |
-
html_content += """
|
| 1297 |
-
</tr>
|
| 1298 |
-
"""
|
| 1299 |
-
for row in matched_sequences_table:
|
| 1300 |
-
html_content += "<tr>"
|
| 1301 |
-
for cell in row:
|
| 1302 |
-
html_content += f"<td>{cell}</td>"
|
| 1303 |
-
html_content += "</tr>"
|
| 1304 |
-
html_content += """
|
| 1305 |
-
</table>
|
| 1306 |
-
"""
|
| 1307 |
-
else:
|
| 1308 |
-
html_content += """
|
| 1309 |
-
<p>No matched sequences found.</p>
|
| 1310 |
-
"""
|
| 1311 |
-
|
| 1312 |
-
# --- Model Performance ---
|
| 1313 |
-
model_performance_table = [
|
| 1314 |
-
["ML Model Accuracy", f"{self.ml_model_accuracy:.2%}" if self.ml_model_accuracy else "Not trained"],
|
| 1315 |
-
["Genotype Model Accuracy", f"{self.genotype_model_accuracy:.2%}" if self.genotype_model_accuracy else "Not trained"]
|
| 1316 |
-
]
|
| 1317 |
-
|
| 1318 |
-
# Add Model Performance section
|
| 1319 |
-
html_content += """
|
| 1320 |
-
<h2>Model Performance</h2>
|
| 1321 |
-
<table>
|
| 1322 |
-
<tr><th>Metric</th><th>Value</th></tr>
|
| 1323 |
-
"""
|
| 1324 |
-
for row in model_performance_table:
|
| 1325 |
-
html_content += f"""
|
| 1326 |
-
<tr><td>{row[0]}</td><td>{row[1]}</td></tr>
|
| 1327 |
-
"""
|
| 1328 |
-
html_content += """
|
| 1329 |
-
</table>
|
| 1330 |
-
"""
|
| 1331 |
-
|
| 1332 |
-
# --- Phylogenetic Tree Insights ---
|
| 1333 |
-
def count_nodes(node):
    """Return the number of nodes in the subtree rooted at *node*.

    *node* is a dict; its optional ``'children'`` entry is a mapping whose
    values are child nodes of the same form. The node itself counts as one.
    """
    offspring = node.get('children', {}).values()
    return 1 + sum(count_nodes(kid) for kid in offspring)
|
| 1338 |
-
|
| 1339 |
-
total_nodes = count_nodes(self.tree_structure)
|
| 1340 |
-
# Collects one " -> "-joined path string per node flagged as the query.
query_node_path = []

def find_query_path(node, path):
    """Walk the tree depth-first and record the path to every query node.

    *node* is a tree dict; *path* is the list of ancestor names leading to
    it. A node whose ``data['is_query']`` is truthy has its full path
    (ancestors plus its own ``'name'``) appended to ``query_node_path``.
    """
    flagged = node.get('data', {}).get('is_query', False)
    if flagged:
        query_node_path.append(" -> ".join([*path, node['name']]))
    # The node's name is only looked up when it is actually needed, so a
    # non-query leaf without a 'name' key is tolerated.
    for descendant in node.get('children', {}).values():
        find_query_path(descendant, [*path, node['name']])
|
| 1346 |
-
|
| 1347 |
-
find_query_path(self.tree_structure['root'], [])
|
| 1348 |
-
|
| 1349 |
-
tree_insights_table = [
|
| 1350 |
-
["Total Nodes", total_nodes],
|
| 1351 |
-
["ML Groups Represented", len(self.tree_structure['root']['children'])],
|
| 1352 |
-
["Query Node Path", query_node_path[0] if query_node_path else "Not found"]
|
| 1353 |
-
]
|
| 1354 |
-
|
| 1355 |
-
# Add Phylogenetic Tree Insights section
|
| 1356 |
-
html_content += """
|
| 1357 |
-
<h2>Phylogenetic Tree Insights</h2>
|
| 1358 |
-
<table>
|
| 1359 |
-
<tr><th>Field</th><th>Value</th></tr>
|
| 1360 |
-
"""
|
| 1361 |
-
for row in tree_insights_table:
|
| 1362 |
-
html_content += f"""
|
| 1363 |
-
<tr><td>{row[0]}</td><td>{row[1]}</td></tr>
|
| 1364 |
-
"""
|
| 1365 |
-
html_content += """
|
| 1366 |
-
</table>
|
| 1367 |
-
"""
|
| 1368 |
-
|
| 1369 |
-
# --- ML Analysis Results ---
|
| 1370 |
-
ml_analysis = self.tree_structure.get('ml_analysis', {})
|
| 1371 |
-
ml_analysis_table = [
|
| 1372 |
-
["ML Tree Available", ml_analysis.get('ml_tree_available', False)],
|
| 1373 |
-
["Log-Likelihood", f"{ml_analysis.get('log_likelihood', 'N/A'):.2f}" if ml_analysis.get('log_likelihood') else "N/A"],
|
| 1374 |
-
["Sequence Count", ml_analysis.get('sequence_count', 'N/A')],
|
| 1375 |
-
["Alignment Length", ml_analysis.get('alignment_length', 'N/A')]
|
| 1376 |
-
]
|
| 1377 |
-
|
| 1378 |
-
# Add ML Analysis Results section
|
| 1379 |
-
html_content += """
|
| 1380 |
-
<h2>Maximum Likelihood Analysis Results</h2>
|
| 1381 |
-
<table>
|
| 1382 |
-
<tr><th>Field</th><th>Value</th></tr>
|
| 1383 |
-
"""
|
| 1384 |
-
for row in ml_analysis_table:
|
| 1385 |
-
html_content += f"""
|
| 1386 |
-
<tr><td>{row[0]}</td><td>{row[1]}</td></tr>
|
| 1387 |
-
"""
|
| 1388 |
-
html_content += """
|
| 1389 |
-
</table>
|
| 1390 |
-
"""
|
| 1391 |
-
|
| 1392 |
-
# --- Close HTML ---
|
| 1393 |
-
html_content += """
|
| 1394 |
-
</body>
|
| 1395 |
-
</html>
|
| 1396 |
-
"""
|
| 1397 |
-
|
| 1398 |
-
# --- Save HTML Report ---
|
| 1399 |
-
report_filename = f"detailed_report_{self.query_id.replace('/', '_')}.html"
|
| 1400 |
-
print(f"Attempting to save report to: {os.path.abspath(report_filename)}")
|
| 1401 |
-
with open(report_filename, 'w') as f:
|
| 1402 |
-
f.write(html_content)
|
| 1403 |
-
print(f"✓ Detailed HTML report saved as '{report_filename}'")
|
| 1404 |
-
return True
|
| 1405 |
-
except Exception as e:
|
| 1406 |
-
print(f"Error generating detailed report: {str(e)}")
|
| 1407 |
-
import traceback
|
| 1408 |
-
traceback.print_exc()
|
| 1409 |
-
return False
|
| 1410 |
-
|
| 1411 |
-
def command_line_interface():
    """Parse command-line arguments and run phylogenetic analysis.

    Loads the CSV named by ``--data``, trains the AI model unless
    ``--no-ai`` is given, then processes every query (the comma-separated
    ``--batch`` list when present, otherwise ``--query``): similarity
    search, tree construction, interactive tree HTML and detailed HTML
    report. Output files are placed in ``--output-dir``.

    Exits with status 1 when the similarity is outside 70-99, the data file
    does not exist, the output directory cannot be created, or the data
    cannot be loaded. A query that is not found, or that has no similar
    sequences, is reported and skipped.
    """
    import shutil  # only used to relocate the report into --output-dir

    parser = argparse.ArgumentParser(
        description="Advanced Phylogenetic Tree Analyzer with AI-enhanced similarity matching",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="Examples:\n %(prog)s -d data.csv -q MH087032 -s 95\n %(prog)s -d data.csv -q MH087032 -s 90 --no-ai --batch query1,query2,query3"
    )
    parser.add_argument('-d', '--data', required=True, help='Path to CSV data file')
    parser.add_argument('-q', '--query', required=True, help='Query sequence ID or nucleotide sequence')
    parser.add_argument('-s', '--similarity', type=float, default=95.0, help='Target similarity percentage (70-99, default: 95)')
    parser.add_argument('--no-ai', action='store_true', help='Skip AI model training')
    parser.add_argument('--batch', help='Comma-separated list of query IDs for batch processing')
    parser.add_argument('--output-dir', default='.', help='Output directory for results')
    # NOTE(review): --save-json is accepted but nothing in this function
    # writes JSON; confirm whether an export step is still planned.
    parser.add_argument('--save-json', action='store_true', help='Save detailed results to JSON')

    args = parser.parse_args()

    # Validate arguments
    if not 70 <= args.similarity <= 99:
        print("❌ Similarity percentage must be between 70 and 99.")
        sys.exit(1)
    if not Path(args.data).exists():
        print(f"❌ Data file not found: {args.data}")
        sys.exit(1)

    output_dir = Path(args.output_dir)
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        print(f"❌ Cannot create output directory {output_dir}: {exc}")
        sys.exit(1)

    # Initialize analyzer
    analyzer = PhylogeneticTreeAnalyzer()
    if not analyzer.load_data(args.data):
        print("❌ Failed to load data.")
        sys.exit(1)

    # Train AI model unless disabled
    if not args.no_ai:
        print("⏳ Training AI model...")
        start_time = time.time()
        if analyzer.train_ai_model():
            print(f"✅ AI model training completed in {time.time() - start_time:.1f} seconds")
        else:
            print("⚠️ AI model training failed, continuing with basic analysis")

    # Process queries
    queries = args.batch.split(',') if args.batch else [args.query]
    for query in queries:
        query = query.strip()
        print(f"🔍 Processing: {query}")
        if not analyzer.find_query_sequence(query):
            print(f"❌ Query not found: {query}")
            continue

        matched_ids, actual_percentage = analyzer.find_similar_sequences(args.similarity)
        if not matched_ids:
            print(f"❌ No similar sequences found for {query}")
            continue

        analyzer.build_tree_structure_with_ml_safe(matched_ids)
        fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
        if fig:
            html_path = output_dir / f"phylogenetic_tree_{query.replace('/', '_')}_interactive.html"
            fig.write_html(str(html_path))
            print(f"📄 Interactive HTML saved: {html_path}")

        # The report method signals failure by returning False, so the
        # "saved" message is only printed when a file was really written.
        if analyzer.generate_detailed_report(matched_ids, actual_percentage):
            # The report is written to the working directory under a name
            # derived from analyzer.query_id, which may differ from the raw
            # query text typed on the command line.
            report_name = f"detailed_report_{analyzer.query_id.replace('/', '_')}.html"
            report_path = Path(report_name)
            target_path = output_dir / report_name
            if target_path.resolve() != report_path.resolve():
                shutil.move(str(report_path), str(target_path))
                report_path = target_path
            print(f"📄 Detailed HTML report saved: {report_path}")
        else:
            print(f"⚠️ Detailed report could not be generated for {query}")
        print(f"✅ Analysis completed for {query}")
|
| 1474 |
-
|
| 1475 |
-
def main():
    """Run interactive phylogenetic analysis with user input.

    Prompts for a data file (starting from a built-in default name), trains
    the AI model, asks for a query sequence or ID and a target similarity
    percentage, then runs the similarity search, builds the tree structure,
    writes the interactive tree HTML and generates the detailed report.

    Returns early (with a message) when the user cancels a prompt, when no
    similar sequences are found, or when the tree or visualization cannot
    be built. Returns None in every case.
    """
    print("\n" + "="*70)
    print("🧬 PHYLOGENETIC TREE ANALYZER - ADVANCED ML-BASED ANALYSIS")
    print("Version 2.0 | AI-Enhanced Similarity Matching")
    print("="*70)

    analyzer = PhylogeneticTreeAnalyzer()

    # Load data
    data_file = "f cleaned.csv"
    while not Path(data_file).exists() or not analyzer.load_data(data_file):
        print(f"❌ File not found or invalid: {data_file}")
        data_file = input("Enter valid data file path: ").strip()
        if not data_file:
            print("❌ Analysis cancelled.")
            return

    # Train AI model
    print("⏳ Training AI model...")
    start_time = time.time()
    if analyzer.train_ai_model():
        print(f"✅ AI model training completed in {time.time() - start_time:.1f} seconds")
    else:
        print("⚠️ AI model training failed, continuing with basic analysis")

    # Get query sequence
    while True:
        query_input = input("\nEnter query sequence or ID (min 10 nucleotides): ").strip()
        if analyzer.find_query_sequence(query_input):
            break
        retry = input("❌ Invalid input. Try again? (y/n): ").strip().lower()
        if retry != 'y':
            print("👋 Analysis cancelled.")
            return

    # Set similarity percentage
    while True:
        similarity_input = input("Enter target similarity percentage (1-99) [85]: ").strip()
        # Only the numeric conversion can raise ValueError, so only it is guarded.
        try:
            target_percentage = float(similarity_input) if similarity_input else 85.0
        except ValueError:
            print("❌ Please enter a valid number.")
            continue
        if 1 <= target_percentage <= 99:
            analyzer.matching_percentage = target_percentage
            break
        print("❌ Please enter a percentage between 1 and 99.")

    # Find similar sequences
    print(f"⏳ Analyzing sequences for {target_percentage}% similarity...")
    start_time = time.time()
    matched_ids, actual_percentage = analyzer.find_similar_sequences(target_percentage)
    if not matched_ids:
        print(f"❌ No similar sequences found at {target_percentage}% similarity.")
        return
    analyzer.matched_sequences = matched_ids
    analyzer.actual_percentage = actual_percentage
    print(f"✅ Similarity analysis completed in {time.time() - start_time:.1f} seconds")

    # Build tree structure
    print("⏳ Building phylogenetic tree structure...")
    start_time = time.time()
    tree_structure = analyzer.build_tree_structure_with_ml_safe(matched_ids)
    if not tree_structure:
        print("❌ Failed to build tree structure.")
        return
    print(f"✅ Tree structure built in {time.time() - start_time:.1f} seconds")

    # Create visualization and save HTML
    print("⏳ Creating interactive visualization...")
    start_time = time.time()
    fig = analyzer.create_interactive_tree(matched_ids, actual_percentage)
    if not fig:
        print("❌ Visualization creation failed.")
        return

    html_filename = "phylogenetic_tree_interactive.html"
    fig.write_html(html_filename)
    print(f"📄 Interactive HTML saved: {html_filename}")

    # Generate detailed report
    print("⏳ Generating detailed report...")
    start_time = time.time()
    report_ok = analyzer.generate_detailed_report(matched_ids, actual_percentage)
    if report_ok:
        print(f"✅ Detailed report generated in {time.time() - start_time:.1f} seconds")
    else:
        # The report method returns False on failure; say so instead of
        # later pointing the user at a file that was never written.
        print("⚠️ Detailed report could not be generated.")

    print("\n🎉 Analysis completed successfully!")
    print(f" Query ID: {analyzer.query_id}")
    print(f" Query sequence length: {len(analyzer.query_sequence)} nucleotides")
    print(f" Similar sequences found: {len(matched_ids)}")
    print(f" Actual similarity percentage: {actual_percentage:.1f}%")
    print(f" HTML visualization file: {html_filename}")
    if report_ok:
        print(f" HTML report file: detailed_report_{analyzer.query_id.replace('/', '_')}.html")
|
| 1568 |
-
|
| 1569 |
-
if __name__ == "__main__":
    try:
        # Arguments on the command line select the argparse-driven batch
        # interface; a bare invocation keeps the interactive prompts.
        if len(sys.argv) > 1:
            command_line_interface()
        else:
            main()
    except KeyboardInterrupt:
        # Ctrl-C is a normal way to leave the interactive session.
        print("\n👋 Goodbye!")
        sys.exit(0)
    except Exception as e:
        # Last-resort boundary: report the failure and exit non-zero.
        print(f"\n❌ Unexpected error: {e}")
        sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|