# Shared UI helpers (formatting, tables, analysis bullets, exports) used on several sidebar pages.
import html as _html
import math
from typing import Dict, List, Tuple, Optional

from utils.analyze import compute_properties
def predicted_confidence(row: Dict) -> Optional[float]:
    """Return the confidence of the class that was actually predicted.

    ``row["Confidence"]`` is read as the AMP probability. When
    ``row["Prediction"]`` is ``"AMP"`` that value is returned unchanged; for
    any other prediction the complement ``1 - p`` is returned so the number
    matches the displayed class.

    Args:
        row: Prediction record with ``"Prediction"`` and ``"Confidence"`` keys.

    Returns:
        The confidence as a float, or ``None`` when the row is empty or the
        stored confidence is missing, non-numeric, or not a finite number.
    """
    if not row:
        return None

    try:
        # float(None) raises TypeError, so a missing key is handled here too.
        p_amp = float(row.get("Confidence"))
    except (TypeError, ValueError, OverflowError):
        return None

    # NaN / inf parse as valid floats and slip past a plain ``is None`` check
    # (e.g. a blank numeric cell); report them as "no confidence available".
    if not math.isfinite(p_amp):
        return None

    if row.get("Prediction") == "AMP":
        return p_amp
    # Non-AMP: use the complement so "confidence" matches the displayed class.
    return 1.0 - p_amp
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
    """Format a probability in [0, 1] as a percent string for UI / exports.

    The result always carries exactly ``digits`` decimals (``0.5`` ->
    ``"50.0%"``; with ``digits=0`` -> ``"50%"``; with ``digits=2`` ->
    ``"50.00%"``), so values line up in tables and exported text.

    Args:
        conf_prob: Probability to display.
        digits: Number of decimals to show. A negative value still rounds to
            tens/hundreds as ``round`` does, and is shown without decimals.

    Returns:
        The percentage followed by ``%``.
    """
    percent = round(conf_prob * 100, digits)
    # round() alone drops trailing zeros (and returns an int for int input),
    # so the fixed-point format spec is what guarantees the decimal count.
    decimals = max(digits, 0)
    return f"{percent:.{decimals}f}%"
def heuristic_reason_for_profile(charge: float, hydro_fraction: float) -> str:
    """Pick a one-line, rule-of-thumb reason for a candidate's profile.

    Charge is checked first: a net charge above 2 wins outright. Otherwise a
    hydrophobic fraction within [0.3, 0.6] is reported as balanced, and
    anything else gets the generic fallback text.
    """
    if charge > 2:
        return "High positive charge supports membrane disruption"
    return (
        "Balanced hydrophobicity"
        if 0.3 <= hydro_fraction <= 0.6
        else "Favorable predicted profile"
    )
def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
    """Select the best prediction row and attach a short profile-based reason.

    Rows predicted as ``"AMP"`` are considered on their own when any exist;
    otherwise every row competes. The winner is the row with the highest
    ``predicted_confidence`` (rows without a usable confidence are ignored).

    Returns:
        A dict with the winning sequence, its prediction, confidence, a
        heuristic reason, charge and hydrophobic fraction; or ``None`` when
        there is nothing to rank or the winner has no sequence.
    """
    if not predictions:
        return None

    # An empty AMP subset is falsy, so ``or`` falls back to the full list.
    pool = [p for p in predictions if p.get("Prediction") == "AMP"] or predictions

    winner: Optional[Dict] = None
    winner_conf = -1.0
    for candidate in pool:
        score = predicted_confidence(candidate)
        # Strict ">" keeps the earliest row when confidences tie.
        if score is not None and score > winner_conf:
            winner, winner_conf = candidate, score

    sequence = winner.get("Sequence", "") if winner is not None else ""
    if not sequence:
        return None

    profile = compute_properties(sequence)
    net_charge = profile.get("Net Charge (approx.)", 0)
    hydro_fraction = profile.get("Hydrophobic Fraction", 0)
    return {
        "Sequence": sequence,
        "Prediction": winner.get("Prediction"),
        "predicted_confidence": winner_conf,
        "Reason": heuristic_reason_for_profile(net_charge, hydro_fraction),
        "Charge": net_charge,
        "Hydrophobic Fraction": hydro_fraction,
    }
def mutation_heatmap_html(original: str, final: str) -> str:
    """Render the final sequence as HTML with changed positions in bold red.

    Positions are compared one by one. Where the final sequence has no
    residue (it is shorter than the original), the original residue is shown
    instead and still marked as changed. Residues are HTML-escaped.
    """
    before = original or ""
    after = final or ""

    # Monospace layout so per-position residue changes align visually.
    pieces: List[str] = [
        "<div style='font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, \"Liberation Mono\", monospace; white-space: pre-wrap;'>"
    ]
    for pos in range(max(len(before), len(after))):
        # One-character slices yield "" past the end of the shorter string.
        old_res = before[pos:pos + 1]
        new_res = after[pos:pos + 1]
        shown = new_res or old_res
        escaped = _html.escape(shown)
        if shown and old_res != new_res:
            pieces.append(f"<span style='color:#d62728; font-weight:700;'>{escaped}</span>")
        elif shown:
            pieces.append(escaped)
        else:
            pieces.append(" ")
    pieces.append("</div>")
    return "".join(pieces)
def mutation_diff_table(original: str, final: str) -> List[Dict]:
    """Build side-by-side per-position rows for the optimizer diff expander.

    Each row holds the 1-based position, the original and final residue
    (an empty string where one sequence is shorter) and a "Yes"/"No" flag
    telling whether the two differ.
    """
    before = original or ""
    after = final or ""
    span = max(len(before), len(after))

    # One-character slices give "" beyond the end of the shorter sequence.
    pairs = [(before[i:i + 1], after[i:i + 1]) for i in range(span)]
    return [
        {
            "Position": position,
            "Original": old_res,
            "Final": new_res,
            "Changed": "No" if old_res == new_res else "Yes",
        }
        for position, (old_res, new_res) in enumerate(pairs, start=1)
    ]
def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
# Zero if inside [low, high]; else distance to nearest bound (hydrophobic “ideal band”).
if low <= value <= high:
return 0.0
if value < low:
return low - value
return value - high
def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_conf: float) -> Dict:
    """Compute confidence and property deltas for the Optimize summary panel.

    Properties are looked up via ``compute_properties`` for each non-empty
    sequence; an empty sequence contributes no properties, so its charge and
    hydrophobic fraction default to 0.

    Returns:
        A dict with the confidence change in percentage points, the charge
        and hydrophobic fraction before/after, and a text verdict for each.
    """
    before_seq = orig_seq or ""
    after_seq = final_seq or ""

    before_props = compute_properties(before_seq) if before_seq else {}
    after_props = compute_properties(after_seq) if after_seq else {}

    # "Net charge" is accepted as an alternative key for the charge value.
    charge_before = before_props.get("Net Charge (approx.)", before_props.get("Net charge", 0))
    charge_after = after_props.get("Net Charge (approx.)", after_props.get("Net charge", 0))
    hydro_before = before_props.get("Hydrophobic Fraction", 0)
    hydro_after = after_props.get("Hydrophobic Fraction", 0)

    confidence_gain = (float(final_conf) - float(orig_conf)) * 100.0

    charge_verdict = (
        "Increased"
        if charge_after > charge_before
        else ("Decreased" if charge_after < charge_before else "Same")
    )

    # Hydrophobicity is judged by distance to the 0.4-0.5 band, not by raw value.
    band = (0.4, 0.5)
    gap_before = _ideal_distance_to_interval(float(hydro_before), *band)
    gap_after = _ideal_distance_to_interval(float(hydro_after), *band)
    hydro_verdict = (
        "Improved balance"
        if gap_after < gap_before
        else ("Less optimal" if gap_after > gap_before else "Same")
    )

    return {
        "delta_conf_pct": confidence_gain,
        "charge_orig": charge_before,
        "charge_final": charge_after,
        "charge_change": charge_verdict,
        "hydro_orig": hydro_before,
        "hydro_final": hydro_after,
        "hydro_change": hydro_verdict,
    }
def sequence_length_warning(seq: str) -> Optional[str]:
    """Return a soft length warning, or ``None`` when no warning applies.

    Lengths from 8 to 50 residues (inclusive) pass silently; an empty or
    missing sequence also yields ``None``. These are display guardrails only.
    """
    if not seq:
        return None
    residue_count = len(seq)
    if 8 <= residue_count <= 50:
        return None
    return "Too short for typical AMP" if residue_count < 8 else "Unusually long sequence"
def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float) -> Tuple[str, str]:
    """Return a short quality label and its hex color for the status display.

    A confidence of at least 0.9 is rated strong regardless of the physical
    properties. Above 0.75 it is rated strong only with charge >= 2 and a
    hydrophobic fraction within [0.3, 0.6]. Above 0.5 is moderate; anything
    else is unlikely.
    """
    property_backed = (
        conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6
    ) if conf_prob < 0.9 else True
    if property_backed:
        return ("Strong AMP candidate", "#2ca02c")
    if conf_prob > 0.5:
        return ("Moderate potential", "#ff9800")
    return ("Unlikely AMP", "#d62728")
# Plain-language bullets for the Analyze page: rules of thumb, not a second model.
def _analysis_headline(label: str, pred_conf: float) -> str:
    """Return the opening bullet: the model call plus its confidence tier."""
    if label == "AMP":
        if pred_conf >= 80:
            return f"Model: **AMP** with high confidence ({pred_conf}% on this prediction). Profile below explains typical mechanisms."
        if pred_conf >= 60:
            return f"Model: **AMP** with moderate confidence ({pred_conf}%); cross-check chemistry bullets before treating it as a strong hit."
        return f"Model: **AMP** but low confidence ({pred_conf}%); the mechanistic notes below matter more than the label alone."

    # Every label other than "AMP" is reported with the Non-AMP wording.
    if pred_conf >= 80:
        return f"Model: **Non-AMP** with high confidence ({pred_conf}% on this prediction). Below are common reasons a sequence may not behave like a classic AMP."
    if pred_conf >= 60:
        return f"Model: **Non-AMP** with moderate confidence ({pred_conf}%); reasons below are typical but not exhaustive."
    return f"Model: **Non-AMP** with low confidence ({pred_conf}%); treat the label as tentative and read the property-based notes."


def _weak_profile_notes(
    label: str,
    charge: float,
    hydro: float,
    length: int,
    polar_frac: float,
    basic_frac: float,
) -> List[str]:
    """Return bullets explaining why a sequence may not act like a classic AMP."""
    notes: List[str] = []
    if charge <= 0:
        notes.append(
            "Weak or absent **positive net charge**: many AMPs rely on cationic residues to bind **anionic bacterial surfaces** (e.g. LPS, teichoic acids); near-neutral or negative peptides often lack that first electrostatic hook."
        )
    if hydro < 0.28:
        notes.append(
            "Low **hydrophobic** content: membrane insertion, pore formation, or lipid disruption is harder without a hydrophobic face or core to partition into the bilayer."
        )
    if hydro > 0.65:
        notes.append(
            "Very high **hydrophobic** content: risk of aggregation or poor **aqueous solubility** before the peptide can reach bacteria, delivery and effective concentration suffer."
        )
    if polar_frac < 0.12:
        notes.append(
            "Few **polar / H-bonding** residues (S, T, N, Q, Y, C): weaker interfacial interactions with lipids and water at the membrane. Many AMP mechanisms benefit from polar positioning at the interface."
        )
    if basic_frac < 0.06 and charge < 2:
        notes.append(
            "Sparse **basic** residues (K, R, H): a hallmark of many AMPs is concentrated positive charge for initial **bacterial association**; this sequence is thin on that axis."
        )
    # NOTE(review): the source this was recovered from had lost its
    # indentation; the length notes and the disagreement note are assumed to
    # belong to the weak-profile branch - confirm against the original file.
    if length < 8:
        notes.append(
            "Very **short** length: may be too small to form a stable membrane-active structure or to span a bilayer meaningfully."
        )
    elif length > 50:
        notes.append(
            "Unusually **long** chain: folding, proteolysis, and synthesis cost can diverge from small cationic AMP archetypes."
        )
    if label == "Non-AMP" and charge >= 2 and 0.28 <= hydro <= 0.58:
        notes.append(
            "**Note:** Charge and hydrophobic balance still look somewhat AMP-like; the model says Non-AMP. Treat this as a **disagreement** worth validating experimentally, not proof either way."
        )
    return notes


def _strong_profile_notes(
    comp: Dict[str, float],
    charge: float,
    hydro: float,
    polar_frac: float,
    basic_frac: float,
) -> List[str]:
    """Return bullets describing properties that support a confident AMP call."""
    notes: List[str] = []
    if charge >= 2 and 0.28 <= hydro <= 0.58:
        notes.append(
            "**Positive charge** plus **moderate hydrophobic fraction** aligns with membrane-targeting motifs common in AMP literature."
        )
    if polar_frac >= 0.12:
        notes.append(
            "Adequate **polar** residues can help **interfacial** placement and H-bonding at the membrane."
        )
    if basic_frac >= 0.18:
        notes.append(
            "Higher **basic** residue fraction supports **electrostatic** attraction to anionic bacterial components."
        )
    if float(comp.get("C", 0.0)) + float(comp.get("W", 0.0)) >= 0.08:
        notes.append(
            "**Cysteine / tryptophan** can contribute to membrane insertion, stacking, or oxidative chemistry depending on context."
        )
    return notes


def build_analysis_insights(
    label: str,
    conf: float,
    comp: Dict[str, float],
    length: int,
    hydro: float,
    charge: float,
) -> List[str]:
    """Build short, heuristic bullets explaining a prediction.

    The first bullet states the model call and a confidence tier. Further
    bullets come from fixed thresholds on charge, hydrophobic fraction,
    residue composition and length; they are rules of thumb, not model output.

    Args:
        label: Predicted class; ``"AMP"`` selects the AMP wording, anything
            else the Non-AMP wording.
        conf: AMP probability in [0, 1]. For a non-AMP label the complement
            is what gets displayed.
        comp: Per-residue fractions keyed by one-letter code. Missing
            residues count as 0; ``None`` is treated as an empty mapping.
        length: Sequence length in residues.
        hydro: Hydrophobic fraction.
        charge: Net charge.

    Returns:
        Up to 14 unique bullets, in the order they were generated.
    """
    comp = comp or {}
    p_amp = float(conf)
    # Percent confidence in the *displayed* class, rounded for the UI text.
    pred_conf = round((p_amp if label == "AMP" else 1 - p_amp) * 100, 1)

    polar_frac = sum(float(comp.get(aa, 0.0)) for aa in "STNQYC")
    basic_frac = sum(float(comp.get(aa, 0.0)) for aa in "KRH")

    lines: List[str] = [_analysis_headline(label, pred_conf)]

    # Weakness notes apply to Non-AMP calls and to shaky (< 65%) AMP calls.
    if label == "Non-AMP" or (label == "AMP" and pred_conf < 65):
        lines.extend(
            _weak_profile_notes(label, charge, hydro, length, polar_frac, basic_frac)
        )
    if label == "AMP" and pred_conf >= 65:
        lines.extend(
            _strong_profile_notes(comp, charge, hydro, polar_frac, basic_frac)
        )

    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(lines))[:14]
def build_analysis_summary_text(
    sequence: str,
    prediction: str,
    confidence_display: str,
    props: Dict,
    analysis_lines: List[str],
) -> str:
    """Assemble the plain-text report offered as the Analyze page TXT download.

    Length, charge and hydrophobic fraction are read from ``props``, with
    fallbacks to the alternative keys ``"Net charge"`` / ``"Hydrophobic"`` and
    to ``len(sequence)`` for the length. Each analysis line becomes a ``- ``
    bullet under a ``Summary:`` heading.
    """
    length = props.get("Length", len(sequence))
    charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
    hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))

    bullets = [f"- {line}" for line in (analysis_lines or [])]
    report = [
        f"Sequence: {sequence}",
        f"Prediction: {prediction}",
        f"Confidence: {confidence_display}",
        f"Length: {length}",
        f"Net Charge (approx.): {charge}",
        f"Hydrophobic Fraction: {hydro}",
        "",  # blank line separating the properties from the summary
        "Summary:",
        "\n".join(bullets),
    ]
    return "\n".join(report) + "\n"
|