| """ |
| Candidate objective scoring for mRNA design. |
| |
| Condenses a full AnalysisReport into the four objectives an mRNA designer |
| trades off, each on a 0–100 scale where **higher is better**: |
| |
| - **Expression** — translation potential (CAI, Kozak strength) |
| - **Stability** — predicted durability (GC balance, structure, homopolymers) |
| - **Immunogenicity** — *inverse* of innate-immune risk (uridine content) |
| - **Manufacturability** — clean synthesis/IVT (restriction sites, homopolymers, GC extremes) |
| |
| These are transparent heuristics, not trained predictors — they exist to rank and |
| shortlist candidates from the metrics already computed. The function reads the |
| report by duck typing and degrades gracefully when a metric is unavailable (e.g. |
| ViennaRNA not installed → structure ignored rather than penalised). |
| """ |
| from __future__ import annotations |
|
|
| from dataclasses import dataclass, field |
| from typing import Any, Dict |
|
|
|
|
def _clamp(x: float) -> float:
    """Limit *x* to the closed score range [0.0, 100.0].

    Values above 100 come back as 100.0 and values below 0 as 0.0. Because
    every comparison with NaN is false, a NaN input also comes back as 100.0.
    """
    capped = min(100.0, x)
    return max(0.0, capped)
|
|
|
|
@dataclass
class ObjectiveScores:
    """Bundle of per-objective scores for one candidate.

    Holds one float per objective, a combined ``overall`` value, and an
    optional mapping of free-text notes.
    """

    # Per-objective scores.
    expression: float
    stability: float
    immunogenicity: float
    manufacturability: float
    # Combined score across the objectives.
    overall: float
    # Text notes keyed by name; every instance gets its own empty dict.
    details: Dict[str, str] = field(default_factory=dict)

    def as_row(self) -> Dict[str, float]:
        """Return the five scores as a dict with capitalised column labels.

        Each value is passed through ``round()`` with no digit count, so the
        values are whole numbers (Python ints, ties rounded to even).
        ``details`` is not included.
        """
        columns = (
            ("Expression", self.expression),
            ("Stability", self.stability),
            ("Immunogenicity", self.immunogenicity),
            ("Manufacturability", self.manufacturability),
            ("Overall", self.overall),
        )
        return {label: round(value) for label, value in columns}
|
|
|
|
| |
# Weight of each objective in the overall score computed by score_objectives();
# the four weights add up to 1.0.
OBJECTIVE_WEIGHTS: Dict[str, float] = {
    "expression": 0.30,
    "stability": 0.25,
    "immunogenicity": 0.20,
    "manufacturability": 0.25,
}

# Score (0-100) given to each Kozak strength label.
_KOZAK_SCORE: Dict[str, float] = {
    "strong": 100.0,
    "adequate": 70.0,
    "weak": 35.0,
}
|
|
|
|
def _liability_categories(report: Any) -> Dict[str, str]:
    """Collapse ``report.liability.flags`` to the worst severity per category.

    Severities rank critical > warning > info; any other severity ranks below
    all three. A missing ``liability``, or missing/empty/``None`` ``flags``,
    yields an empty dict. Each flag must expose ``category`` and ``severity``.
    """
    rank = {"critical": 0, "warning": 1, "info": 2}
    liability = getattr(report, "liability", None)
    flags = getattr(liability, "flags", []) or []

    worst: Dict[str, str] = {}
    for flag in flags:
        known = worst.get(flag.category)
        # Keep the recorded severity unless this flag is strictly worse.
        if known is not None and rank.get(flag.severity, 9) >= rank.get(known, 9):
            continue
        worst[flag.category] = flag.severity
    return worst
|
|
|
|
def score_objectives(report: Any) -> ObjectiveScores:
    """Compute the four 0-100 objective scores from an analysis report.

    The report is read by attribute lookup (duck typing). A metric that is
    missing or ``None`` falls back to a neutral value (or is skipped) instead
    of raising or being penalised.

    Args:
        report: Object exposing some or all of ``cai``, ``kozak.strength``,
            ``gc_percent_global``, ``structure`` (``is_stub``, ``mfe``,
            ``sequence``), ``uridine`` (``u_percent``, ``high_u_stretches``),
            ``restriction_enzymes_present`` and ``liability.flags``.

    Returns:
        ObjectiveScores with each objective in the 0-100 range, the weighted
        ``overall`` score (see ``OBJECTIVE_WEIGHTS``), and one short text note
        per objective in ``details``.
    """
    details: Dict[str, str] = {}
    cats = _liability_categories(report)

    # --- Expression: 60% codon adaptation, 40% Kozak strength -------------
    cai = getattr(report, "cai", None)
    # NOTE(review): the x100 scaling assumes CAI is reported on 0-1 - confirm.
    cai_score = cai * 100.0 if cai is not None else 60.0
    kz = getattr(report, "kozak", None)
    kz_strength = getattr(kz, "strength", None)
    kozak_score = _KOZAK_SCORE.get(kz_strength, 60.0)
    expression = _clamp(0.6 * cai_score + 0.4 * kozak_score)
    details["expression"] = (
        f"CAI {('%.2f' % cai) if cai is not None else 'n/a'}, "
        f"Kozak {kz_strength or 'n/a'}"
    )

    # --- Stability: GC balance, optional structure term, homopolymers -----
    gc = getattr(report, "gc_percent_global", None)
    # Compare against None, not truthiness: a measured GC of 0.0 is a real
    # (and very poor) value, not a missing metric.
    if gc is not None:
        # No penalty within 5 points of 55% GC, then 3 points per extra point.
        gc_score = 100.0 - 3.0 * max(0.0, abs(gc - 55.0) - 5.0)
    else:
        gc_score = 60.0
    hp_sev = cats.get("Homopolymer")
    hp_pen = 25.0 if hp_sev == "critical" else 10.0 if hp_sev == "warning" else 0.0
    struct = getattr(report, "structure", None)
    struct_note = ""
    if struct is not None and not getattr(struct, "is_stub", True):
        mfe = getattr(struct, "mfe", None)
        # An unavailable MFE is ignored rather than scored as 0.0, which would
        # cost 5 points (or raise TypeError when the attribute is None).
        if mfe is not None:
            # max(..., 1) guards the division when no sequence is attached.
            length = max(len(getattr(struct, "sequence", "") or ""), 1)
            per_nt = mfe / length
            # More negative MFE per nucleotide earns a larger bonus; the term
            # is zero at -0.2 per nt and is clipped to +/-10 points.
            struct_bonus = max(-10.0, min(10.0, (-per_nt) * 25.0 - 5.0))
            gc_score += struct_bonus
            struct_note = f", MFE {per_nt:.2f}/nt"
    stability = _clamp(gc_score - hp_pen)
    details["stability"] = f"GC {gc:.0f}%" if gc is not None else "GC n/a"
    details["stability"] += struct_note + (f", homopolymer {hp_sev}" if hp_sev else "")

    # --- Immunogenicity: inverse of uridine burden ------------------------
    uri = getattr(report, "uridine", None)
    u_pct = getattr(uri, "u_percent", None)
    n_stretch = len(getattr(uri, "high_u_stretches", []) or [])
    if u_pct is None:
        immunogenicity = 60.0
        details["immunogenicity"] = "uridine n/a"
    else:
        # 3 points per percentage point of U above 20%, 8 per high-U stretch.
        base = 100.0 - max(0.0, u_pct - 20.0) * 3.0
        immunogenicity = _clamp(base - 8.0 * n_stretch)
        details["immunogenicity"] = f"U {u_pct:.0f}%, {n_stretch} high-U stretch(es)"

    # --- Manufacturability: start at 100 and subtract per liability -------
    manuf = 100.0
    n_re = len(getattr(report, "restriction_enzymes_present", []) or [])
    # 12 points per restriction enzyme present, capped at 36.
    manuf -= min(36.0, 12.0 * n_re)
    if hp_sev == "critical":
        manuf -= 25.0
    elif hp_sev == "warning":
        manuf -= 12.0
    gc_sev = cats.get("GC")
    manuf -= 20.0 if gc_sev == "critical" else 8.0 if gc_sev == "warning" else 0.0
    if cats.get("Motif"):
        manuf -= 5.0
    manufacturability = _clamp(manuf)
    details["manufacturability"] = (
        f"{n_re} restriction site(s)"
        + (f", homopolymer {hp_sev}" if hp_sev else "")
        + (f", GC {gc_sev}" if gc_sev else "")
    )

    # --- Overall: weighted sum of the four objectives ---------------------
    overall = (
        OBJECTIVE_WEIGHTS["expression"] * expression
        + OBJECTIVE_WEIGHTS["stability"] * stability
        + OBJECTIVE_WEIGHTS["immunogenicity"] * immunogenicity
        + OBJECTIVE_WEIGHTS["manufacturability"] * manufacturability
    )

    return ObjectiveScores(
        expression=expression,
        stability=stability,
        immunogenicity=immunogenicity,
        manufacturability=manufacturability,
        overall=_clamp(overall),
        details=details,
    )
|
|