""" Candidate objective scoring for mRNA design. Condenses a full AnalysisReport into the four objectives an mRNA designer trades off, each on a 0–100 scale where **higher is better**: - **Expression** — translation potential (CAI, Kozak strength) - **Stability** — predicted durability (GC balance, structure, homopolymers) - **Immunogenicity** — *inverse* of innate-immune risk (uridine content) - **Manufacturability**— clean synthesis/IVT (restriction sites, homopolymers, GC extremes) These are transparent heuristics, not trained predictors — they exist to rank and shortlist candidates from the metrics already computed. The function reads the report by duck typing and degrades gracefully when a metric is unavailable (e.g. ViennaRNA not installed → structure ignored rather than penalised). """ from __future__ import annotations from dataclasses import dataclass, field from typing import Any, Dict def _clamp(x: float) -> float: return max(0.0, min(100.0, x)) @dataclass class ObjectiveScores: expression: float stability: float immunogenicity: float manufacturability: float overall: float details: Dict[str, str] = field(default_factory=dict) def as_row(self) -> Dict[str, float]: return { "Expression": round(self.expression), "Stability": round(self.stability), "Immunogenicity": round(self.immunogenicity), "Manufacturability": round(self.manufacturability), "Overall": round(self.overall), } # objective -> weight in the overall score OBJECTIVE_WEIGHTS = { "expression": 0.30, "stability": 0.25, "immunogenicity": 0.20, "manufacturability": 0.25, } _KOZAK_SCORE = {"strong": 100.0, "adequate": 70.0, "weak": 35.0} def _liability_categories(report: Any) -> Dict[str, str]: """Map liability category -> worst severity seen (from report.liability.flags).""" out: Dict[str, str] = {} lia = getattr(report, "liability", None) order = {"critical": 0, "warning": 1, "info": 2} for f in getattr(lia, "flags", []) or []: cur = out.get(f.category) if cur is None or order.get(f.severity, 9) < order.get(cur, 9): out[f.category] = f.severity return out def score_objectives(report: Any) -> ObjectiveScores: """Compute the four 0–100 objective scores from an analysis report.""" details: Dict[str, str] = {} cats = _liability_categories(report) # ── Expression ──────────────────────────────────────────────────────────── cai = getattr(report, "cai", None) cai_score = cai * 100.0 if cai is not None else 60.0 kz = getattr(report, "kozak", None) kz_strength = getattr(kz, "strength", None) kozak_score = _KOZAK_SCORE.get(kz_strength, 60.0) expression = _clamp(0.6 * cai_score + 0.4 * kozak_score) details["expression"] = ( f"CAI {('%.2f' % cai) if cai is not None else 'n/a'}, " f"Kozak {kz_strength or 'n/a'}" ) # ── Stability ───────────────────────────────────────────────────────────── gc = getattr(report, "gc_percent_global", None) if gc: # full marks in 50–60%, falling off outside gc_score = 100.0 - 3.0 * max(0.0, abs(gc - 55.0) - 5.0) else: gc_score = 60.0 hp_sev = cats.get("Homopolymer") hp_pen = 25.0 if hp_sev == "critical" else 10.0 if hp_sev == "warning" else 0.0 struct = getattr(report, "structure", None) struct_note = "" if struct is not None and not getattr(struct, "is_stub", True): length = max(len(getattr(struct, "sequence", "") or ""), 1) per_nt = getattr(struct, "mfe", 0.0) / length # moderate structure is stabilising; reward down to ~-0.4/nt, then taper struct_bonus = max(-10.0, min(10.0, (-per_nt) * 25.0 - 5.0)) gc_score += struct_bonus struct_note = f", MFE {per_nt:.2f}/nt" stability = _clamp(gc_score - hp_pen) details["stability"] = f"GC {gc:.0f}%" if gc else "GC n/a" details["stability"] += struct_note + (f", homopolymer {hp_sev}" if hp_sev else "") # ── Immunogenicity (higher = less immunogenic) ──────────────────────────── uri = getattr(report, "uridine", None) u_pct = getattr(uri, "u_percent", None) n_stretch = len(getattr(uri, "high_u_stretches", []) or []) if u_pct is None: immunogenicity = 60.0 details["immunogenicity"] = "uridine n/a" else: base = 100.0 - max(0.0, u_pct - 20.0) * 3.0 immunogenicity = _clamp(base - 8.0 * n_stretch) details["immunogenicity"] = f"U {u_pct:.0f}%, {n_stretch} high-U stretch(es)" # ── Manufacturability ───────────────────────────────────────────────────── manuf = 100.0 n_re = len(getattr(report, "restriction_enzymes_present", []) or []) manuf -= min(36.0, 12.0 * n_re) if hp_sev == "critical": manuf -= 25.0 elif hp_sev == "warning": manuf -= 12.0 gc_sev = cats.get("GC") manuf -= 20.0 if gc_sev == "critical" else 8.0 if gc_sev == "warning" else 0.0 if cats.get("Motif"): manuf -= 5.0 manufacturability = _clamp(manuf) details["manufacturability"] = ( f"{n_re} restriction site(s)" + (f", homopolymer {hp_sev}" if hp_sev else "") + (f", GC {gc_sev}" if gc_sev else "") ) overall = ( OBJECTIVE_WEIGHTS["expression"] * expression + OBJECTIVE_WEIGHTS["stability"] * stability + OBJECTIVE_WEIGHTS["immunogenicity"] * immunogenicity + OBJECTIVE_WEIGHTS["manufacturability"] * manufacturability ) return ObjectiveScores( expression=expression, stability=stability, immunogenicity=immunogenicity, manufacturability=manufacturability, overall=_clamp(overall), details=details, )