Spaces:

offtargeteffect
/

mrna-design-studio

Running

App Files Files Community

mrna-design-studio / core /analysis /candidate_score.py

offtargeteffect

Replace clustering with Candidate Analysis tab

2038bdc verified 5 days ago

Raw

History Blame Contribute Delete

6.24 kB

	"""
	Candidate objective scoring for mRNA design.

	Condenses a full AnalysisReport into the four objectives an mRNA designer
	trades off, each on a 0–100 scale where higher is better:

	- Expression — translation potential (CAI, Kozak strength)
	- Stability — predicted durability (GC balance, structure, homopolymers)
	- Immunogenicity — inverse of innate-immune risk (uridine content)
	- Manufacturability — clean synthesis/IVT (restriction sites, homopolymers, GC extremes)

	These are transparent heuristics, not trained predictors — they exist to rank and
	shortlist candidates from the metrics already computed. The function reads the
	report by duck typing and degrades gracefully when a metric is unavailable (e.g.
	ViennaRNA not installed → structure ignored rather than penalised).
	"""
	from __future__ import annotations

	from dataclasses import dataclass, field
	from typing import Any, Dict


	def _clamp(x: float) -> float:
	    """Limit *x* to the closed interval [0, 100]."""
	    # Apply the upper bound first, then the lower bound.
	    capped = x if x < 100.0 else 100.0
	    return capped if capped > 0.0 else 0.0


	@dataclass
	class ObjectiveScores:
	    """The four objective scores, their combined overall score, and notes.

	    ``details`` maps an objective name to a short human-readable explanation
	    and defaults to a fresh empty dict per instance.
	    """

	    expression: float
	    stability: float
	    immunogenicity: float
	    manufacturability: float
	    overall: float
	    details: Dict[str, str] = field(default_factory=dict)

	    def as_row(self) -> Dict[str, float]:
	        """Return the five scores rounded to whole numbers, keyed by display label."""
	        labelled = (
	            ("Expression", self.expression),
	            ("Stability", self.stability),
	            ("Immunogenicity", self.immunogenicity),
	            ("Manufacturability", self.manufacturability),
	            ("Overall", self.overall),
	        )
	        return {label: round(value) for label, value in labelled}


	# Share of each objective in the overall score; the four weights sum to 1.
	OBJECTIVE_WEIGHTS = dict(
	    expression=0.30,
	    stability=0.25,
	    immunogenicity=0.20,
	    manufacturability=0.25,
	)

	# Score assigned to each Kozak strength label.
	_KOZAK_SCORE = dict(strong=100.0, adequate=70.0, weak=35.0)


	def _liability_categories(report: Any) -> Dict[str, str]:
	    """Return the most severe flag severity recorded for each liability category.

	    Flags come from ``report.liability.flags``. A missing ``liability``
	    attribute, or missing/empty ``flags``, gives an empty mapping. Severity
	    ranks critical, then warning, then info; any other label ranks below all
	    three. On equal rank the severity seen first is kept.
	    """
	    rank = {"critical": 0, "warning": 1, "info": 2}
	    unranked = 9
	    liability = getattr(report, "liability", None)
	    flags = getattr(liability, "flags", []) or []

	    worst: Dict[str, str] = {}
	    for flag in flags:
	        category = flag.category
	        known = worst.get(category)
	        # Keep the stored severity unless this flag is strictly more severe.
	        if known is not None and rank.get(flag.severity, unranked) >= rank.get(known, unranked):
	            continue
	        worst[category] = flag.severity
	    return worst


	def score_objectives(report: Any) -> ObjectiveScores:
	    """Compute the four 0–100 objective scores from an analysis report.

	    The report is read by duck typing via ``getattr``; every metric is
	    optional. A missing CAI, Kozak strength, GC value or uridine block
	    contributes a neutral 60, and a missing/stub structure prediction (or one
	    without an MFE) is skipped rather than penalised.

	    Attributes read from ``report``:
	        cai: multiplied by 100 to form the CAI sub-score (presumably a 0–1
	            codon adaptation index — confirm against the producer).
	        kozak.strength: label looked up in ``_KOZAK_SCORE``.
	        gc_percent_global: GC content in percent.
	        structure: object with ``is_stub``, ``sequence`` and ``mfe``.
	        uridine: object with ``u_percent`` and ``high_u_stretches``.
	        restriction_enzymes_present: collection whose length is the number
	            of restriction sites.
	        liability.flags: reduced to a worst severity per category by
	            ``_liability_categories``.

	    Returns:
	        An ``ObjectiveScores`` whose four objectives and ``overall`` are each
	        clamped to 0–100. ``overall`` is the ``OBJECTIVE_WEIGHTS``-weighted
	        sum, and ``details`` holds one short explanatory string per objective.
	    """
	    details: Dict[str, str] = {}
	    cats = _liability_categories(report)

	    # ── Expression ────────────────────────────────────────────────────────────
	    cai = getattr(report, "cai", None)
	    cai_score = cai * 100.0 if cai is not None else 60.0
	    kz = getattr(report, "kozak", None)
	    kz_strength = getattr(kz, "strength", None)
	    kozak_score = _KOZAK_SCORE.get(kz_strength, 60.0)
	    expression = _clamp(0.6 * cai_score + 0.4 * kozak_score)
	    details["expression"] = (
	        f"CAI {('%.2f' % cai) if cai is not None else 'n/a'}, "
	        f"Kozak {kz_strength or 'n/a'}"
	    )

	    # ── Stability ─────────────────────────────────────────────────────────────
	    gc = getattr(report, "gc_percent_global", None)
	    # Test against None, not truthiness: a GC of 0 is a real (and very bad)
	    # measurement, not a missing one.
	    has_gc = gc is not None
	    if has_gc:
	        # Full marks in 50–60%; lose 3 points per percentage point outside it.
	        gc_score = 100.0 - 3.0 * max(0.0, abs(gc - 55.0) - 5.0)
	    else:
	        gc_score = 60.0
	    hp_sev = cats.get("Homopolymer")
	    hp_pen = 25.0 if hp_sev == "critical" else 10.0 if hp_sev == "warning" else 0.0
	    struct = getattr(report, "structure", None)
	    struct_note = ""
	    mfe = getattr(struct, "mfe", None)
	    # A stub prediction or an absent MFE means "no structure data": skip the
	    # adjustment entirely instead of scoring it as zero folding energy.
	    if struct is not None and not getattr(struct, "is_stub", True) and mfe is not None:
	        length = max(len(getattr(struct, "sequence", "") or ""), 1)
	        per_nt = mfe / length
	        # Adjustment is linear in -MFE per nucleotide: -5 at 0, 0 at -0.2/nt,
	        # capped at +10 from -0.6/nt onwards and floored at -10.
	        struct_bonus = max(-10.0, min(10.0, (-per_nt) * 25.0 - 5.0))
	        gc_score += struct_bonus
	        struct_note = f", MFE {per_nt:.2f}/nt"
	    stability = _clamp(gc_score - hp_pen)
	    details["stability"] = f"GC {gc:.0f}%" if has_gc else "GC n/a"
	    details["stability"] += struct_note + (f", homopolymer {hp_sev}" if hp_sev else "")

	    # ── Immunogenicity (higher = less immunogenic) ────────────────────────────
	    uri = getattr(report, "uridine", None)
	    u_pct = getattr(uri, "u_percent", None)
	    n_stretch = len(getattr(uri, "high_u_stretches", []) or [])
	    if u_pct is None:
	        immunogenicity = 60.0
	        details["immunogenicity"] = "uridine n/a"
	    else:
	        # 3 points off per percentage point of U above 20%, then 8 per
	        # high-U stretch.
	        base = 100.0 - max(0.0, u_pct - 20.0) * 3.0
	        immunogenicity = _clamp(base - 8.0 * n_stretch)
	        details["immunogenicity"] = f"U {u_pct:.0f}%, {n_stretch} high-U stretch(es)"

	    # ── Manufacturability ─────────────────────────────────────────────────────
	    manuf = 100.0
	    n_re = len(getattr(report, "restriction_enzymes_present", []) or [])
	    # 12 points per restriction site, capped at 36 (three sites).
	    manuf -= min(36.0, 12.0 * n_re)
	    if hp_sev == "critical":
	        manuf -= 25.0
	    elif hp_sev == "warning":
	        manuf -= 12.0
	    gc_sev = cats.get("GC")
	    manuf -= 20.0 if gc_sev == "critical" else 8.0 if gc_sev == "warning" else 0.0
	    # Any motif flag, whatever its severity, costs a flat 5 points.
	    if cats.get("Motif"):
	        manuf -= 5.0
	    manufacturability = _clamp(manuf)
	    details["manufacturability"] = (
	        f"{n_re} restriction site(s)"
	        + (f", homopolymer {hp_sev}" if hp_sev else "")
	        + (f", GC {gc_sev}" if gc_sev else "")
	    )

	    overall = (
	        OBJECTIVE_WEIGHTS["expression"] * expression
	        + OBJECTIVE_WEIGHTS["stability"] * stability
	        + OBJECTIVE_WEIGHTS["immunogenicity"] * immunogenicity
	        + OBJECTIVE_WEIGHTS["manufacturability"] * manufacturability
	    )

	    return ObjectiveScores(
	        expression=expression,
	        stability=stability,
	        immunogenicity=immunogenicity,
	        manufacturability=manufacturability,
	        overall=_clamp(overall),
	        details=details,
	    )