offtargeteffect committed on
Commit
2038bdc
Β·
verified Β·
1 Parent(s): bdd3f19

Replace clustering with Candidate Analysis tab

Browse files
core/analysis/candidate_score.py ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Candidate objective scoring for mRNA design.
3
+
4
+ Condenses a full AnalysisReport into the four objectives an mRNA designer
5
+ trades off, each on a 0–100 scale where **higher is better**:
6
+
7
+ - **Expression** — translation potential (CAI, Kozak strength)
8
+ - **Stability** — predicted durability (GC balance, structure, homopolymers)
9
+ - **Immunogenicity** — *inverse* of innate-immune risk (uridine content)
10
+ - **Manufacturability** — clean synthesis/IVT (restriction sites, homopolymers, GC extremes)
11
+
12
+ These are transparent heuristics, not trained predictors — they exist to rank and
13
+ shortlist candidates from the metrics already computed. The function reads the
14
+ report by duck typing and degrades gracefully when a metric is unavailable (e.g.
15
+ ViennaRNA not installed → structure ignored rather than penalised).
16
+ """
17
+ from __future__ import annotations
18
+
19
+ from dataclasses import dataclass, field
20
+ from typing import Any, Dict
21
+
22
+
23
+ def _clamp(x: float) -> float:
24
+ return max(0.0, min(100.0, x))
25
+
26
+
27
@dataclass
class ObjectiveScores:
    """Per-candidate objective scores plus a short explanation for each.

    Every numeric field is a score where higher is better; ``details`` maps
    an objective key to a human-readable summary of the inputs behind it.
    """

    expression: float
    stability: float
    immunogenicity: float
    manufacturability: float
    overall: float
    details: Dict[str, str] = field(default_factory=dict)

    def as_row(self) -> Dict[str, float]:
        """Return the five scores keyed by display label, rounded to whole numbers."""
        labelled = (
            ("Expression", self.expression),
            ("Stability", self.stability),
            ("Immunogenicity", self.immunogenicity),
            ("Manufacturability", self.manufacturability),
            ("Overall", self.overall),
        )
        return {label: round(score) for label, score in labelled}
44
+
45
+
46
+ # objective -> weight in the overall score
47
+ OBJECTIVE_WEIGHTS = {
48
+ "expression": 0.30,
49
+ "stability": 0.25,
50
+ "immunogenicity": 0.20,
51
+ "manufacturability": 0.25,
52
+ }
53
+
54
+ _KOZAK_SCORE = {"strong": 100.0, "adequate": 70.0, "weak": 35.0}
55
+
56
+
57
+ def _liability_categories(report: Any) -> Dict[str, str]:
58
+ """Map liability category -> worst severity seen (from report.liability.flags)."""
59
+ out: Dict[str, str] = {}
60
+ lia = getattr(report, "liability", None)
61
+ order = {"critical": 0, "warning": 1, "info": 2}
62
+ for f in getattr(lia, "flags", []) or []:
63
+ cur = out.get(f.category)
64
+ if cur is None or order.get(f.severity, 9) < order.get(cur, 9):
65
+ out[f.category] = f.severity
66
+ return out
67
+
68
+
69
def score_objectives(report: Any) -> ObjectiveScores:
    """Compute the four 0–100 objective scores from an analysis report.

    The report is read by duck typing (``getattr``). A missing CAI, Kozak
    strength, GC percentage or uridine percentage falls back to a neutral 60
    for that component instead of raising or scoring zero. Structure is only
    considered when a non-stub structure with an MFE value is present.

    Args:
        report: Object optionally exposing ``cai``, ``kozak.strength``,
            ``gc_percent_global``, ``structure`` (``is_stub``, ``mfe``,
            ``sequence``), ``uridine`` (``u_percent``, ``high_u_stretches``),
            ``restriction_enzymes_present`` and ``liability.flags``.

    Returns:
        An ``ObjectiveScores`` whose four objectives and ``overall`` are each
        clamped to [0, 100], with a short explanatory string per objective in
        ``details``.
    """
    details: Dict[str, str] = {}
    cats = _liability_categories(report)

    # ── Expression ────────────────────────────────────────────────────────────
    cai = getattr(report, "cai", None)
    cai_score = cai * 100.0 if cai is not None else 60.0
    kz = getattr(report, "kozak", None)
    kz_strength = getattr(kz, "strength", None)
    kozak_score = _KOZAK_SCORE.get(kz_strength, 60.0)
    expression = _clamp(0.6 * cai_score + 0.4 * kozak_score)
    details["expression"] = (
        f"CAI {('%.2f' % cai) if cai is not None else 'n/a'}, "
        f"Kozak {kz_strength or 'n/a'}"
    )

    # ── Stability ─────────────────────────────────────────────────────────────
    gc = getattr(report, "gc_percent_global", None)
    # Explicit None test (as for CAI and uridine): a measured 0.0 must be
    # scored and displayed, not mistaken for a missing metric.
    has_gc = gc is not None
    if has_gc:
        # full marks in 50–60%, falling off by 3 points per % outside
        gc_score = 100.0 - 3.0 * max(0.0, abs(gc - 55.0) - 5.0)
    else:
        gc_score = 60.0
    hp_sev = cats.get("Homopolymer")
    hp_pen = 25.0 if hp_sev == "critical" else 10.0 if hp_sev == "warning" else 0.0
    struct = getattr(report, "structure", None)
    struct_note = ""
    if struct is not None and not getattr(struct, "is_stub", True):
        mfe = getattr(struct, "mfe", None)
        # No MFE value -> ignore structure rather than penalise (or crash on
        # None / length).
        if mfe is not None:
            length = max(len(getattr(struct, "sequence", "") or ""), 1)
            per_nt = mfe / length
            # Linear in MFE per nt, clipped to ±10: −5 at 0/nt, break-even at
            # −0.2/nt, maximum +10 from −0.6/nt downwards.
            struct_bonus = max(-10.0, min(10.0, (-per_nt) * 25.0 - 5.0))
            gc_score += struct_bonus
            struct_note = f", MFE {per_nt:.2f}/nt"
    stability = _clamp(gc_score - hp_pen)
    details["stability"] = f"GC {gc:.0f}%" if has_gc else "GC n/a"
    details["stability"] += struct_note + (f", homopolymer {hp_sev}" if hp_sev else "")

    # ── Immunogenicity (higher = less immunogenic) ────────────────────────────
    uri = getattr(report, "uridine", None)
    u_pct = getattr(uri, "u_percent", None)
    n_stretch = len(getattr(uri, "high_u_stretches", []) or [])
    if u_pct is None:
        immunogenicity = 60.0
        details["immunogenicity"] = "uridine n/a"
    else:
        # 3 points per % of U above 20%, plus 8 points per high-U stretch.
        base = 100.0 - max(0.0, u_pct - 20.0) * 3.0
        immunogenicity = _clamp(base - 8.0 * n_stretch)
        details["immunogenicity"] = f"U {u_pct:.0f}%, {n_stretch} high-U stretch(es)"

    # ── Manufacturability ─────────────────────────────────────────────────────
    manuf = 100.0
    n_re = len(getattr(report, "restriction_enzymes_present", []) or [])
    # 12 points per enzyme present, capped at 36.
    manuf -= min(36.0, 12.0 * n_re)
    if hp_sev == "critical":
        manuf -= 25.0
    elif hp_sev == "warning":
        manuf -= 12.0
    gc_sev = cats.get("GC")
    manuf -= 20.0 if gc_sev == "critical" else 8.0 if gc_sev == "warning" else 0.0
    if cats.get("Motif"):
        manuf -= 5.0
    manufacturability = _clamp(manuf)
    details["manufacturability"] = (
        f"{n_re} restriction site(s)"
        + (f", homopolymer {hp_sev}" if hp_sev else "")
        + (f", GC {gc_sev}" if gc_sev else "")
    )

    overall = (
        OBJECTIVE_WEIGHTS["expression"] * expression
        + OBJECTIVE_WEIGHTS["stability"] * stability
        + OBJECTIVE_WEIGHTS["immunogenicity"] * immunogenicity
        + OBJECTIVE_WEIGHTS["manufacturability"] * manufacturability
    )

    return ObjectiveScores(
        expression=expression, stability=stability,
        immunogenicity=immunogenicity, manufacturability=manufacturability,
        overall=_clamp(overall), details=details,
    )
demo/DEMO_SCRIPT.md CHANGED
@@ -1,54 +1,93 @@
1
- # mRNA Design Studio β€” Demo Script (one page)
2
 
3
  **Live app:** https://offtargeteffect-mrna-design-studio.hf.space
4
- **Login:** username `admin` Β· password `vOAMljsXrzCemLZK4A38` *(or remove the password for a smoother live demo β€” see Prep)*
5
- **Open in its own browser tab** β€” not the Hugging Face embedded preview (that loops on login).
6
 
7
  ---
8
 
9
- ## Prep (do 5 min before)
10
- - [ ] Visit the URL to **wake the Space** (free tier sleeps; first load is slow).
11
- - [ ] Have the sample file ready to drag in: `demo/demo_sequences_extended.csv` (14 constructs).
12
- - [ ] *(Optional)* For the live database demo, have the Postgres connection details on a sticky note.
13
- - [ ] *(Optional)* Remove the login: Space β†’ Settings β†’ secrets β†’ delete `MRNA_STUDIO_PASSWORD` β†’ app opens with no login.
 
 
 
 
 
 
 
14
 
15
  ---
16
 
17
- ## The story β€” follow the sidebar top to bottom (~8–10 min)
 
 
 
 
18
 
19
- **1. Import Data (90s)** β€” "It ingests real-world sequence tables and structures them automatically."
20
- - Drag `demo/demo_sequences_extended.csv` onto the CSV uploader.
21
- - Show the **auto-suggested column mapping** (gene_name, cds, UTRs, …).
22
- - Click **Import Records** β†’ 14 sequences land in the Worklist.
23
- - *Or* demo the **PostgreSQL** path: pick PostgreSQL, paste the connection details, Connect β†’ select `mrna_sequences` β†’ Preview β†’ Import.
 
 
 
 
 
24
 
25
- **2. Worklist β†’ Analyze (2 min)** β€” "Instant QC across the whole panel."
26
- - Select all β†’ **Analyze**.
27
- - Point out **GC%**, **CAI** (codon adaptation), **homopolymer** runs (the poly-A tails!), **restriction sites**.
28
- - Note the contrast: component-based vs monolithic records both analyze cleanly.
 
 
 
 
 
 
 
 
29
 
30
- **3. Model Repository (1 min)** β€” "Pluggable scoring β€” local models or remote APIs."
31
- - Show the two built-in scorers: **mRNA Stability Scorer** and **RNA Structure Scorer**.
32
- - Mention you can register a remote API endpoint too.
 
33
 
34
- **4. Worklist β†’ Score & Export (2 min)** β€” "Rank candidates, hand off to the lab."
35
- - Back on the Worklist, **Score** with a loaded model β†’ **sort by score**.
36
- - **Export CSV** of the ranked panel.
 
 
37
 
38
- **5. Parts Workshop (1 min)** β€” "A reusable parts library."
39
- - Browse 5'UTR / Kozak / CDS / 3'UTR / poly-A parts; compose a construct.
 
 
 
 
 
 
40
 
41
- **6. Assemble Plasmid β†’ Generate Sequences (2 min)** β€” "Close the loop."
42
- - Pick the **pUC19-MCS** backbone, run **QC**, export the assembled construct.
43
- - In **Generate Sequences**, produce a codon-optimized variant.
 
 
44
 
45
  ---
46
 
47
- ## If you have only 3 minutes
48
- Import `demo/demo_sequences_extended.csv` β†’ **Analyze** β†’ **Score** β†’ **Export.**
49
- That's the whole value: ingest β†’ analyze β†’ score β†’ export.
 
50
 
51
  ## Likely questions
52
- - *"Where does the data live?"* β†’ CSV/Excel upload or a PostgreSQL connection you provide.
53
- - *"Can I use my own models?"* β†’ Yes β€” register a local Python model or a remote API endpoint.
54
- - *"Is it hosted?"* β†’ Runs on Hugging Face Spaces (Docker); also runs locally with `make run`.
 
 
 
 
1
+ # mRNA Design Studio — Demo Script (full run-through)
2
 
3
  **Live app:** https://offtargeteffect-mrna-design-studio.hf.space
4
+ **Login:** username `admin` Β· password `vOAMljsXrzCemLZK4A38`
5
+ **Open in its own browser tab** — NOT the Hugging Face embedded preview (that loops on login).
6
 
7
  ---
8
 
9
+ ## 0. Prep (5 min before)
10
+ - [ ] Visit the URL to **wake the Space** (free tier sleeps; first load is slow). Log in once.
11
+ - [ ] Have the CSV ready to drag in: `demo/demo_sequences_extended.csv` (14 constructs).
12
+ - [ ] (Optional) Postgres path — keep these handy for the Import Data → PostgreSQL form:
13
+ host `ep-blue-flower-abs3fw0x.eu-west-2.aws.neon.tech` Β· port `5432` Β· db `neondb`
14
+ Β· user `neondb_owner` Β· pass `npg_oJzU6SfIK7yg` Β· table `mrna_sequences`
15
+ - [ ] (Optional) For a no-login live demo: delete the `MRNA_STUDIO_PASSWORD` secret in Space settings.
16
+
17
+ ## The pitch (say this first, ~30s)
18
+ "This is a workbench that takes mRNA sequence data from import all the way to a
19
+ QC'd, scored, assembled construct — in one no-code UI. I'll walk the funnel:
20
+ **import → analyze & flag liabilities → compare candidates → score → track runs → assemble.**"
21
 
22
  ---
23
 
24
+ ## 1. Import Data (~90s)
25
+ - **[Click]** the **Import Data** tab.
26
+ - **CSV path:** drag `demo_sequences_extended.csv` onto the uploader → it **auto-suggests column mappings** (gene_name, cds, UTRs…) → **Import Records**.
27
+ - **OR Postgres path:** choose **PostgreSQL**, paste the connection details above, **Connect** β†’ pick table `mrna_sequences` β†’ **Preview** β†’ **Import Records**.
28
+ - **[Say]** "It ingests messy real-world tables β€” component-based *or* monolithic β€” and maps them to a structured mRNA model automatically."
29
 
30
+ ## 2. Worklist — analysis + liability/QC (~3 min) ★ NEW
31
+ - **[Click]** the **Worklist** tab β†’ your 14 sequences are listed.
32
+ - **[Click]** the **Analysis** dropdown β†’ **Base Analysis** β†’ **Run**.
33
+ - **[Show]** the new columns populate: **GC%, CAI, Homopolymers, Restriction Sites**, and the new **QC** (`Pass/Review Β· score`) and **Liabilities** count.
34
+ - **[Click] a row** (e.g. `eGFP-hBG-HEK`) β†’ a **Liability / QC breakdown** appears below the table:
35
+ - a **QC scorecard** (0–100 score, Pass/Review/Fail verdict, severity counts),
36
+ - a ranked list of **flags** with severity, detail, location, and a recommendation
37
+ (e.g. internal restriction site, uORF in the 5β€²UTR, elevated uridine).
38
+ - **[Say]** "This is the developability/liability overlay β€” every candidate gets a QC
39
+ score and specific, actionable flags, right on the candidate list."
40
 
41
+ ## 3. Candidate Analysis (~3 min) ★ NEW
42
+ - **[Click]** the **Candidate Analysis** tab.
43
+ - **[Show]** the **Comparison scorecard** β€” every candidate scored 0–100 on the four mRNA
44
+ objectives (**Expression, Stability, Immunogenicity, Manufacturability**) + an **Overall**,
45
+ ranked, with a β˜… **top-N shortlist** (drag the slider).
46
+ - **[Say]** "This is the design trade-off view β€” a candidate can win on expression but lose
47
+ on immunogenicity. You rank and shortlist on the criteria that actually matter for mRNA."
48
+ - **[Use]** the **Inspect candidate** dropdown β†’ the **Sequence / structure map** shows that
49
+ molecule's region bands (5β€²UTR/CDS/3β€²UTR/polyA), GC profile, and markers for restriction
50
+ sites / homopolymers / liability motifs β€” i.e. *where* the problems are β€” plus its full
51
+ liability scorecard.
52
+ - **[Say]** "And drill into any candidate to see exactly where its features and liabilities sit."
53
 
54
+ ## 4. Model Repository (~1 min)
55
+ - **[Click]** the **Model Repository** tab β†’ browse models; note each has a **version**.
56
+ - **[Show]** the two built-in scorers: **mRNA Stability Scorer** and **RNA Structure Scorer**
57
+ (and that you can register a local Python model or a remote API endpoint).
58
 
59
+ ## 5. Score the worklist (~1 min)
60
+ - **[Click]** back to **Worklist** β†’ **Analysis** dropdown β†’ pick a model
61
+ (e.g. **mRNA Stability Scorer**) β†’ **Run**.
62
+ - **[Show]** a score column appears; sort by it to rank candidates. **Export CSV** for the lab.
63
+ - (Run a *second* model too β€” e.g. RNA Structure Scorer β€” so you have two runs to compare next.)
64
 
65
+ ## 6. Experiments — run history + comparison (~2 min) ★ NEW
66
+ - **[Click]** the **Experiments** tab.
67
+ - **[Show]** **Registered models** (with versions) and a **Run history** table β€” every scoring
68
+ run is logged with version, N, mean/range of scores, and timestamp.
69
+ - **[Use]** the **Compare runs** dropdowns (Run A baseline β†’ Run B) β†’ a summary shows
70
+ **mean Δ, ▲ improved / ▼ worsened** counts and a per-sequence delta table.
71
+ - **[Say]** "This is the lifecycle layer: track every scoring run and compare versions or
72
+ scorers to see exactly which candidates moved and by how much."
73
 
74
+ ## 7. Parts Workshop β†’ Assemble β†’ Generate (~2 min)
75
+ - **Parts Workshop:** browse reusable parts (5β€²UTR / Kozak / CDS / 3β€²UTR / poly-A) and compose.
76
+ - **Assemble Plasmid:** pick the **pUC19-MCS** backbone, run **QC**, export the assembled construct.
77
+ - **Generate Sequences:** produce a codon-optimized variant.
78
+ - **[Say]** "Close the loop β€” assemble into a plasmid with QC, or generate optimized variants."
79
 
80
  ---
81
 
82
+ ## If you only have 3 minutes
83
+ Import `demo_sequences_extended.csv` → **Worklist** Run base analysis → click a row for the
84
+ **liability breakdown** → **Candidate Analysis** scorecard + map → score a model → **Experiments** compare.
85
+ That hits the four differentiators (QC liability, candidate comparison, scoring, experiment tracking).
86
 
87
  ## Likely questions
88
+ - *"Where does data live?"* β†’ CSV/Excel upload or a PostgreSQL connection you provide.
89
+ - *"Custom models?"* β†’ register a local Python model or a remote API endpoint; runs are tracked.
90
+ - *"How is this like/unlike ENPICOM?"* → same no-code, data+AI philosophy; this is the
91
+ design/build + light-liability side (mRNA), not NGS-scale repertoire discovery. See
92
+ `demo/ENPICOM_gap_analysis.md`.
93
+ - *"Is it hosted?"* β†’ runs on Hugging Face Spaces (Docker); also runs locally with `make run`.
tests/test_candidate_score.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tests for candidate objective scoring."""
2
+ from types import SimpleNamespace
3
+
4
+ import pytest
5
+
6
+ from core.analysis.candidate_score import score_objectives, OBJECTIVE_WEIGHTS
7
+
8
+
9
+ def _report(cai=0.8, kozak="strong", gc=55.0, u_pct=20.0, stretches=0,
10
+ enzymes=None, flags=None, structure_stub=True):
11
+ return SimpleNamespace(
12
+ cai=cai,
13
+ kozak=SimpleNamespace(strength=kozak) if kozak else None,
14
+ gc_percent_global=gc,
15
+ uridine=SimpleNamespace(u_percent=u_pct, high_u_stretches=[(0, 1, 1)] * stretches),
16
+ restriction_enzymes_present=enzymes or [],
17
+ structure=SimpleNamespace(is_stub=structure_stub, mfe=0.0, sequence=""),
18
+ liability=SimpleNamespace(flags=flags or []),
19
+ )
20
+
21
+
22
+ def _flag(category, severity):
23
+ return SimpleNamespace(category=category, severity=severity)
24
+
25
+
26
class TestObjectiveScores:
    """Behavioural checks for ``score_objectives`` on synthetic reports."""

    def test_ideal_candidate_scores_high(self):
        scores = score_objectives(_report())
        assert scores.expression > 80
        assert scores.immunogenicity > 90  # low uridine
        assert scores.manufacturability == 100  # no liabilities
        assert scores.overall > 80

    def test_weights_sum_to_one(self):
        total = sum(OBJECTIVE_WEIGHTS.values())
        assert abs(total - 1.0) < 1e-9

    def test_overall_is_weighted_mean(self):
        scores = score_objectives(_report())
        blended = (
            OBJECTIVE_WEIGHTS["expression"] * scores.expression
            + OBJECTIVE_WEIGHTS["stability"] * scores.stability
            + OBJECTIVE_WEIGHTS["immunogenicity"] * scores.immunogenicity
            + OBJECTIVE_WEIGHTS["manufacturability"] * scores.manufacturability
        )
        assert scores.overall == pytest.approx(blended, abs=0.5)

    def test_low_cai_and_weak_kozak_drop_expression(self):
        strong = score_objectives(_report(cai=0.9, kozak="strong"))
        weak = score_objectives(_report(cai=0.3, kozak="weak"))
        assert weak.expression < strong.expression

    def test_high_uridine_lowers_immunogenicity_score(self):
        low_u = score_objectives(_report(u_pct=18.0))
        high_u = score_objectives(_report(u_pct=45.0, stretches=2))
        assert high_u.immunogenicity < low_u.immunogenicity

    def test_restriction_sites_lower_manufacturability(self):
        scores = score_objectives(_report(enzymes=["EcoRI", "BamHI"]))
        assert scores.manufacturability < 100

    def test_homopolymer_flag_hits_stability_and_manufacturability(self):
        flagged = score_objectives(_report(flags=[_flag("Homopolymer", "critical")]))
        baseline = score_objectives(_report())
        assert flagged.stability < baseline.stability
        assert flagged.manufacturability < baseline.manufacturability

    def test_missing_metrics_are_neutral_not_zero(self):
        # An attribute-less report must fall back to neutral values.
        scores = score_objectives(SimpleNamespace())
        objectives = (scores.expression, scores.stability,
                      scores.immunogenicity, scores.manufacturability)
        for value in objectives:
            assert 0 < value <= 100

    def test_scores_bounded_0_100(self):
        # Worst case on every axis: nothing may leave the 0–100 range.
        worst_flags = [
            _flag("Homopolymer", "critical"),
            _flag("GC", "critical"),
            _flag("Motif", "warning"),
        ]
        scores = score_objectives(_report(
            cai=0.0, kozak="weak", gc=10.0, u_pct=90.0, stretches=10,
            enzymes=["A", "B", "C", "D"], flags=worst_flags,
        ))
        everything = (scores.expression, scores.stability, scores.immunogenicity,
                      scores.manufacturability, scores.overall)
        for value in everything:
            assert 0 <= value <= 100
ui/app.py CHANGED
@@ -40,7 +40,7 @@ from ui.components.plasmid_view import PlasmidView
40
  from ui.components.model_repository import ModelRepositoryPanel
41
  from ui.components.plasmid_assembly import PlasmidAssemblyPanel
42
  from ui.components.generate_sequences import GenerateSequencesPanel
43
- from ui.components.cluster_view import ClusterView
44
  from ui.components.experiment_view import ExperimentView
45
 
46
 
@@ -152,13 +152,13 @@ _TAB_NAMES = [
152
  "Import Data",
153
  "Model Repository",
154
  "Worklist",
155
- "Cluster & Tree",
156
  "Experiments",
157
  "Parts Workshop",
158
  "Assemble Plasmid",
159
  "Generate Sequences",
160
  ]
161
- _TAB_KEYS = ["import_db", "model_repo", "worklist", "clusters", "experiments", "parts", "assemble", "generate"]
162
 
163
 
164
  logger = logging.getLogger(__name__)
@@ -189,7 +189,7 @@ class StudioApp(param.Parameterized):
189
  self._model_repo = ModelRepositoryPanel(self.state)
190
  self._assembly = PlasmidAssemblyPanel(self.state)
191
  self._generate = GenerateSequencesPanel(self.state)
192
- self._cluster = ClusterView(self.state)
193
  self._experiments = ExperimentView(self.state)
194
 
195
  # ── Build persistent widgets once ─────────────────────────────────────
@@ -206,7 +206,7 @@ class StudioApp(param.Parameterized):
206
  pn.panel(self._worklist.panel),
207
  sizing_mode="stretch_width",
208
  )),
209
- (_TAB_NAMES[3], pn.panel(self._cluster.panel)),
210
  (_TAB_NAMES[4], pn.panel(self._experiments.panel)),
211
  (_TAB_NAMES[5], pn.panel(self._parts.panel)),
212
  (_TAB_NAMES[6], pn.panel(self._assembly.panel)),
 
40
  from ui.components.model_repository import ModelRepositoryPanel
41
  from ui.components.plasmid_assembly import PlasmidAssemblyPanel
42
  from ui.components.generate_sequences import GenerateSequencesPanel
43
+ from ui.components.candidate_view import CandidateView
44
  from ui.components.experiment_view import ExperimentView
45
 
46
 
 
152
  "Import Data",
153
  "Model Repository",
154
  "Worklist",
155
+ "Candidate Analysis",
156
  "Experiments",
157
  "Parts Workshop",
158
  "Assemble Plasmid",
159
  "Generate Sequences",
160
  ]
161
+ _TAB_KEYS = ["import_db", "model_repo", "worklist", "candidates", "experiments", "parts", "assemble", "generate"]
162
 
163
 
164
  logger = logging.getLogger(__name__)
 
189
  self._model_repo = ModelRepositoryPanel(self.state)
190
  self._assembly = PlasmidAssemblyPanel(self.state)
191
  self._generate = GenerateSequencesPanel(self.state)
192
+ self._candidates = CandidateView(self.state)
193
  self._experiments = ExperimentView(self.state)
194
 
195
  # ── Build persistent widgets once ─────────────────────────────────────
 
206
  pn.panel(self._worklist.panel),
207
  sizing_mode="stretch_width",
208
  )),
209
+ (_TAB_NAMES[3], pn.panel(self._candidates.panel)),
210
  (_TAB_NAMES[4], pn.panel(self._experiments.panel)),
211
  (_TAB_NAMES[5], pn.panel(self._parts.panel)),
212
  (_TAB_NAMES[6], pn.panel(self._assembly.panel)),
ui/components/candidate_view.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Candidate Analysis.
3
+
4
+ Two authentic mRNA-design views over the current worklist:
5
+
6
+ A. **Comparison scorecard** — every candidate scored on the four objectives a
7
+ designer trades off (Expression, Stability, Immunogenicity, Manufacturability)
8
+ plus an overall, ranked, with a top-N shortlist.
9
+ B. **Sequence/structure track** — for a selected candidate, a per-position map:
10
+ region bands (5'UTR/Kozak/CDS/3'UTR/polyA), GC sliding window, and markers
11
+ for restriction sites, homopolymers, and liability motifs — i.e. *where* the
12
+ problems are.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ from typing import TYPE_CHECKING, List, Tuple
17
+
18
+ import panel as pn
19
+ import param
20
+ import plotly.graph_objects as go
21
+
22
+ from core.analysis.candidate_score import score_objectives, ObjectiveScores
23
+
24
+ if TYPE_CHECKING:
25
+ from ui.state import AppState
26
+
27
+
28
+ _OBJECTIVES = ["Expression", "Stability", "Immunogenicity", "Manufacturability", "Overall"]
29
+ _REGION_COLORS = {
30
+ "5'UTR": "#3B82F6", "Kozak": "#D97706", "CDS": "#10B981",
31
+ "3'UTR": "#8B5CF6", "PolyA": "#EF4444",
32
+ }
33
+
34
+
35
def _empty(msg: str) -> pn.pane.HTML:
    """Return a centred, muted placeholder pane displaying *msg*."""
    markup = '<div style="color:#64748B;padding:30px;text-align:center;">{}</div>'.format(msg)
    return pn.pane.HTML(markup)
37
+
38
+
39
+ def _score_color(v: float) -> Tuple[str, str]:
40
+ """(background, text) for a 0–100 score."""
41
+ if v >= 80:
42
+ return "#DCFCE7", "#166534"
43
+ if v >= 60:
44
+ return "#FEF9C3", "#854D0E"
45
+ if v >= 40:
46
+ return "#FFEDD5", "#9A3412"
47
+ return "#FEE2E2", "#991B1B"
48
+
49
+
50
class CandidateView(param.Parameterized):
    """Multi-objective candidate comparison + per-candidate track."""

    def __init__(self, state: "AppState", **params: object) -> None:
        """Store the shared app state and create the two persistent control widgets."""
        super().__init__(**params)
        self._state = state
        self._shortlist = pn.widgets.IntSlider(
            name="Shortlist top N", start=1, end=10, value=3, width=220, margin=(4, 10))
        self._candidate = pn.widgets.Select(name="Inspect candidate", width=340, margin=(4, 10))

    # ── analysis ──────────────────────────────────────────────────────────────
    def _analyzed(self) -> List[tuple]:
        """Return [(item, report, ObjectiveScores), …] for worklist items with content.

        Items whose analysis or scoring raises are skipped (best effort), but
        the failure is logged so a missing candidate can be diagnosed.
        """
        import logging

        from core.analysis.analyzer import SequenceAnalyzer

        log = logging.getLogger(__name__)
        az = SequenceAnalyzer()
        out = []
        for item in self._state.worklist.items:
            try:
                rep = az.run_full_analysis(item.sequence)
                out.append((item, rep, score_objectives(rep)))
            except Exception:
                # Deliberately broad: one bad record must not blank the tab.
                log.exception(
                    "Candidate analysis failed for %r; skipping",
                    getattr(getattr(item, "sequence", None), "name", item),
                )
                continue
        return out

    # ── A. comparison scorecard ───────────────────────────────────────────────
    def _comparison_table(self, analyzed: List[tuple], top_n: int) -> pn.pane.HTML:
        """Render the ranked scorecard as an HTML table.

        Candidates are sorted by overall score (descending); the first
        ``top_n`` rows are starred and highlighted as the shortlist.
        """
        import html

        if not analyzed:
            return _empty("No analyzable sequences in the worklist.")
        ranked = sorted(analyzed, key=lambda t: t[2].overall, reverse=True)

        head = (
            '<tr style="font-size:11px;color:#64748B;border-bottom:1px solid #E2E8F0;">'
            '<td style="padding:5px 10px;">#</td><td style="padding:5px 10px;">Candidate</td>'
            + "".join(f'<td style="padding:5px 10px;text-align:center;">{o}</td>' for o in _OBJECTIVES)
            + '</tr>'
        )
        row_html = []
        for i, (item, _rep, s) in enumerate(ranked, 1):
            shortlisted = i <= top_n
            mark = '★' if shortlisted else ''
            name_bg = "background:#F0FDFA;" if shortlisted else ""
            # Names come from imported CSV/DB data, so escape before embedding in HTML.
            safe_name = html.escape(str(item.sequence.name))
            row_vals = s.as_row()  # compute once per candidate, not once per cell
            cells = []
            for obj in _OBJECTIVES:
                val = row_vals[obj]
                bg, fg = _score_color(val)
                weight = "800" if obj == "Overall" else "600"
                cells.append(
                    f'<td style="padding:4px 8px;text-align:center;">'
                    f'<span style="display:inline-block;min-width:34px;background:{bg};color:{fg};'
                    f'border-radius:4px;padding:2px 6px;font-weight:{weight};font-size:12px;">{val}</span></td>'
                )
            row_html.append(
                f'<tr style="border-bottom:1px solid #F1F5F9;{name_bg}">'
                f'<td style="padding:4px 10px;color:#94A3B8;font-size:12px;">{i}</td>'
                f'<td style="padding:4px 10px;font-size:12px;font-weight:600;">{mark} {safe_name}</td>'
                f'{"".join(cells)}</tr>'
            )
        rows = "".join(row_html)
        legend = (
            '<div style="font-size:11px;color:#64748B;margin-top:8px;">'
            'Higher is better (0–100). ★ = shortlisted. '
            'Overall = weighted blend (Expression 30% · Stability 25% · '
            'Immunogenicity 20% · Manufacturability 25%). Heuristic scores from computed metrics.'
            '</div>'
        )
        return pn.pane.HTML(
            f'<table style="border-collapse:collapse;width:100%;">{head}{rows}</table>{legend}'
        )

    # ── B. per-candidate sequence/structure track ──────────────────────────────
    def _track(self, analyzed: List[tuple], candidate_name: str) -> pn.viewable.Viewable:
        """Build the sequence map and liability panel for one candidate.

        The first analyzed entry whose sequence name equals ``candidate_name``
        is used; a placeholder is returned when none matches.
        """
        match = next((t for t in analyzed if t[0].sequence.name == candidate_name), None)
        if match is None:
            return _empty("Select a candidate to inspect.")
        item, report, _scores = match
        seq = item.sequence

        # region bands from component lengths (offsets line up with assembled_sequence)
        comps = [("5'UTR", seq.five_prime_utr), ("Kozak", seq.kozak), ("CDS", seq.cds),
                 ("3'UTR", seq.three_prime_utr), ("PolyA", seq.poly_a)]
        bands: List[Tuple[str, int, int]] = []
        region_off = {}
        offset = 0
        for nm, s in comps:
            if s:
                bands.append((nm, offset, offset + len(s)))
                region_off[nm] = offset
                offset += len(s)
        # With no poly-A component, use a sentinel beyond any real position so
        # no homopolymer run is filtered out below.
        polya_start = region_off.get("PolyA", 10 ** 12)

        fig = go.Figure()
        for nm, a, b in bands:
            fig.add_vrect(x0=a, x1=b, fillcolor=_REGION_COLORS.get(nm, "#94A3B8"),
                          opacity=0.12, line_width=0,
                          annotation_text=nm, annotation_position="top left",
                          annotation_font_size=10)

        pos = report.gc_sliding_positions
        vals = report.gc_sliding_values
        if pos is not None and len(pos):
            fig.add_trace(go.Scatter(x=list(pos), y=list(vals), mode="lines",
                                     line={"color": "#0F766E", "width": 1.4}, name="GC%",
                                     hovertemplate="pos %{x}<br>GC %{y:.0f}%<extra></extra>"))
            fig.add_hline(y=50, line_dash="dot", line_color="#CBD5E1", opacity=0.7)

        # restriction sites
        rx, rt = [], []
        for enz, hits in (report.restriction_hits or {}).items():
            for h in hits:
                rx.append(h.position)
                rt.append(enz)
        if rx:
            fig.add_trace(go.Scatter(x=rx, y=[96] * len(rx), mode="markers",
                                     marker={"symbol": "triangle-down", "size": 10, "color": "#DC2626"},
                                     name="Restriction site", text=rt,
                                     hovertemplate="%{text}<br>pos %{x}<extra></extra>"))

        # homopolymers (exclude the legitimate poly-A tail); filter once, reuse twice
        runs = [r for r in (report.homopolymer_runs or []) if r.start < polya_start]
        hx = [r.start for r in runs]
        ht = [f"{r.nucleotide}×{r.length}" for r in runs]
        if hx:
            fig.add_trace(go.Scatter(x=hx, y=[89] * len(hx), mode="markers",
                                     marker={"symbol": "square", "size": 9, "color": "#D97706"},
                                     name="Homopolymer", text=ht,
                                     hovertemplate="%{text}<br>pos %{x}<extra></extra>"))

        # liability motifs: add the region offset to each hit start; unknown regions use offset 0
        mx, mt = [], []
        for h in (report.motif_hits or []):
            mx.append(region_off.get(h.region, 0) + h.start)
            mt.append(h.label)
        if mx:
            fig.add_trace(go.Scatter(x=mx, y=[82] * len(mx), mode="markers",
                                     marker={"symbol": "diamond", "size": 9, "color": "#7C3AED"},
                                     name="Liability motif", text=mt,
                                     hovertemplate="%{text}<br>pos %{x}<extra></extra>"))

        fig.update_layout(
            title={"text": f"{seq.name} — sequence map", "font": {"size": 13}},
            xaxis_title="position (nt)", yaxis={"title": "GC %", "range": [0, 100]},
            height=340, margin={"l": 55, "r": 20, "t": 40, "b": 45},
            plot_bgcolor="#F8FAFC", paper_bgcolor="white",
            legend={"orientation": "h", "y": -0.3, "font": {"size": 10}},
        )

        # reuse the liability scorecard for this candidate
        from ui.components.analysis_dashboard import render_liability_panel
        return pn.Column(
            pn.pane.Plotly(fig, sizing_mode="stretch_width"),
            render_liability_panel(report),
            sizing_mode="stretch_width",
        )

    # ── panel ─────────────────────────────────────────────────────────────────
    @param.depends("_state.worklist")
    def panel(self) -> pn.Column:
        """Assemble the tab: heading, comparison scorecard card and sequence-map card.

        The worklist is analyzed once per render; the slider and dropdown then
        re-render only their own card through ``pn.bind``.
        """
        wl = self._state.worklist
        if wl is None or wl.count == 0:
            return pn.Column(
                pn.pane.HTML('<div style="font-size:16px;font-weight:800;padding:8px 0;">'
                             'Candidate Analysis</div>'),
                _empty("Worklist is empty. Import sequences to compare candidates."),
                styles={"padding": "8px 16px"},
            )

        analyzed = self._analyzed()
        names = [t[0].sequence.name for t in analyzed]
        self._candidate.options = names
        # Keep the current selection when it still exists; otherwise pick the first.
        if names and (self._candidate.value not in names):
            self._candidate.value = names[0]

        self._shortlist.end = max(1, len(analyzed))
        table = pn.bind(lambda n: self._comparison_table(analyzed, n), self._shortlist)
        track = pn.bind(lambda nm: self._track(analyzed, nm), self._candidate)

        def card(title, body, controls=None):
            """Wrap a titled section (optional controls above the body) in a bordered card."""
            inner = [pn.pane.HTML(f'<div style="font-size:13px;font-weight:700;margin:6px 0;">{title}</div>')]
            if controls is not None:
                inner.append(controls)
            inner.append(body)
            return pn.Column(*inner, styles={"background": "white", "border": "1px solid #CBD5E1",
                                             "border-radius": "8px", "padding": "12px 14px"},
                             margin=(0, 0, 12, 0), sizing_mode="stretch_width")

        return pn.Column(
            pn.pane.HTML(
                '<div style="font-size:16px;font-weight:800;padding:8px 0 2px 0;">'
                f'Candidate Analysis <span style="color:#64748B;font-size:13px;">'
                f'({wl.count} sequences)</span></div>'
                '<div style="font-size:12px;color:#64748B;margin-bottom:8px;">'
                'Rank candidates across the four mRNA design objectives, then inspect where '
                'a candidate&#39;s features and liabilities sit along the molecule.</div>'
            ),
            card("Comparison scorecard", pn.panel(table), self._shortlist),
            card("Sequence / structure map", pn.panel(track), self._candidate),
            sizing_mode="stretch_width",
            styles={"padding": "8px 16px", "max-height": "82vh", "overflow-y": "auto"},
        )