Spaces:
Running
Demo polish: colorblind-safe §6 strand; §5 completion-fold; §1 helix occlusion; §7 score chip; logo SVG; §8 evals refresh
Browse files
- §6 UMAP: strand palette switched from green/red to Okabe-Ito blue/vermillion
so the +/- coloring survives deuteranopia and protanopia (the previous pair
collapsed to two near-identical grays under both common CVD types).
- §5 Folding: new completion-fold pipeline (75% CDS prompt → 25% predicted →
fold the C-term tail with ESMFold); mrna-info row reads the prompt/predict
ratio for completion-fold genes, falls back to the original
genomic→mRNA→ORF chain for legacy fixtures. genes.json gains the
fold_setup / fold_example fields and a sections[] tag to scope the showcase
set per panel.
- §1 Intro: helix occlusion now splits each strand at depth-zero PEAKS rather
than centerline crossings, so the "behind" arcs alternate between upper and
lower lobes and rungs occlude correctly under the front strand. Edge-touching
arcs render solid (no ghosted 5'/3' label ties).
- §7 Tree: agreement-score chip redesigned as a typographic stat block —
percentage in Carbon green + raw ratio + uppercase caption, no chrome.
- Banner: replaces the inline "C / carbon" glyph stamp with a real
img/logo.svg asset; .logo-card simplified to a 44x44 image holder.
- §8 Results: re-pulled values from HuggingFaceBio/final-paper-evals; lede
and takeaway prose updated to match (Carbon-8B leads 5/8 instead of 6/8).
- Misc: shared/config.js gains genesForSection() so each section can scope
the catalog without polluting siblings.
Co-authored-by: Cursor <cursoragent@cursor.com>
- assets/js/sections/completion.js +2 -1
- assets/js/sections/folding.js +24 -1
- assets/js/sections/intro.js +106 -16
- assets/js/sections/results.js +11 -7
- assets/js/sections/track.js +2 -1
- assets/js/sections/tree.js +6 -2
- assets/js/sections/umap.js +7 -2
- assets/js/sections/vep.js +15 -6
- assets/js/shared/config.js +9 -0
- assets/styles/banner.css +13 -26
- assets/styles/section-tree.css +22 -9
- data/genes.json +0 -0
- demo.html +31 -26
- img/logo.svg +20 -0
|
@@ -310,7 +310,8 @@
|
|
| 310 |
}
|
| 311 |
|
| 312 |
// Bootstrap
|
| 313 |
-
loadGenes().then(
|
|
|
|
| 314 |
els.pills.innerHTML = genes.map((g, i) =>
|
| 315 |
`<button class="pill${i === 0 ? " active" : ""}" data-gene="${g.symbol}">${g.symbol}</button>`
|
| 316 |
).join("");
|
|
|
|
| 310 |
}
|
| 311 |
|
| 312 |
// Bootstrap
|
| 313 |
+
loadGenes().then(allGenes => {
|
| 314 |
+
const genes = genesForSection(allGenes, "completion");
|
| 315 |
els.pills.innerHTML = genes.map((g, i) =>
|
| 316 |
`<button class="pill${i === 0 ? " active" : ""}" data-gene="${g.symbol}">${g.symbol}</button>`
|
| 317 |
).join("");
|
|
@@ -195,6 +195,28 @@
|
|
| 195 |
function renderMRNAInfo() {
|
| 196 |
const g = GENES_LOCAL?.find(x => x.symbol === currentGeneSymbol);
|
| 197 |
if (!g) { els.mrna.textContent = "·"; return; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 198 |
const mrna = spliceExons(g.seq, g.exons);
|
| 199 |
const orf = findLongestORF(mrna, 30);
|
| 200 |
const genomicBP = g.length;
|
|
@@ -629,7 +651,8 @@
|
|
| 629 |
}
|
| 630 |
|
| 631 |
// --- Bootstrap ---------------------------------------------------------
|
| 632 |
-
loadGenes().then(
|
|
|
|
| 633 |
GENES_LOCAL = genes;
|
| 634 |
els.pills.innerHTML = genes.map((g, i) =>
|
| 635 |
`<button class="pill${i === 0 ? " active" : ""}" data-gene="${g.symbol}">${g.symbol}</button>`
|
|
|
|
| 195 |
function renderMRNAInfo() {
|
| 196 |
const g = GENES_LOCAL?.find(x => x.symbol === currentGeneSymbol);
|
| 197 |
if (!g) { els.mrna.textContent = "·"; return; }
|
| 198 |
+
|
| 199 |
+
// "completion-fold" genes ship as a CDS-only fixture (no genomic
|
| 200 |
+
// intron annotation): the prompt is the first ~75% of the CDS, Carbon
|
| 201 |
+
// is asked to predict the remaining ~25%, and only that C-terminal
|
| 202 |
+
// tail is folded. Render a reading of THAT pipeline rather than the
|
| 203 |
+
// genomic→mRNA→ORF chain that suits the original §5 fixtures.
|
| 204 |
+
if (g.fold_setup?.mode === "completion-fold") {
|
| 205 |
+
const fs = g.fold_setup;
|
| 206 |
+
const totalBP = g.length;
|
| 207 |
+
const promptBP = fs.prompt_bp;
|
| 208 |
+
const genBP = fs.generated_bp;
|
| 209 |
+
const promptPct = Math.round((promptBP / totalBP) * 100);
|
| 210 |
+
const genPct = 100 - promptPct;
|
| 211 |
+
const aaLen = g.fold_example?.ref_aa?.length ?? Math.floor(genBP / 3);
|
| 212 |
+
els.mrna.innerHTML =
|
| 213 |
+
`<strong>${totalBP.toLocaleString("en-US")} bp</strong> CDS` +
|
| 214 |
+
` <span class="arrow">→</span> prompt <strong>${promptBP.toLocaleString("en-US")} bp</strong> (${promptPct}%)` +
|
| 215 |
+
` <span class="arrow">→</span> predict <strong>${genBP.toLocaleString("en-US")} bp</strong> (${genPct}%)` +
|
| 216 |
+
` <span class="arrow">→</span> fold <strong>${aaLen} aa</strong> C-term`;
|
| 217 |
+
return;
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
const mrna = spliceExons(g.seq, g.exons);
|
| 221 |
const orf = findLongestORF(mrna, 30);
|
| 222 |
const genomicBP = g.length;
|
|
|
|
| 651 |
}
|
| 652 |
|
| 653 |
// --- Bootstrap ---------------------------------------------------------
|
| 654 |
+
loadGenes().then(allGenes => {
|
| 655 |
+
const genes = genesForSection(allGenes, "folding");
|
| 656 |
GENES_LOCAL = genes;
|
| 657 |
els.pills.innerHTML = genes.map((g, i) =>
|
| 658 |
`<button class="pill${i === 0 ? " active" : ""}" data-gene="${g.symbol}">${g.symbol}</button>`
|
|
@@ -222,11 +222,102 @@
|
|
| 222 |
return [r.yTop + sep * 0.25, r.yTop + sep * 0.75];
|
| 223 |
}
|
| 224 |
|
| 225 |
-
//
|
| 226 |
-
//
|
| 227 |
-
//
|
| 228 |
-
//
|
| 229 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
for (const r of rungs) {
|
| 231 |
const fade = fadeForSep(r.sep);
|
| 232 |
const [yA, yB] = rungLetterYs(r);
|
|
@@ -244,19 +335,18 @@
|
|
| 244 |
}
|
| 245 |
}
|
| 246 |
|
| 247 |
-
//
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
d
|
| 253 |
-
|
| 254 |
-
|
|
|
|
| 255 |
}
|
| 256 |
-
el("path", { d: pathD(phaseA), fill: "none", stroke: STROKE, "stroke-width": 2, "stroke-linecap": "round" }, svg);
|
| 257 |
-
el("path", { d: pathD(phaseB), fill: "none", stroke: STROKE, "stroke-width": 2, "stroke-linecap": "round" }, svg);
|
| 258 |
|
| 259 |
-
//
|
| 260 |
// interrupted around each letter). fade tracks fadeForSep so the
|
| 261 |
// glyphs at the crossings sit as ghosted "out of focus" letters
|
| 262 |
// rather than disappearing entirely — fills the visual gap at the
|
|
|
|
| 222 |
return [r.yTop + sep * 0.25, r.yTop + sep * 0.75];
|
| 223 |
}
|
| 224 |
|
| 225 |
+
// Split each strand at its DEPTH transitions (z = 0) rather than at
|
| 226 |
+
// its y=yc crossings. In a 3D helix the depth coord is 90° out of
|
| 227 |
+
// phase with the visible y, so z(x) = cos(2πx/T + phase). z hits
|
| 228 |
+
// zero — i.e. the strand is exactly mid-depth, neither front nor
|
| 229 |
+
// back — at the visual PEAKS of the wave (x = T/4, 3T/4, 5T/4, …).
|
| 230 |
+
// Between two consecutive peaks the strand sits entirely on one
|
| 231 |
+
// side of the camera plane: front if z > 0, back if z < 0. Each
|
| 232 |
+
// "back" arc therefore runs diagonally from one peak to the next
|
| 233 |
+
// (bottom-peak → top-peak, or vice versa), passing THROUGH the
|
| 234 |
+
// centerline crossing in its middle. That's exactly where the
|
| 235 |
+
// strand crosses behind the other strand, which is also where we
|
| 236 |
+
// want the strongest fade — STROKE at the peak ends of each arc,
|
| 237 |
+
// STRAND_BACK in the middle, smoothly back to STROKE at the next
|
| 238 |
+
// peak. Net effect: the fade alternates between the upper-going
|
| 239 |
+
// strand and the lower-going strand as we walk along the helix,
|
| 240 |
+
// instead of always tagging the below-centerline lobes as the
|
| 241 |
+
// back (the bug the previous "split at y=yc" version had — both
|
| 242 |
+
// bottom lobes were faded, both top lobes were full ink). The
|
| 243 |
+
// strand extremities are the exception to that gradient: the
|
| 244 |
+
// edge segments are clipped to x=6 / x=W-6 and, when they are
|
| 245 |
+
// back arcs, render in solid STROKE, so the 5'/3' label ties never
|
| 246 |
+
// look ghosted.
|
| 247 |
+
const STRAND_BACK = "rgb(185, 185, 175)"; // ~25% contrast vs paper — clearly recessed
|
| 248 |
+
|
| 249 |
+
// Boundaries: the strand extremities plus every depth-zero peak
|
| 250 |
+
// (x = T/4, 3T/4, …) that falls inside the SVG.
|
| 251 |
+
const boundaries = [0];
|
| 252 |
+
for (let k = 0; ; k++) {
|
| 253 |
+
const x = period / 4 + k * (period / 2);
|
| 254 |
+
if (x >= W) break;
|
| 255 |
+
boundaries.push(x);
|
| 256 |
+
}
|
| 257 |
+
boundaries.push(W);
|
| 258 |
+
|
| 259 |
+
const defs = el("defs", {}, svg);
|
| 260 |
+
let backGradCount = 0;
|
| 261 |
+
function backGradId(x0, x1) {
|
| 262 |
+
const id = `cd-helix-back-${backGradCount++}`;
|
| 263 |
+
const grad = el("linearGradient", {
|
| 264 |
+
id, gradientUnits: "userSpaceOnUse",
|
| 265 |
+
x1: x0, y1: 0, x2: x1, y2: 0,
|
| 266 |
+
}, defs);
|
| 267 |
+
el("stop", { offset: "0%", "stop-color": STROKE }, grad);
|
| 268 |
+
el("stop", { offset: "50%", "stop-color": STRAND_BACK }, grad);
|
| 269 |
+
el("stop", { offset: "100%", "stop-color": STROKE }, grad);
|
| 270 |
+
return id;
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
function zAt(x, phase) { return Math.cos((2 * Math.PI * x) / period + phase); }
|
| 274 |
+
|
| 275 |
+
function buildSegments(phase) {
|
| 276 |
+
const out = [];
|
| 277 |
+
for (let i = 0; i < boundaries.length - 1; i++) {
|
| 278 |
+
const b0 = boundaries[i], b1 = boundaries[i + 1];
|
| 279 |
+
const x0 = Math.max(b0, 6);
|
| 280 |
+
const x1 = Math.min(b1, W - 6);
|
| 281 |
+
if (x1 - x0 < 1) continue;
|
| 282 |
+
// Classify against the UN-clipped segment centre so edge-clipped
|
| 283 |
+
// segments still pick up the right sign of z.
|
| 284 |
+
const isFront = zAt((b0 + b1) / 2, phase) > 0;
|
| 285 |
+
let d = "";
|
| 286 |
+
for (let x = x0; x <= x1; x += 2) {
|
| 287 |
+
d += (d ? " L " : "M ") + x.toFixed(2) + " " + yAt(x, phase).toFixed(2);
|
| 288 |
+
}
|
| 289 |
+
// Make sure the path reaches x1 exactly: the 2-px sampling
|
| 290 |
+
// can otherwise leave a hairline gap right at the join.
|
| 291 |
+
const lastX = x0 + Math.floor((x1 - x0) / 2) * 2;
|
| 292 |
+
if (lastX < x1) d += " L " + x1.toFixed(2) + " " + yAt(x1, phase).toFixed(2);
|
| 293 |
+
out.push({ d, isFront, x0, x1 });
|
| 294 |
+
}
|
| 295 |
+
return out;
|
| 296 |
+
}
|
| 297 |
+
const allSegs = buildSegments(phaseA).concat(buildSegments(phaseB));
|
| 298 |
+
|
| 299 |
+
// 1. Back arcs — drawn first so the rungs sit on top of them.
|
| 300 |
+
// Arcs that touch a strand extremity (the very-left or very-right
|
| 301 |
+
// edge of the rendered range) skip the fade gradient entirely and
|
| 302 |
+
// render in solid STROKE: the 5'/3' label ties should land on a
|
| 303 |
+
// fully-inked stretch of strand, not on a half-ghosted segment.
|
| 304 |
+
// They still draw in the back z-layer so they sit BEHIND the
|
| 305 |
+
// rungs like any other recessed arc.
|
| 306 |
+
for (const s of allSegs) {
|
| 307 |
+
if (s.isFront) continue;
|
| 308 |
+
const touchesEdge = s.x0 <= 6 || s.x1 >= W - 6;
|
| 309 |
+
el("path", {
|
| 310 |
+
d: s.d, fill: "none",
|
| 311 |
+
stroke: touchesEdge ? STROKE : `url(#${backGradId(s.x0, s.x1)})`,
|
| 312 |
+
"stroke-width": 2, "stroke-linecap": "round",
|
| 313 |
+
}, svg);
|
| 314 |
+
}
|
| 315 |
+
|
| 316 |
+
// 2. Rungs. Each rung becomes up to three short segments,
|
| 317 |
+
// interrupted around each letter so the letter reads as sitting
|
| 318 |
+
// *in* the rung. Rungs at the helix waist still get drawn but
|
| 319 |
+
// with their opacity tapered via fadeForSep so the crossings
|
| 320 |
+
// don't read as bald gaps.
|
| 321 |
for (const r of rungs) {
|
| 322 |
const fade = fadeForSep(r.sep);
|
| 323 |
const [yA, yB] = rungLetterYs(r);
|
|
|
|
| 335 |
}
|
| 336 |
}
|
| 337 |
|
| 338 |
+
// 3. Front arcs — on top of the rungs so they visually OCCLUDE
|
| 339 |
+
// the rungs they pass in front of, completing the depth illusion.
|
| 340 |
+
for (const s of allSegs) {
|
| 341 |
+
if (!s.isFront) continue;
|
| 342 |
+
el("path", {
|
| 343 |
+
d: s.d, fill: "none",
|
| 344 |
+
stroke: STROKE,
|
| 345 |
+
"stroke-width": 2, "stroke-linecap": "round",
|
| 346 |
+
}, svg);
|
| 347 |
}
|
|
|
|
|
|
|
| 348 |
|
| 349 |
+
// 4. Letter glyphs themselves (no haloes, since the rung is already
|
| 350 |
// interrupted around each letter). fade tracks fadeForSep so the
|
| 351 |
// glyphs at the crossings sit as ghosted "out of focus" letters
|
| 352 |
// rather than disappearing entirely — fills the visual gap at the
|
|
@@ -31,14 +31,18 @@
|
|
| 31 |
// Rows are ordered by capability axis. Category strings group consecutive
|
| 32 |
// rows; the renderer collapses runs of identical category into a single
|
| 33 |
// italic gutter label on the left.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
const ROWS = [
|
| 35 |
-
{ task: "Sequence recovery", cat: "Generative", vals: [64.
|
| 36 |
-
{ task: "BRCA2", cat: "Variant effect", vals: [85.
|
| 37 |
-
{ task: "TraitGym Mendelian", cat: "Variant effect", vals: [36.
|
| 38 |
-
{ task: "ClinVar coding", cat: "Variant effect", vals: [93.
|
| 39 |
-
{ task: "ClinVar non-coding", cat: "Variant effect", vals: [91.
|
| 40 |
-
{ task: "Triplet expansion", cat: "Perturbation", vals: [
|
| 41 |
-
{ task: "Synonymous codons", cat: "Perturbation", vals: [
|
| 42 |
{ task: "Genome-NIAH · 393 kbp", cat: "Long-context", vals: [86.00, 79.00, 80.00, null] },
|
| 43 |
];
|
| 44 |
|
|
|
|
| 31 |
// Rows are ordered by capability axis. Category strings group consecutive
|
| 32 |
// rows; the renderer collapses runs of identical category into a single
|
| 33 |
// italic gutter label on the left.
|
| 34 |
+
// Values from the final-paper-evals dataset (HuggingFaceBio/final-paper-evals).
|
| 35 |
+
// Synonymous codons is the mean of syn_human and syn_mouse (the dataset
|
| 36 |
+
// ships an "avg" column that already does this). Column order in `vals`
|
| 37 |
+
// mirrors MODELS above: [Carbon-8B, Carbon-3B, Evo2-7B, GENERator-v2 3B].
|
| 38 |
const ROWS = [
|
| 39 |
+
{ task: "Sequence recovery", cat: "Generative", vals: [64.05, 61.54, 59.86, 58.56] },
|
| 40 |
+
{ task: "BRCA2", cat: "Variant effect", vals: [85.72, 84.63, 83.52, 81.93] },
|
| 41 |
+
{ task: "TraitGym Mendelian", cat: "Variant effect", vals: [36.43, 33.65, 37.78, 27.91] },
|
| 42 |
+
{ task: "ClinVar coding", cat: "Variant effect", vals: [93.11, 92.89, 93.33, 91.55] },
|
| 43 |
+
{ task: "ClinVar non-coding", cat: "Variant effect", vals: [91.63, 91.14, 89.79, 90.13] },
|
| 44 |
+
{ task: "Triplet expansion", cat: "Perturbation", vals: [89.05, 85.20, 88.43, 83.06] },
|
| 45 |
+
{ task: "Synonymous codons", cat: "Perturbation", vals: [91.46, 88.89, 91.59, 87.03] },
|
| 46 |
{ task: "Genome-NIAH · 393 kbp", cat: "Long-context", vals: [86.00, 79.00, 80.00, null] },
|
| 47 |
];
|
| 48 |
|
|
@@ -198,7 +198,8 @@
|
|
| 198 |
updateStats();
|
| 199 |
}
|
| 200 |
|
| 201 |
-
loadGenes().then(
|
|
|
|
| 202 |
// Hydrate cache from precomputed tracks
|
| 203 |
for (const g of genes) {
|
| 204 |
if (g.track) {
|
|
|
|
| 198 |
updateStats();
|
| 199 |
}
|
| 200 |
|
| 201 |
+
loadGenes().then(allGenes => {
|
| 202 |
+
const genes = genesForSection(allGenes, "track");
|
| 203 |
// Hydrate cache from precomputed tracks
|
| 204 |
for (const g of genes) {
|
| 205 |
if (g.track) {
|
|
@@ -46,6 +46,7 @@
|
|
| 46 |
rows: document.getElementById("dtree-rows"),
|
| 47 |
info: document.getElementById("dtree-info"),
|
| 48 |
score: document.getElementById("dtree-score"),
|
|
|
|
| 49 |
scoreSx: document.getElementById("dtree-score-suffix"),
|
| 50 |
nSp: document.getElementById("dtree-n"),
|
| 51 |
nSeq: document.getElementById("dtree-nseq"),
|
|
@@ -97,6 +98,8 @@
|
|
| 97 |
}
|
| 98 |
|
| 99 |
// -------- score chip --------
|
|
|
|
|
|
|
| 100 |
function updateScore() {
|
| 101 |
let m = 0, total = 0;
|
| 102 |
Object.values(agreement).forEach(v => {
|
|
@@ -105,8 +108,9 @@
|
|
| 105 |
if (v === "match") m += 1;
|
| 106 |
});
|
| 107 |
const pct = total ? Math.round(100 * m / total) : 0;
|
| 108 |
-
els.
|
| 109 |
-
els.
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
// -------- SVG dendrogram spine --------
|
|
|
|
| 46 |
rows: document.getElementById("dtree-rows"),
|
| 47 |
info: document.getElementById("dtree-info"),
|
| 48 |
score: document.getElementById("dtree-score"),
|
| 49 |
+
scorePct: document.getElementById("dtree-score-pct"),
|
| 50 |
scoreSx: document.getElementById("dtree-score-suffix"),
|
| 51 |
nSp: document.getElementById("dtree-n"),
|
| 52 |
nSeq: document.getElementById("dtree-nseq"),
|
|
|
|
| 98 |
}
|
| 99 |
|
| 100 |
// -------- score chip --------
|
| 101 |
+
// Three pieces of typography: headline % (Carbon green), raw ratio
|
| 102 |
+
// (m of total, muted), uppercase caption naming the comparison.
|
| 103 |
function updateScore() {
|
| 104 |
let m = 0, total = 0;
|
| 105 |
Object.values(agreement).forEach(v => {
|
|
|
|
| 108 |
if (v === "match") m += 1;
|
| 109 |
});
|
| 110 |
const pct = total ? Math.round(100 * m / total) : 0;
|
| 111 |
+
if (els.scorePct) els.scorePct.textContent = `${pct}%`;
|
| 112 |
+
els.score.textContent = `${m} of ${total}`;
|
| 113 |
+
els.scoreSx.textContent = `match · ncbi ${state.scope}`;
|
| 114 |
}
|
| 115 |
|
| 116 |
// -------- SVG dendrogram spine --------
|
|
@@ -62,9 +62,14 @@
|
|
| 62 |
[40,100,200], // snRNA , vivid blue
|
| 63 |
[240,160,30], // misc_RNA , amber (was gray, invisible)
|
| 64 |
];
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
const STRAND_PALETTE = [
|
| 66 |
-
[
|
| 67 |
-
[
|
| 68 |
];
|
| 69 |
// Continuous gradient for gc_content (uint8 0..255 → [0, 1]).
|
| 70 |
// 3-stop: low GC (AT-rich) reads as cool steel, mid as neutral, high
|
|
|
|
| 62 |
[40,100,200], // snRNA , vivid blue
|
| 63 |
[240,160,30], // misc_RNA , amber (was gray, invisible)
|
| 64 |
];
|
| 65 |
+
// Forward / reverse strand. Blue / orange derived from the Okabe-Ito palette,
|
| 66 |
+
// a standard in scientific visualization for binary contrasts: it stays
|
| 67 |
+
// legible under deuteranopia and protanopia (the two most common forms
|
| 68 |
+
// of color blindness), where the typical green / reverse-red pair collapses
|
| 69 |
+
// into two indistinguishable grays.
|
| 70 |
const STRAND_PALETTE = [
|
| 71 |
+
[0,114,178], // + (forward), Okabe-Ito blue
|
| 72 |
+
[213,94,0], // - (reverse), Okabe-Ito vermillion
|
| 73 |
];
|
| 74 |
// Continuous gradient for gc_content (uint8 0..255 → [0, 1]).
|
| 75 |
// 3-stop: low GC (AT-rich) reads as cool steel, mid as neutral, high
|
|
@@ -120,14 +120,19 @@
|
|
| 120 |
: `<span class="seq-char arrow-char"> </span>`
|
| 121 |
).join("");
|
| 122 |
|
| 123 |
-
// Scores. While a rescore is pending
|
| 124 |
-
//
|
| 125 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
let origVal = "·", mutVal = "·", origCls = "", mutCls = "";
|
| 127 |
let verdictHtml = `<span style="color:#888">computing model likelihoods…</span>`;
|
| 128 |
-
const s = editedScore;
|
| 129 |
const cached = cache[v.rs];
|
| 130 |
-
|
| 131 |
if (s) {
|
| 132 |
const delta = s.altSum - s.refSum;
|
| 133 |
origVal = s.refSum.toFixed(2);
|
|
@@ -188,13 +193,17 @@
|
|
| 188 |
const next = cycleBase(cur);
|
| 189 |
mutationSlice = mutationSlice.slice(0, idx) + next + mutationSlice.slice(idx + 1);
|
| 190 |
editedScore = null; // pending refetch
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
renderWindowDisplay(v);
|
| 192 |
clearTimeout(rescoreTimer);
|
| 193 |
rescoreTimer = setTimeout(() => rescoreEdited(v), 500);
|
| 194 |
}
|
| 195 |
|
| 196 |
async function rescoreEdited(v) {
|
| 197 |
-
setStatus(
|
| 198 |
try {
|
| 199 |
const altFull = altWindow(v);
|
| 200 |
const [refResp, altResp] = await Promise.all([
|
|
|
|
| 120 |
: `<span class="seq-char arrow-char"> </span>`
|
| 121 |
).join("");
|
| 122 |
|
| 123 |
+
// Scores + verdict. While a rescore is pending after a user edit
|
| 124 |
+
// (editedScore == null but cache[v.rs] is set), we fall back to the
|
| 125 |
+
// cached canonical score so the verdict sentence and both score cells
|
| 126 |
+
// keep rendering instead of collapsing to "computing model likelihoods…"
|
| 127 |
+
// — that swap shrank the multi-line verdict to a single line and made
|
| 128 |
+
// the whole demo box jump in height on every click. The "pending"
|
| 129 |
+
// state is communicated by the toolbar status pill (set in
|
| 130 |
+
// onBaseClick) instead. On a first-time load of an unscored variant
|
| 131 |
+
// (no cache yet) s is null and we keep the original placeholder.
|
| 132 |
let origVal = "·", mutVal = "·", origCls = "", mutCls = "";
|
| 133 |
let verdictHtml = `<span style="color:#888">computing model likelihoods…</span>`;
|
|
|
|
| 134 |
const cached = cache[v.rs];
|
| 135 |
+
const s = editedScore || cached;
|
| 136 |
if (s) {
|
| 137 |
const delta = s.altSum - s.refSum;
|
| 138 |
origVal = s.refSum.toFixed(2);
|
|
|
|
| 193 |
const next = cycleBase(cur);
|
| 194 |
mutationSlice = mutationSlice.slice(0, idx) + next + mutationSlice.slice(idx + 1);
|
| 195 |
editedScore = null; // pending refetch
|
| 196 |
+
// Surface the pending state in the toolbar pill right away (before the
|
| 197 |
+
// 500 ms debounce kicks in). The verdict block keeps the previous
|
| 198 |
+
// sentence visible so the box height stays put while we wait.
|
| 199 |
+
setStatus("pending", "streaming");
|
| 200 |
renderWindowDisplay(v);
|
| 201 |
clearTimeout(rescoreTimer);
|
| 202 |
rescoreTimer = setTimeout(() => rescoreEdited(v), 500);
|
| 203 |
}
|
| 204 |
|
| 205 |
async function rescoreEdited(v) {
|
| 206 |
+
setStatus("pending", "streaming");
|
| 207 |
try {
|
| 208 |
const altFull = altWindow(v);
|
| 209 |
const [refResp, altResp] = await Promise.all([
|
|
@@ -30,3 +30,12 @@ function loadGenes() {
|
|
| 30 |
}
|
| 31 |
return GENES_PROMISE;
|
| 32 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
}
|
| 31 |
return GENES_PROMISE;
|
| 32 |
}
|
| 33 |
+
|
| 34 |
+
// Filter the gene catalog down to those declared as belonging to a given
|
| 35 |
+
// section ("completion" / "track" / "folding"). Genes that don't ship a
|
| 36 |
+
// `sections` field are visible everywhere (legacy behaviour); genes that
|
| 37 |
+
// do are scoped to the named sections only. Lets §5 swap in a "Carbon
|
| 38 |
+
// works well here" showcase set without polluting §1 / §3, and vice-versa.
|
| 39 |
+
function genesForSection(genes, name) {
|
| 40 |
+
return genes.filter(g => !g.sections || g.sections.includes(name));
|
| 41 |
+
}
|
|
@@ -59,37 +59,24 @@
|
|
| 59 |
align-items: center;
|
| 60 |
gap: 10px;
|
| 61 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
.logo-card {
|
| 63 |
width: 44px;
|
| 64 |
height: 44px;
|
| 65 |
-
|
| 66 |
-
border-radius: 0; /* sharp corners, feels more "stamp" than "chip" */
|
| 67 |
-
background: #fbfaf3;
|
| 68 |
-
display: flex;
|
| 69 |
-
flex-direction: column;
|
| 70 |
-
align-items: center;
|
| 71 |
-
justify-content: center;
|
| 72 |
-
text-decoration: none;
|
| 73 |
-
color: var(--ink);
|
| 74 |
flex-shrink: 0;
|
| 75 |
-
|
| 76 |
-
transition:
|
| 77 |
-
}
|
| 78 |
-
.logo-card:hover { border-color: #1f1f1d; background: #fff; }
|
| 79 |
-
.logo-glyph {
|
| 80 |
-
font-family: "Inter", -apple-system, BlinkMacSystemFont, sans-serif;
|
| 81 |
-
font-size: 22px;
|
| 82 |
-
font-weight: 700;
|
| 83 |
-
line-height: 1;
|
| 84 |
-
letter-spacing: -0.02em;
|
| 85 |
}
|
| 86 |
-
.logo-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
margin-top: 2px;
|
| 92 |
-
letter-spacing: 0.04em;
|
| 93 |
}
|
| 94 |
.banner-breadcrumb {
|
| 95 |
display: flex;
|
|
|
|
| 59 |
align-items: center;
|
| 60 |
gap: 10px;
|
| 61 |
}
|
| 62 |
+
/* The SVG ships with its own black frame + white fill (see img/logo.svg),
|
| 63 |
+
so the parent .logo-card no longer paints a border or a background of
|
| 64 |
+
its own — the previous cream chip + 1px hairline doubled up with the
|
| 65 |
+
SVG's own stamp. We keep the fixed 44x44 footprint and a subtle
|
| 66 |
+
opacity dip on hover so the link still has an affordance. */
|
| 67 |
.logo-card {
|
| 68 |
width: 44px;
|
| 69 |
height: 44px;
|
| 70 |
+
display: block;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
flex-shrink: 0;
|
| 72 |
+
text-decoration: none;
|
| 73 |
+
transition: opacity 0.18s;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
}
|
| 75 |
+
.logo-card:hover { opacity: 0.78; }
|
| 76 |
+
.logo-img {
|
| 77 |
+
width: 100%;
|
| 78 |
+
height: 100%;
|
| 79 |
+
display: block;
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
.banner-breadcrumb {
|
| 82 |
display: flex;
|
|
@@ -21,21 +21,34 @@
|
|
| 21 |
text-transform: lowercase;
|
| 22 |
letter-spacing: 0;
|
| 23 |
}
|
| 24 |
-
/*
|
|
|
|
|
|
|
|
|
|
| 25 |
.tree-score {
|
| 26 |
-
display: inline-flex;
|
| 27 |
-
|
| 28 |
-
padding: 6px 12px; border-radius: 3px;
|
| 29 |
font-family: "JetBrains Mono", monospace;
|
| 30 |
color: #1f1f1d;
|
| 31 |
}
|
| 32 |
-
.tree-score-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
| 35 |
color: #317f3f;
|
|
|
|
|
|
|
| 36 |
}
|
| 37 |
-
.tree-score-
|
| 38 |
-
font-size:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
text-transform: uppercase; letter-spacing: 1.2px;
|
| 40 |
}
|
| 41 |
/* Main grid : SVG spine on the left, aligned tracks on the right.
|
|
|
|
| 21 |
text-transform: lowercase;
|
| 22 |
letter-spacing: 0;
|
| 23 |
}
|
| 24 |
+
/* Headline agreement metric for §7: a percentage in Carbon green next to
|
| 25 |
+
the raw ratio, with a discrete uppercase caption underneath naming what
|
| 26 |
+
the score compares against (e.g. "match · ncbi kingdom"). Pure typography,
|
| 27 |
+
no chrome, no progress bar — sits in the toolbar as a quiet stat block. */
|
| 28 |
.tree-score {
|
| 29 |
+
display: inline-flex; flex-direction: column;
|
| 30 |
+
align-items: flex-end; gap: 2px;
|
|
|
|
| 31 |
font-family: "JetBrains Mono", monospace;
|
| 32 |
color: #1f1f1d;
|
| 33 |
}
|
| 34 |
+
.tree-score-headline {
|
| 35 |
+
display: flex; align-items: baseline; gap: 8px;
|
| 36 |
+
}
|
| 37 |
+
.tree-score-pct {
|
| 38 |
+
font-size: 17px; font-weight: 700;
|
| 39 |
+
line-height: 1;
|
| 40 |
color: #317f3f;
|
| 41 |
+
font-variant-numeric: tabular-nums;
|
| 42 |
+
letter-spacing: -0.01em;
|
| 43 |
}
|
| 44 |
+
.tree-score-ratio {
|
| 45 |
+
font-size: 11px;
|
| 46 |
+
color: #888;
|
| 47 |
+
font-variant-numeric: tabular-nums;
|
| 48 |
+
white-space: nowrap;
|
| 49 |
+
}
|
| 50 |
+
.tree-score-label {
|
| 51 |
+
font-size: 9px; color: #888;
|
| 52 |
text-transform: uppercase; letter-spacing: 1.2px;
|
| 53 |
}
|
| 54 |
/* Main grid : SVG spine on the left, aligned tracks on the right.
|
|
The diff for this file is too large to render.
See raw diff
|
|
|
|
@@ -57,8 +57,7 @@
|
|
| 57 |
path beside it functions as a breadcrumb / model identifier. -->
|
| 58 |
<div class="banner-identity">
|
| 59 |
<a class="logo-card" href="#" aria-label="Carbon, go to top">
|
| 60 |
-
<
|
| 61 |
-
<span class="logo-label">carbon</span>
|
| 62 |
</a>
|
| 63 |
<div class="banner-breadcrumb">
|
| 64 |
<div class="banner-title">CARBON</div>
|
|
@@ -91,7 +90,7 @@
|
|
| 91 |
</a>
|
| 92 |
</li>
|
| 93 |
<li>
|
| 94 |
-
<a href="
|
| 95 |
Code<span class="arrow" aria-hidden="true">↗</span>
|
| 96 |
</a>
|
| 97 |
</li>
|
|
@@ -935,10 +934,11 @@ for name, ids in zip(species_prefixes, new_ids):
|
|
| 935 |
<div class="section-num">§5 · Folding</div>
|
| 936 |
<div class="section-title">From sequence to structure</div>
|
| 937 |
<p class="lede">
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
can
|
|
|
|
| 942 |
</p>
|
| 943 |
</div>
|
| 944 |
|
|
@@ -955,10 +955,10 @@ for name, ids in zip(species_prefixes, new_ids):
|
|
| 955 |
</div>
|
| 956 |
|
| 957 |
<div class="gene-info" id="dfold-info">loading genes…</div>
|
| 958 |
-
<!-- Materialises the §5 lede's "
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
|
| 962 |
<div class="mrna-info" id="dfold-mrna">·</div>
|
| 963 |
|
| 964 |
<div class="fold-aa-grid">
|
|
@@ -1015,10 +1015,12 @@ for name, ids in zip(species_prefixes, new_ids):
|
|
| 1015 |
|
| 1016 |
<div class="takeaway">
|
| 1017 |
<strong>What to look for</strong>
|
| 1018 |
-
A high <em>pLDDT</em> means ESMFold is confident the predicted structure
|
| 1019 |
-
|
| 1020 |
-
base level
|
| 1021 |
-
|
|
|
|
|
|
|
| 1022 |
</div>
|
| 1023 |
</div>
|
| 1024 |
</section>
|
|
@@ -1123,10 +1125,13 @@ for name, ids in zip(species_prefixes, new_ids):
|
|
| 1123 |
<button class="pill" data-scope="sister">sister-level</button>
|
| 1124 |
</span>
|
| 1125 |
<span class="spacer"></span>
|
| 1126 |
-
<
|
| 1127 |
-
<
|
| 1128 |
-
|
| 1129 |
-
|
|
|
|
|
|
|
|
|
|
| 1130 |
</div>
|
| 1131 |
|
| 1132 |
<div class="gene-info" id="dtree-info">hover a row to see its top neighbours · toggle linkage / scope above</div>
|
|
@@ -1699,7 +1704,7 @@ print(tok.decode(new_ids, skip_special_tokens=True))</code></pre></div>
|
|
| 1699 |
perturbation (synthetic motif insertion and synonymous codon shuffling), and long-context
|
| 1700 |
retrieval (Genome-NIAH at 393 kbp). No fine-tuning, no head training, all four frozen
|
| 1701 |
pretrained models scored under the same protocol. Carbon-3B is competitive with Evo2-7B
|
| 1702 |
-
despite less than half the parameters; Carbon-8B is ahead on
|
| 1703 |
</p>
|
| 1704 |
</div>
|
| 1705 |
|
|
@@ -1716,12 +1721,12 @@ print(tok.decode(new_ids, skip_special_tokens=True))</code></pre></div>
|
|
| 1716 |
|
| 1717 |
<div class="takeaway">
|
| 1718 |
<strong>How to read it</strong>
|
| 1719 |
-
Carbon-8B leads on BRCA2,
|
| 1720 |
-
393 kbp. Evo2-7B
|
| 1721 |
-
on
|
| 1722 |
-
|
| 1723 |
-
tokenizer, and objective design, distributed across tasks,
|
| 1724 |
-
benchmark.
|
| 1725 |
</div>
|
| 1726 |
</div>
|
| 1727 |
</section>
|
|
|
|
| 57 |
path beside it functions as a breadcrumb / model identifier. -->
|
| 58 |
<div class="banner-identity">
|
| 59 |
<a class="logo-card" href="#" aria-label="Carbon, go to top">
|
| 60 |
+
<img class="logo-img" src="/img/logo.svg" alt="" width="44" height="44">
|
|
|
|
| 61 |
</a>
|
| 62 |
<div class="banner-breadcrumb">
|
| 63 |
<div class="banner-title">CARBON</div>
|
|
|
|
| 90 |
</a>
|
| 91 |
</li>
|
| 92 |
<li>
|
| 93 |
+
<a href="https://github.com/huggingface/carbon" target="_blank" rel="noopener">
|
| 94 |
Code<span class="arrow" aria-hidden="true">↗</span>
|
| 95 |
</a>
|
| 96 |
</li>
|
|
|
|
| 934 |
<div class="section-num">§5 · Folding</div>
|
| 935 |
<div class="section-title">From sequence to structure</div>
|
| 936 |
<p class="lede">
|
| 937 |
+
Show Carbon the first <em>75%</em> of a coding sequence, ask it to predict the remaining
|
| 938 |
+
<em>25%</em>, then translate and fold the resulting C-terminal stretch with ESMFold. Each
|
| 939 |
+
panel below pairs Carbon's predicted protein against the reference fold for the same
|
| 940 |
+
residues, so you can read at a glance whether the bases the model emitted assemble into a
|
| 941 |
+
biologically plausible structure or collapse into noise.
|
| 942 |
</p>
|
| 943 |
</div>
|
| 944 |
|
|
|
|
| 955 |
</div>
|
| 956 |
|
| 957 |
<div class="gene-info" id="dfold-info">loading genes…</div>
|
| 958 |
+
<!-- Materialises the §5 lede's "75% prompt → 25% prediction → fold"
|
| 959 |
+
pipeline for the currently selected gene, so the visitor sees how
|
| 960 |
+
many bp Carbon was given vs how many it had to predict before any
|
| 961 |
+
folding happens. -->
|
| 962 |
<div class="mrna-info" id="dfold-mrna">·</div>
|
| 963 |
|
| 964 |
<div class="fold-aa-grid">
|
|
|
|
| 1015 |
|
| 1016 |
<div class="takeaway">
|
| 1017 |
<strong>What to look for</strong>
|
| 1018 |
+
A high <em>pLDDT</em> means ESMFold is confident in the predicted structure
|
| 1019 |
+
at that residue. The interesting case is when Carbon's completion <em>diverges
|
| 1020 |
+
at the base level</em> — sometimes drastically, like CFTR at ~22% identity —
|
| 1021 |
+
but still folds with high confidence into a shape that mirrors the reference
|
| 1022 |
+
backbone. That's the model reaching past memorization for the structural
|
| 1023 |
+
grammar underneath the sequence.
|
| 1024 |
</div>
|
| 1025 |
</div>
|
| 1026 |
</section>
|
|
|
|
| 1125 |
<button class="pill" data-scope="sister">sister-level</button>
|
| 1126 |
</span>
|
| 1127 |
<span class="spacer"></span>
|
| 1128 |
+
<div class="tree-score">
|
| 1129 |
+
<div class="tree-score-headline">
|
| 1130 |
+
<span class="tree-score-pct" id="dtree-score-pct">·</span>
|
| 1131 |
+
<span class="tree-score-ratio" id="dtree-score">·</span>
|
| 1132 |
+
</div>
|
| 1133 |
+
<div class="tree-score-label" id="dtree-score-suffix">match · ncbi kingdom</div>
|
| 1134 |
+
</div>
|
| 1135 |
</div>
|
| 1136 |
|
| 1137 |
<div class="gene-info" id="dtree-info">hover a row to see its top neighbours · toggle linkage / scope above</div>
|
|
|
|
| 1704 |
perturbation (synthetic motif insertion and synonymous codon shuffling), and long-context
|
| 1705 |
retrieval (Genome-NIAH at 393 kbp). No fine-tuning, no head training, all four frozen
|
| 1706 |
pretrained models scored under the same protocol. Carbon-3B is competitive with Evo2-7B
|
| 1707 |
+
despite less than half the parameters; Carbon-8B is ahead on five of eight.
|
| 1708 |
</p>
|
| 1709 |
</div>
|
| 1710 |
|
|
|
|
| 1721 |
|
| 1722 |
<div class="takeaway">
|
| 1723 |
<strong>How to read it</strong>
|
| 1724 |
+
Carbon-8B leads on sequence recovery, BRCA2, ClinVar non-coding, triplet expansion, and
|
| 1725 |
+
Genome-NIAH at 393 kbp. Evo2-7B holds onto TraitGym Mendelian (a hard non-coding variant set),
|
| 1726 |
+
and edges Carbon-8B on ClinVar coding and synonymous codon shuffling by a fraction of a point
|
| 1727 |
+
each — small enough to be effectively a tie. The pattern is broad rather than peaky:
|
| 1728 |
+
Carbon's gains come from data, tokenizer, and objective design, distributed across tasks,
|
| 1729 |
+
not from a single specialised benchmark.
|
| 1730 |
</div>
|
| 1731 |
</div>
|
| 1732 |
</section>
|
|
|