lvwerra HF Staff committed on
Commit
2c5ccd8
·
1 Parent(s): 3cf1b0c

§4 Species: anchor prompt to 2nd exon, drop zebrafish

Browse files

- fetch_species.py: also capture per-species exon coords from Ensembl,
translated into 0-based offsets in the trimmed seq
- species.js: new getPromptWindow() — anchors prefixEnd up to 35 bp into the
2nd exon (like §1's gene completion; less when the exon is short); slides
prefixStart back by the user-selected prefixLen; falls back to a flat slice
from the start of the seq when exon 2 isn't visible. Always render the prompt
(no more "click run all" placeholder). Adds a per-row "{N} bp prompt" note.
- species.json: regenerated with exon data; zebrafish dropped (~450 My
drift made the row look like noise next to mammals + bird)
- demo.html §4 lede/takeaway: reflect new framing and dropped species

assets/js/sections/species.js CHANGED
@@ -40,6 +40,26 @@
40
  return Math.max(20, Math.min(blocks, 12) * 10);
41
  }
42
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  function renderRow(s) {
44
  const wrap = document.createElement("div");
45
  wrap.className = "species-row";
@@ -47,7 +67,8 @@
47
 
48
  const stat = runState[s.species_id] || {};
49
  const genText = stat.genText || "";
50
- const refSlice = s.seq.slice(prefixLen, prefixLen + genLen);
 
51
  let match = 0, total = 0;
52
  for (let i = 0; i < genText.length && i < refSlice.length; i++) {
53
  total++;
@@ -55,12 +76,14 @@
55
  }
56
  const idPct = total > 0 ? `${((match / total) * 100).toFixed(0)}%` : "·";
57
  const meanLp = stat.genTokens ? meanLogprob(stat.genTokens) : null;
 
58
 
59
  wrap.innerHTML = `
60
  <div class="species-meta">
61
  <div class="species-name" style="border-left-color:${s.color}">${s.common}</div>
62
  <div class="species-sub">${s.ortholog_symbol}</div>
63
  <div class="species-sub">chr${s.chrom} · strand ${s.strand}</div>
 
64
  <div class="species-stats">
65
  <div class="stat-id">${idPct}</div>
66
  <div class="stat-sub">${total > 0 ? `${match}/${total} bases` : "not run"}</div>
@@ -68,7 +91,7 @@
68
  </div>
69
  </div>
70
  <div>
71
- <div class="species-seq" data-role="output">click "run all" to generate</div>
72
  <div class="species-seq" data-role="ref" style="margin-top:4px"></div>
73
  </div>
74
  `;
@@ -76,11 +99,7 @@
76
  const outEl = wrap.querySelector('[data-role="output"]');
77
  const refEl = wrap.querySelector('[data-role="ref"]');
78
 
79
- if (genText.length === 0 && (stat.status === "idle" || !stat.status)) {
80
- outEl.classList.add("empty");
81
- outEl.textContent = "click \"run all\" to generate";
82
- refEl.style.display = "none";
83
- } else if (stat.status === "error") {
84
  outEl.classList.add("empty");
85
  outEl.style.color = "#b00020";
86
  outEl.textContent = stat.error || "error";
@@ -88,12 +107,13 @@
88
  } else {
89
  outEl.classList.remove("empty");
90
  const bpl = basesPerLine(outEl);
91
- const total = (s.seq.slice(0, prefixLen) + genText);
 
92
  const lpRange = stat.genTokens ? lpRangeOf(stat.genTokens) : null;
93
  const colorOut = (absIdx) => {
94
- if (absIdx < prefixLen) return { style: `color:rgb(${PROMPT_RGB.join(",")})` };
95
  const tok = stat.genTokens && stat.genTokenAtBase
96
- ? stat.genTokens[stat.genTokenAtBase[absIdx - prefixLen]]
97
  : null;
98
  const [r, g, b] = logprobRgb(tok ? tok.logprob : null, lpRange);
99
  return { style: `color:rgb(${r},${g},${b})` };
@@ -102,9 +122,8 @@
102
 
103
  // Reference (only the generated span)
104
  if (genText.length > 0) {
105
- const refSpanStart = prefixLen;
106
- const refSpanEnd = Math.min(s.length, prefixLen + genLen);
107
- const refSeq = s.seq.slice(refSpanStart, refSpanEnd);
108
  const colorRef = (absIdx, base) => {
109
  // absIdx is local to refSeq (starts at 0)
110
  const genIdx = absIdx;
@@ -132,7 +151,8 @@
132
  }
133
 
134
  async function generateForSpecies(s) {
135
- const prompt = s.seq.slice(0, prefixLen);
 
136
  const stat = { genText: "", genTokens: [], genTokenAtBase: [], status: "running" };
137
  runState[s.species_id] = stat;
138
  renderAll();
@@ -211,7 +231,7 @@
211
  entry = SPECIES_DATA.find(x => x.symbol === symbol);
212
  if (!entry) return;
213
  els.pills.querySelectorAll(".pill").forEach(p => p.classList.toggle("active", p.dataset.gene === symbol));
214
- els.info.innerHTML = `<strong>${entry.symbol}</strong> · same gene, ${entry.species.length} species · prefix from each species' own canonical transcript`;
215
  runState = {};
216
  renderAll();
217
  setStatus("idle");
 
40
  return Math.max(20, Math.min(blocks, 12) * 10);
41
  }
42
 
43
+ // Choose where the prompt sits inside the species seq. The §1 finding is
44
+ // that Carbon is most predictive when it's continuing the 2nd exon with
45
+ // some intron context behind it. We replicate that here: anchor prefixEnd
46
+ // a bit past the start of exon 2 (35 bp of exon context, like §1) and
47
+ // slide prefixStart back by the user-selected `prefixLen`. If exon 2
48
+ // isn't visible in the trimmed seq, fall back to a flat slice from start.
49
+ function getPromptWindow(s, prefixLen) {
50
+ const exons = s.exons || [];
51
+ const exon2 = exons.length >= 2 ? exons[1] : null;
52
+ if (exon2) {
53
+ const EXON_CTX = 35;
54
+ const exonLen = exon2.end - exon2.start;
55
+ const exonCtx = Math.min(EXON_CTX, Math.max(0, exonLen - 30));
56
+ const prefixEnd = Math.min(s.length, exon2.start + exonCtx);
57
+ const prefixStart = Math.max(0, prefixEnd - prefixLen);
58
+ return { prefixStart, prefixEnd };
59
+ }
60
+ return { prefixStart: 0, prefixEnd: Math.min(s.length, prefixLen) };
61
+ }
62
+
63
  function renderRow(s) {
64
  const wrap = document.createElement("div");
65
  wrap.className = "species-row";
 
67
 
68
  const stat = runState[s.species_id] || {};
69
  const genText = stat.genText || "";
70
+ const { prefixStart, prefixEnd } = getPromptWindow(s, prefixLen);
71
+ const refSlice = s.seq.slice(prefixEnd, prefixEnd + genLen);
72
  let match = 0, total = 0;
73
  for (let i = 0; i < genText.length && i < refSlice.length; i++) {
74
  total++;
 
76
  }
77
  const idPct = total > 0 ? `${((match / total) * 100).toFixed(0)}%` : "·";
78
  const meanLp = stat.genTokens ? meanLogprob(stat.genTokens) : null;
79
+ const promptBp = prefixEnd - prefixStart;
80
 
81
  wrap.innerHTML = `
82
  <div class="species-meta">
83
  <div class="species-name" style="border-left-color:${s.color}">${s.common}</div>
84
  <div class="species-sub">${s.ortholog_symbol}</div>
85
  <div class="species-sub">chr${s.chrom} · strand ${s.strand}</div>
86
+ <div class="species-sub" style="color:#999">${promptBp} bp prompt</div>
87
  <div class="species-stats">
88
  <div class="stat-id">${idPct}</div>
89
  <div class="stat-sub">${total > 0 ? `${match}/${total} bases` : "not run"}</div>
 
91
  </div>
92
  </div>
93
  <div>
94
+ <div class="species-seq" data-role="output"></div>
95
  <div class="species-seq" data-role="ref" style="margin-top:4px"></div>
96
  </div>
97
  `;
 
99
  const outEl = wrap.querySelector('[data-role="output"]');
100
  const refEl = wrap.querySelector('[data-role="ref"]');
101
 
102
+ if (stat.status === "error") {
 
 
 
 
103
  outEl.classList.add("empty");
104
  outEl.style.color = "#b00020";
105
  outEl.textContent = stat.error || "error";
 
107
  } else {
108
  outEl.classList.remove("empty");
109
  const bpl = basesPerLine(outEl);
110
+ const prompt = s.seq.slice(prefixStart, prefixEnd);
111
+ const total = prompt + genText;
112
  const lpRange = stat.genTokens ? lpRangeOf(stat.genTokens) : null;
113
  const colorOut = (absIdx) => {
114
+ if (absIdx < prompt.length) return { style: `color:rgb(${PROMPT_RGB.join(",")})` };
115
  const tok = stat.genTokens && stat.genTokenAtBase
116
+ ? stat.genTokens[stat.genTokenAtBase[absIdx - prompt.length]]
117
  : null;
118
  const [r, g, b] = logprobRgb(tok ? tok.logprob : null, lpRange);
119
  return { style: `color:rgb(${r},${g},${b})` };
 
122
 
123
  // Reference (only the generated span)
124
  if (genText.length > 0) {
125
+ const refSpanEnd = Math.min(s.length, prefixEnd + genLen);
126
+ const refSeq = s.seq.slice(prefixEnd, refSpanEnd);
 
127
  const colorRef = (absIdx, base) => {
128
  // absIdx is local to refSeq (starts at 0)
129
  const genIdx = absIdx;
 
151
  }
152
 
153
  async function generateForSpecies(s) {
154
+ const { prefixStart, prefixEnd } = getPromptWindow(s, prefixLen);
155
+ const prompt = s.seq.slice(prefixStart, prefixEnd);
156
  const stat = { genText: "", genTokens: [], genTokenAtBase: [], status: "running" };
157
  runState[s.species_id] = stat;
158
  renderAll();
 
231
  entry = SPECIES_DATA.find(x => x.symbol === symbol);
232
  if (!entry) return;
233
  els.pills.querySelectorAll(".pill").forEach(p => p.classList.toggle("active", p.dataset.gene === symbol));
234
+ els.info.innerHTML = `<strong>${entry.symbol}</strong> · same gene, ${entry.species.length} species · prefix anchored to the 2nd exon of each species (intron context, then generate into the exon)`;
235
  runState = {};
236
  renderAll();
237
  setStatus("idle");
data/species.json CHANGED
@@ -10,6 +10,16 @@
10
  "strand": -1,
11
  "length": 1200,
12
  "seq": "AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGGTCTGTTCCAAGGGCCTTTGCGTCAGGTGGGCTCAGGATTCCAGGGTGGCTGGACCCCAGGCCCCAGCTCTGCAGCAGGGAGGACGTGGCTGGGCTCGTGAAGCATGTGGGGGTGAGCCCAGGGGCCCCAAGGCAGGGCACCTGGCCTTCAGCCTGCCTCAGCCCTGCCTGTCTCCCAGATCACTGTCCTTCTGCCATGGCCCTGTGGATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTGACCCAGCCGCAGCCTTTGTGAACCAACACCTGTGCGGCTCACACCTGGTGGAAGCTCTCTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACACACCCAAGACCCGCCGGGAGGCAGAGGACCTGCAGGGTGAGCCAACTGCCCATTGCTGCCCCTGGCCGCCCCCAGCCACCCCCTGCTCCTGGCGCTCCCACCCAGCATGGGCAGAAGGGGGCAGGAGGCTGCCACCCAGCAGGGGGTCAGGTGCACTTTTTTAAAAAGAAGTTCTCTTGGTCACGTCCTAAAAGTGACCAGCTCCCTGTGGCCCAGTCAGAATCTCAGCCTGAGGACGGTGTTGGCTTCGGCAGCCCCGAGATACATCAGAGGGTGGGCACGCTCCTCCCTCCACTCGCCCCTCAAACAAATGCCCCGCAGCCCATTTCTCCACCCTCATTTGATGACCGCAGATTCAAGTGTTTTGTTAAGTAAAGTCCTGGGTGACCTGGGGTCACAGGGTGCCCCACGCTGCCTGCCTCTGGGCGAACACCCCATCACGCCCGGAGGAGGGCGTGGCTGCCTGCCTGAGTGGGCCAGACCCCTGTCGCCAGGCCTCACGGCAGCTCCATAGTCAGGAGATGGGGAAGATGCTGGGGACAGGCCCTGGGGAGAAGTACTGGGATCACCTGTTCAGGCTCCCACTGTGACGCTGCCCCGGGGCGGGGGAAGGAGGTGGGACATGTGGGCGTTGGGGCCTGTAGGTCCACACCCAGTGTGGGTGACCCTCCCTCTAACCTGGGTCCAGCCCGGCTGGAGATGGGTGGGAGTGCGACCTAGGGCTGGCGGGCAGGCGGGCACTGTGTCTCCCTGACTGTGTCCTCCTGTGTCCCTCTGCCTCGCCGCTGTTCCGGAACCTGCTCTGCGCGGC",
 
 
 
 
 
 
 
 
 
 
13
  "species_id": "homo_sapiens",
14
  "common": "human",
15
  "color": "#1a1a1a"
@@ -22,6 +32,16 @@
22
  "strand": 1,
23
  "length": 1180,
24
  "seq": "ACCAGGCAAGTGTTTGGAAACTGCAGCTTCAGCCCCTCTGGCCATCTGCCTACCCACCCCACCTGGAGACCTTAATGGGCCAAACAGCAAAGTCCAGGGGGCAGAGAGGAGGTACTTTGGACTATAAAGCTGGTGGGCATCCAGTAACCCCCAGCCCTTAGTGACCAGCTATAATCAGAGACCATCAGCAAGCAGGTATGTACTCTCCTCTTTGGGCCTGGCTCCCCAGCCAAGACTCCAGCGACTTTAGGGAGAATGTGGGCTCCTCTCTTACATGGATCTTTTGCTAGCCTCAACCCTGCCTATCTTTCAGGTCATTGTTTCAACATGGCCCTGTTGGTGCACTTCCTACCCCTGCTGGCCCTGCTTGCCCTCTGGGAGCCCAAACCCACCCAGGCTTTTGTCAAACAGCATCTTTGTGGTCCCCACCTGGTAGAGGCTCTCTACCTGGTGTGTGGGGAGCGTGGCTTCTTCTACACACCCAAGTCCCGCCGTGAAGTGGAGGACCCACAAGTGGAACAACTGGAGCTGGGAGGAAGCCCCGGGGACCTTCAGACCTTGGCGTTGGAGGTGGCCCGGCAGAAGCGTGGCATTGTGGATCAGTGCTGCACCAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAAGGCCCACCTCGACCCGCCCCACCCCTCTGCAATGAATAAAACTTTTGAATAAGCACCAAAAAAAAGAGTTCTATAATGAATGAAAAAGGATTGTGTATATAGACATCTTTTTCTCTGGCATTTATTGTCATGTTAGCATACTATTAAACCATTGTTAGGTTGGATGATTATATAATCATGTATGAAGCTTGTGATAAAACACCAGGAATAATTCAAGTATCTGGAATTCTGCTTCCTGCCCAAGAAGGTAGGCAACCGTGTAAATGCCACTGAAGCTACTAGTCTAAAAGTGAGTTATCTCTGTCTTTGTCTTACCCCCTGATGCTGTGATAAAACCCTGACAAGAGCAACTGACTCCTGAGAGGAAGGTTTATTCTAGCTCACAATTCCAGGTTACAAACAGTCCATCCGTAGCAGGGGAGTCACAGCAACAGGAACCTCAGGGAACTGCTCCTATTATCCCCACAATCAAGAATAGTGACCAATAAATAAGTGGATCTTTTCTCAAAAAAAAAAAAAAAAAAAA",
 
 
 
 
 
 
 
 
 
 
25
  "species_id": "mus_musculus",
26
  "common": "mouse",
27
  "color": "#2c5aa0"
@@ -34,21 +54,15 @@
34
  "strand": 1,
35
  "length": 1200,
36
  "seq": "TTGTCTCTTTATGAGAAATCAGCAGGGAGGCCAGCATTGGTGTGAGTGTGTGGATGGAGACAGGCTTCTGGTTATAATTGGTCATTTATTATGACTTTCAAAGCCTGATGAATAAAATATTCCTTTCCTCTTCAGAAGGTCCATTTGCTTCTGTAGTCTTGTTTTCACGTCAAAGGAGCTGAGGGACATAAGATGCCTGATGATAGCTTATTCCTCCCTTGCAACCCCCCCGTGTCTCCTTTGCTTCCTACCTCTAGGCCTCCCCCAGCTCATCATGGCTCTCTGGATCCGATCACTGCCTCTTCTGGCTCTCCTTGTCTTTTCTGGCCCTGGAACCAGCTATGCAGCTGCCAACCAGCACCTCTGTGGCTCCCACTTGGTGGAGGCTCTCTACCTGGTGTGTGGAGAGCGTGGCTTCTTCTACTCCCCCAAAGCCCGACGGGATGTCGAGCAGCCCCTAGGTAAGTCAGTTCGACCATGACTACATTCATATGCTATATGATGCAAAAAGCAACTGTCTATCTTTGATGGTGACACAAGGAATGTCCTTGGTGGGGAATGCCAGGAATACCTTAAACATACCAACAGCATCATATCACCCATGAAAAGATCGTCAGGCTAAAAAGGCAGGTGGGAGGGCAAGCAGGGAAAGGAGATTTATGAGACAGAAGGAATTGTCACAGAAAGCTCCAAATTTTTTGCTACTCTCTTGGTAGAGAGAGGCTGAAAACAGTGTTATTCCAACATTTGCATGGCAATTACTCTCACCTGGGAGTGATCATGAAAAATAAAGGTGGAAGGAACACAAGAAGCCTCTTTCTGCACCTCTTCTTGACCCACACTACCCCAGTCCTGTTCTGTGACCACATCAAGTGTGGTCATAAAAACCTCCTGCCTTCTGAAGCTGTCCATTCTTGTGCTTAAATGACTTTTTCTTGAATGTTTCCTCCTAATATTTAACCCAAATGTATCTTGTCACAAATTAGTCCTCATCTTCTCAGTAGTGGACAAAAGGAACAGTCTATCATTTCTCCTTCATGGGTGATTTTCAAACAGTTTAAAAATTGCTTCCATGTCTTGTTTTTATCTACTGTGAGCTAAAAGCCCTCAACAGCCCCAGAATTCCTTTTTAGGTCACATATTCTAGCTCTCTGTCTACATAAACTGTTCTGCATTTGGCCCATACCATTACGGAATGGT",
 
 
 
 
 
 
37
  "species_id": "gallus_gallus",
38
  "common": "chicken",
39
  "color": "#c08030"
40
- },
41
- {
42
- "ortholog_symbol": "ins",
43
- "ensembl_gene": "ENSDARG00000035350",
44
- "ensembl_transcript": "ENSDART00000051222",
45
- "chrom": "5",
46
- "strand": 1,
47
- "length": 1200,
48
- "seq": "ATCTCCACCACCATATCCACCATTCCTCGCCTCTGCTTCGAGAACAGGTGAGTGTCGAGCGGGATGGTAAATCTACAGAGAATGCGGAGTGTAGCTTGTGTACATGTTTTTGATTAACAGAGATTGTATGTGTGTGTTTGTGTCAGTGTGACCATGGCAGTGTGGCTTCAGGCTGGTGCTCTGTTGGTCCTGTTGGTCGTGTCCAGTGTAAGCACTAACCCAGGCACACCGCAGCACCTGTGTGGATCTCATCTGGTCGATGCCCTTTATCTGGTCTGTGGCCCAACAGGCTTCTTCTACAACCCCAAGAGAGACGTTGAGCCCCTTCTGGGTAAGAAAGTCAAGTAGAGGTGTTTTGACAGTGGAGTAGTAACAGTGGAGTTTTATCATTTTTAAATCCAATTAGCGATCTCCAACTCTGGCAAGAGCACTTTTAGTTTAGTTTAGCGTAGATCATTGAATTGAATTAGACCATTAGCGTCTCGTAAATTTTTTTTTTTGATAATTTCCCTATTGAGTATATAGCTTGAAAATGATAACTTTACTATTCTTTATTTACATTGTGGACTAAGACCAGTGGAAATGAAATGTTGCTGCTTTCTACACTGTAAAAAGCGATTAGTTGCCTTTACCTAAAAAAAGAGAGTGAACTCGTTGCCTTATAATTAAGCAAACTATGTGCATATTATAAAAGTTAAGTCAATGGGTTTTCTGACTTTTTAAAAGTAAAATCAACGGTCACACTTTATTTTGATGGTCCATTTGGTAAATTGCTTCTACATGCCAACTAACTCTCATTAGATTATATGTAGACAGGTTGGGTTTAGGGTTAGGGTTAGTGTAAGTTGACATGTACTTGCAAAGTTTCTTATAGTCAGTTAAATGTCTGTTGAAGGAGCAGTATCAACAGATAATAAGCAGACAGTCTACTAATACTCAAATGGACCATCAAAATAAAGTGTTACCAAATTAACTTGTTGCTTTTGCGGTTGGTTTACTTACTTTTTTAAAGTAAAGTTACTAATCGATTTTTACAGTGTAGGTTAATATCATTCTAGCATAATAATAAAGTAACTTTGCCACTGTATCATGGCTGCAGCCATGGTTCGACAACAAAGTAACTTGCCTAGTTAGCCTAATTAATCTCCACTGTAAAAATATTTGTTAATTAAAATGTAAAAGTTTTTGGTTGATTTATGA",
49
- "species_id": "danio_rerio",
50
- "common": "zebrafish",
51
- "color": "#2a8a8a"
52
  }
53
  ]
54
  },
@@ -63,6 +77,12 @@
63
  "strand": -1,
64
  "length": 1200,
65
  "seq": "CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAACTCATAGCTGTATATTTTAGAGCCCATGGCATCCTAGTGAAAACTGGGGCTCCATTCCGAAATGATCATTTGGGGGTGATCCGGGGAGCCCAAGCTGCTAAGGTCCCACAACTTCCGGACCTTTGTCCTTCCTGGAGCGATCTTTCCAGGCAGCCCCCGGCTCCGCTAGATGGAGAAAATCCAATTGAAGGCTGTCAGTCGTGGAAGTGAGAAGTGCTAAACCAGGGGTTTGCCCGCCAGGCCGAGGAGGACCGTCGCAATCTGAGAGGCCCGGCAGCCCTGTTATTGTTTGGCTCCACATTTACATTTCTGCCTCTTGCAGCAGCATTTCCGGTTTCTTTTTGCCGGAGCAGCTCACTATTCACCCGATGAGAGGGGAGGAGAGAGAGAGAAAATGTCCTTTAGGCCGGTTCCTCTTACTTGGCAGAGGGAGGCTGCTATTCTCCGCCTGCATTTCTTTTTCTGGATTACTTAGTTATGGCCTTTGCAAAGGCAGGGGTATTTGTTTTGATGCAAACCTCAATCCCTCCCCTTCTTTGAATGGTGTGCCCCACCCCGCGGGTCGCCTGCAACCTAGGCGGACGCTACCATGGCGTGAGACAGGGAGGGAAAGAAGTGTGCAGAAGGCAAGCCCGGAGGTATTTTCAAGAATGAGTATATCTCATCTTCCCGGAGGAAAAAAAAAAAGAATGGGTACGTC",
 
 
 
 
 
 
66
  "species_id": "homo_sapiens",
67
  "common": "human",
68
  "color": "#1a1a1a"
@@ -75,6 +95,12 @@
75
  "strand": 1,
76
  "length": 1200,
77
  "seq": "TTTCCCCTCCCACGTGCTCACCCTGGCTAAAGTTCTGTAGCTTCAGTTCATTGGGACCATCCTGGCTGTAGGTAGCGACTACAGTTAGGGGGCACCTAGCATTCAGGCCCTCATCCTCCTCCTTCCCAGCAGGGTGTCACGCTTCTCCGAAGACTGGGTAAGTAATTGATGAGCGTGACGAGACCTCTCGGTCACTGGCTCTCTCCGTTTGCATCCATAAAACTAGAGAAAACCGTGGGGTTTGGGGGTGGGGCAGTGGGGGGACTCAGCGCGATGGAGATGGGCGGAATGGAAGCTTGGCGGGCGGGATGAACGGGAGTGTATATGTCAGATGCTGTAGTGAGGGTAGCTGATGATGATGATGTTAGGACCGACGAGCCTCACTGTCATGCACCTGCAAAGTAGAGCATATAGGGACCACTGAGATGGCCTAAGGGGTTTTCTCTCCGCTACGCGTTGTACACACTTATCTGCCCGCTGCTAGGTGATGGAAGCTCCGGAAATAACATGCACAAAGCACCAGGATTTAAGATTTTTCGAGATTCATAGCTTAAGACTTAAGACCCCCCATAGCATCCTAATGAAACCCTGGGTTCCGTTCCTGGATGAGATCGGGGTGATCCGGGGAGCCTTAGCTGCTAAGGTCCCGCAACTTCCGGACCTTTGTCCCTGGAGTGATTTCTTTTTTTTTCCAGCCGCTTCTCGACCCTGCTAGATGAAGAAAATCCAAGAAAAGCCTGAAGCACTAGCGGTGCTAGCCAGAAGTATTTGCCCTCGGGGCCCGACTCAGCCTCTTGGTCTGAAAGGCCCGCCGGCCCTGTTATTGTTTGGCTCCTTTACGTTTCTGCCGCTTGCAGGAGCATTTCCGGTTTCTTGTTTTCGGAGCAGATCACTGCTCGCCCGGCGACGGGGGAGTAGCGAAAGGGGAGAAATGGATTCTAGGCTGGTTCTGTGGTTTGAGGAGGAAAACTGCTGTCCTCGACATCTTATTTTTCTGGATTACTTGGTTATTGCTTTTGCAAAGGAGGAGGTGTTTATTTAAAAGAGTGCGCCGATAGGTCGTTTCTTCCTGCCGGAAAAGCAAATTACCGAGTATCCGGTTTTAGGGTGAGCCATTCCCTTGCTTAACGCATTCCGCGCGTCCTGAAAGCGGAAGGGGCGGGCTTGGGCCCCGGGCGCCCGGCGGTCCTAAGTGCAGTC",
 
 
 
 
 
 
78
  "species_id": "mus_musculus",
79
  "common": "mouse",
80
  "color": "#2c5aa0"
@@ -87,21 +113,19 @@
87
  "strand": 1,
88
  "length": 1200,
89
  "seq": "GGCCTGGCCCCCGCACAGCACCTCATCCGGGTGGAGGGGAACCCCCAGGCGCGTTACCACGACGACGAGACCACCAAACGGCCACAGCGTCGTCGTCCCCTATGAGCCCCCCGAGGTACCCCCAAATCACACCCCCCCCATAACCCCCCCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAATTCCCCCCCCAAGACCCCATAACCTCCCATTGCCATGACCCCCAATCCCCCCCCCATGACCCCATAACCCCTCAATGACCCCCCCCAATGACCCCCCCAATGCCCCCAGCCCCAACCCCCCCCCCCAGTCCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCACGACCCCATAAGATCCCAATGCCGTGACCCCCCAATCCCCCCCCATGACCCCATAACCCCTCAATGACCCCCCCCAATGACCCCAACCCCAAAATACCCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAATTCCCCCCCCACGACCCCATAACCTCCCATTGCCATGACCCCCAATCCCCCCCCATGACCCTATAACCCCTCAATGACCCCCCCCCAATGACCCCCAACCCCAAAATACCCCCCCAGACCCCCCCTACAGACCCCCACAACCCCAAGACCCCCCCCCACGAACCCATAACTTCCCAATGCTGTGACCCCCAATCCCCCCCCATGACCCCATAACCCCCCAATGAACCCCAACAGCCCAATGACCCCCAGCCCCAAACCCCCCCCCCAGTCCCCTTTAAGGACCCCCCCCCAACCCCAATTCCCCCCCCACGACCCCATAACCTCCCATTGCCATGACCCCCCATCCCCCCTCATGACCCCATAACCCCTCAATGACCCCCCCCCAATGACCCCCAACCCCAAAATACCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCCCACGACCCCATAACTTCCCAATGCTGTGACCCCCAATCCCCCCCCCATGACCCCATAACCCTTCACTGACCCCCCCCAATGACCCCCAGCCCCAAACCCCCCCCAGTCCCCCTTAAGGACCCCCCCCCCACCCCATAACCCCTCAATGACACCCCCCCCCAATGACCCCCAACCCCAAAATGCCCCCCCTCAGTCCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCCCCACGACCCCATAACCTCCCATTGCCGTGACCCCCAA",
 
 
 
 
 
 
 
 
 
 
90
  "species_id": "gallus_gallus",
91
  "common": "chicken",
92
  "color": "#c08030"
93
- },
94
- {
95
- "ortholog_symbol": "tp53",
96
- "ensembl_gene": "ENSDARG00000035559",
97
- "ensembl_transcript": "ENSDART00000135934",
98
- "chrom": "5",
99
- "strand": 1,
100
- "length": 1200,
101
- "seq": "CTGTAACTAGGGGAATCCCCAAAACTCCACGCGGATTTGCTTTGTGGATGTCCAATAACCTCCTTGTTTTGGTCATTCTTTAAGTCGATCGACTACATATCCGGGCAATCCGAAAGTCGATAATCGTAGTTTAGTGGAGAGGAGGTCGGCAAAATCAATTCTTGCAAAGGTAACAAAGCGAAACTTTTATTGACTAGCGTTAGTGGTTTGATCGATCATATTTATTAACGTTAACTAGTGTTTGTAGATTCAGCTGTTTGTATATTAAGTAAAAGTCGGGATTTGGTGTGCAACATCATTGGACCGCTTGTGTGTTGTATTATTGTAACAAACTGACGTCGATTCGATAAAAACTGGTAATGAATTAAGAGTGAAACTCGTATTTCGAACTTTAGGCCCGACATCGCTTTAAACGTTACTAAAACGTTGATGTTAACGTTAGCCGCTAAGCCTGTTAGCTAGCTAACATTAGCTGTCGAAAATGCAAACTTAGAGTTTCAGTATAATAGCTTTAGGTGACTTTTAGAAATCTTATGTACAGGTATGCGCATTTTTATTCATTTTTAGCTATATCTACTTGCGCGACAGAACTGTGAAGCTAATAAGCTAACGTTAAGCGGCATCTTTGCGACTCTGAGCGATCGCCTGTTTTTTAAATTTAAAGTGAATGTACATGTTATATAAATATCACACTTTGCATAAGAAACAACATCCCGACTTTTATTTTAACGTTATAGTAATTTTAGTTGTCGTGATATATTATTTCCCCCCAGGTTTTTATGACTTTCGTGTTTATAATTTCACAGCAATGGCGCAAAACGACAGCCAAGAGTTCGCGGAGCTCTGGGAGAAGAATTTGATGTAAGTTCGCAAGGGTCGCACTCCTGATACACAACGCTCCTCTTTTTCCTGCCTCTCTAAATTTCCCCGTATTTTTGTTAGCAGCCATGTCAGGTTGCTATAATGTACCTGTCTTAATATTTTTGGTGTTTGCTTGTTTAAGATGCATCTGTCGAACGTATTGTTTATCTGGAGTTCTTTGTATGAGCTTCAACAGATTAATACTAATTTCTCTCTCTTCCTTTTCAATTGTCTCAGAAGTATTCAGCCCCCAGGTGGTGGCTCTTGCTGGGACATCATTAATGATGAGGTATTTAAAAAAATAATCTCAAACACCCAGAAATCTCTATTTTCCTCTTT",
102
- "species_id": "danio_rerio",
103
- "common": "zebrafish",
104
- "color": "#2a8a8a"
105
  }
106
  ]
107
  }
 
10
  "strand": -1,
11
  "length": 1200,
12
  "seq": "AGCCCTCCAGGACAGGCTGCATCAGAAGAGGCCATCAAGCAGGTCTGTTCCAAGGGCCTTTGCGTCAGGTGGGCTCAGGATTCCAGGGTGGCTGGACCCCAGGCCCCAGCTCTGCAGCAGGGAGGACGTGGCTGGGCTCGTGAAGCATGTGGGGGTGAGCCCAGGGGCCCCAAGGCAGGGCACCTGGCCTTCAGCCTGCCTCAGCCCTGCCTGTCTCCCAGATCACTGTCCTTCTGCCATGGCCCTGTGGATGCGCCTCCTGCCCCTGCTGGCGCTGCTGGCCCTCTGGGGACCTGACCCAGCCGCAGCCTTTGTGAACCAACACCTGTGCGGCTCACACCTGGTGGAAGCTCTCTACCTAGTGTGCGGGGAACGAGGCTTCTTCTACACACCCAAGACCCGCCGGGAGGCAGAGGACCTGCAGGGTGAGCCAACTGCCCATTGCTGCCCCTGGCCGCCCCCAGCCACCCCCTGCTCCTGGCGCTCCCACCCAGCATGGGCAGAAGGGGGCAGGAGGCTGCCACCCAGCAGGGGGTCAGGTGCACTTTTTTAAAAAGAAGTTCTCTTGGTCACGTCCTAAAAGTGACCAGCTCCCTGTGGCCCAGTCAGAATCTCAGCCTGAGGACGGTGTTGGCTTCGGCAGCCCCGAGATACATCAGAGGGTGGGCACGCTCCTCCCTCCACTCGCCCCTCAAACAAATGCCCCGCAGCCCATTTCTCCACCCTCATTTGATGACCGCAGATTCAAGTGTTTTGTTAAGTAAAGTCCTGGGTGACCTGGGGTCACAGGGTGCCCCACGCTGCCTGCCTCTGGGCGAACACCCCATCACGCCCGGAGGAGGGCGTGGCTGCCTGCCTGAGTGGGCCAGACCCCTGTCGCCAGGCCTCACGGCAGCTCCATAGTCAGGAGATGGGGAAGATGCTGGGGACAGGCCCTGGGGAGAAGTACTGGGATCACCTGTTCAGGCTCCCACTGTGACGCTGCCCCGGGGCGGGGGAAGGAGGTGGGACATGTGGGCGTTGGGGCCTGTAGGTCCACACCCAGTGTGGGTGACCCTCCCTCTAACCTGGGTCCAGCCCGGCTGGAGATGGGTGGGAGTGCGACCTAGGGCTGGCGGGCAGGCGGGCACTGTGTCTCCCTGACTGTGTCCTCCTGTGTCCCTCTGCCTCGCCGCTGTTCCGGAACCTGCTCTGCGCGGC",
13
+ "exons": [
14
+ {
15
+ "start": 0,
16
+ "end": 42
17
+ },
18
+ {
19
+ "start": 221,
20
+ "end": 425
21
+ }
22
+ ],
23
  "species_id": "homo_sapiens",
24
  "common": "human",
25
  "color": "#1a1a1a"
 
32
  "strand": 1,
33
  "length": 1180,
34
  "seq": "ACCAGGCAAGTGTTTGGAAACTGCAGCTTCAGCCCCTCTGGCCATCTGCCTACCCACCCCACCTGGAGACCTTAATGGGCCAAACAGCAAAGTCCAGGGGGCAGAGAGGAGGTACTTTGGACTATAAAGCTGGTGGGCATCCAGTAACCCCCAGCCCTTAGTGACCAGCTATAATCAGAGACCATCAGCAAGCAGGTATGTACTCTCCTCTTTGGGCCTGGCTCCCCAGCCAAGACTCCAGCGACTTTAGGGAGAATGTGGGCTCCTCTCTTACATGGATCTTTTGCTAGCCTCAACCCTGCCTATCTTTCAGGTCATTGTTTCAACATGGCCCTGTTGGTGCACTTCCTACCCCTGCTGGCCCTGCTTGCCCTCTGGGAGCCCAAACCCACCCAGGCTTTTGTCAAACAGCATCTTTGTGGTCCCCACCTGGTAGAGGCTCTCTACCTGGTGTGTGGGGAGCGTGGCTTCTTCTACACACCCAAGTCCCGCCGTGAAGTGGAGGACCCACAAGTGGAACAACTGGAGCTGGGAGGAAGCCCCGGGGACCTTCAGACCTTGGCGTTGGAGGTGGCCCGGCAGAAGCGTGGCATTGTGGATCAGTGCTGCACCAGCATCTGCTCCCTCTACCAGCTGGAGAACTACTGCAACTAAGGCCCACCTCGACCCGCCCCACCCCTCTGCAATGAATAAAACTTTTGAATAAGCACCAAAAAAAAGAGTTCTATAATGAATGAAAAAGGATTGTGTATATAGACATCTTTTTCTCTGGCATTTATTGTCATGTTAGCATACTATTAAACCATTGTTAGGTTGGATGATTATATAATCATGTATGAAGCTTGTGATAAAACACCAGGAATAATTCAAGTATCTGGAATTCTGCTTCCTGCCCAAGAAGGTAGGCAACCGTGTAAATGCCACTGAAGCTACTAGTCTAAAAGTGAGTTATCTCTGTCTTTGTCTTACCCCCTGATGCTGTGATAAAACCCTGACAAGAGCAACTGACTCCTGAGAGGAAGGTTTATTCTAGCTCACAATTCCAGGTTACAAACAGTCCATCCGTAGCAGGGGAGTCACAGCAACAGGAACCTCAGGGAACTGCTCCTATTATCCCCACAATCAAGAATAGTGACCAATAAATAAGTGGATCTTTTCTCAAAAAAAAAAAAAAAAAAAA",
35
+ "exons": [
36
+ {
37
+ "start": 0,
38
+ "end": 195
39
+ },
40
+ {
41
+ "start": 313,
42
+ "end": 1180
43
+ }
44
+ ],
45
  "species_id": "mus_musculus",
46
  "common": "mouse",
47
  "color": "#2c5aa0"
 
54
  "strand": 1,
55
  "length": 1200,
56
  "seq": "TTGTCTCTTTATGAGAAATCAGCAGGGAGGCCAGCATTGGTGTGAGTGTGTGGATGGAGACAGGCTTCTGGTTATAATTGGTCATTTATTATGACTTTCAAAGCCTGATGAATAAAATATTCCTTTCCTCTTCAGAAGGTCCATTTGCTTCTGTAGTCTTGTTTTCACGTCAAAGGAGCTGAGGGACATAAGATGCCTGATGATAGCTTATTCCTCCCTTGCAACCCCCCCGTGTCTCCTTTGCTTCCTACCTCTAGGCCTCCCCCAGCTCATCATGGCTCTCTGGATCCGATCACTGCCTCTTCTGGCTCTCCTTGTCTTTTCTGGCCCTGGAACCAGCTATGCAGCTGCCAACCAGCACCTCTGTGGCTCCCACTTGGTGGAGGCTCTCTACCTGGTGTGTGGAGAGCGTGGCTTCTTCTACTCCCCCAAAGCCCGACGGGATGTCGAGCAGCCCCTAGGTAAGTCAGTTCGACCATGACTACATTCATATGCTATATGATGCAAAAAGCAACTGTCTATCTTTGATGGTGACACAAGGAATGTCCTTGGTGGGGAATGCCAGGAATACCTTAAACATACCAACAGCATCATATCACCCATGAAAAGATCGTCAGGCTAAAAAGGCAGGTGGGAGGGCAAGCAGGGAAAGGAGATTTATGAGACAGAAGGAATTGTCACAGAAAGCTCCAAATTTTTTGCTACTCTCTTGGTAGAGAGAGGCTGAAAACAGTGTTATTCCAACATTTGCATGGCAATTACTCTCACCTGGGAGTGATCATGAAAAATAAAGGTGGAAGGAACACAAGAAGCCTCTTTCTGCACCTCTTCTTGACCCACACTACCCCAGTCCTGTTCTGTGACCACATCAAGTGTGGTCATAAAAACCTCCTGCCTTCTGAAGCTGTCCATTCTTGTGCTTAAATGACTTTTTCTTGAATGTTTCCTCCTAATATTTAACCCAAATGTATCTTGTCACAAATTAGTCCTCATCTTCTCAGTAGTGGACAAAAGGAACAGTCTATCATTTCTCCTTCATGGGTGATTTTCAAACAGTTTAAAAATTGCTTCCATGTCTTGTTTTTATCTACTGTGAGCTAAAAGCCCTCAACAGCCCCAGAATTCCTTTTTAGGTCACATATTCTAGCTCTCTGTCTACATAAACTGTTCTGCATTTGGCCCATACCATTACGGAATGGT",
57
+ "exons": [
58
+ {
59
+ "start": 0,
60
+ "end": 461
61
+ }
62
+ ],
63
  "species_id": "gallus_gallus",
64
  "common": "chicken",
65
  "color": "#c08030"
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  ]
68
  },
 
77
  "strand": -1,
78
  "length": 1200,
79
  "seq": "CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGCTGGGAGCGTGCTTTCCACGACGGTGACACGCTTCCCTGGATTGGGTAAGCTCCTGACTGAACTTGATGAGTCCTCTCTGAGTCACGGGCTCTCGGCTCCGTGTATTTTCAGCTCGGGAAAATCGCTGGGGCTGGGGGTGGGGCAGTGGGGACTTAGCGAGTTTGGGGGTGAGTGGGATGGAAGCTTGGCTAGAGGGATCATCATAGGAGTTGCATTGTTGGGAGACCTGGGTGTAGATGATGGGGATGTTAGGACCATCCGAACTCAAAGTTGAACGCCTAGGCAGAGGAGTGGAGCTTTGGGGAACCTTGAGCCGGCCTAAAGCGTACTTCTTTGCACATCCACCCGGTGCTGGGCGTAGGGAATCCCTGAAATAAAAGATGCACAAAGCATTGAGGTCTGAGACTTTTGGATCTCGAAACATTGAGAACTCATAGCTGTATATTTTAGAGCCCATGGCATCCTAGTGAAAACTGGGGCTCCATTCCGAAATGATCATTTGGGGGTGATCCGGGGAGCCCAAGCTGCTAAGGTCCCACAACTTCCGGACCTTTGTCCTTCCTGGAGCGATCTTTCCAGGCAGCCCCCGGCTCCGCTAGATGGAGAAAATCCAATTGAAGGCTGTCAGTCGTGGAAGTGAGAAGTGCTAAACCAGGGGTTTGCCCGCCAGGCCGAGGAGGACCGTCGCAATCTGAGAGGCCCGGCAGCCCTGTTATTGTTTGGCTCCACATTTACATTTCTGCCTCTTGCAGCAGCATTTCCGGTTTCTTTTTGCCGGAGCAGCTCACTATTCACCCGATGAGAGGGGAGGAGAGAGAGAGAAAATGTCCTTTAGGCCGGTTCCTCTTACTTGGCAGAGGGAGGCTGCTATTCTCCGCCTGCATTTCTTTTTCTGGATTACTTAGTTATGGCCTTTGCAAAGGCAGGGGTATTTGTTTTGATGCAAACCTCAATCCCTCCCCTTCTTTGAATGGTGTGCCCCACCCCGCGGGTCGCCTGCAACCTAGGCGGACGCTACCATGGCGTGAGACAGGGAGGGAAAGAAGTGTGCAGAAGGCAAGCCCGGAGGTATTTTCAAGAATGAGTATATCTCATCTTCCCGGAGGAAAAAAAAAAAGAATGGGTACGTC",
80
+ "exons": [
81
+ {
82
+ "start": 0,
83
+ "end": 114
84
+ }
85
+ ],
86
  "species_id": "homo_sapiens",
87
  "common": "human",
88
  "color": "#1a1a1a"
 
95
  "strand": 1,
96
  "length": 1200,
97
  "seq": "TTTCCCCTCCCACGTGCTCACCCTGGCTAAAGTTCTGTAGCTTCAGTTCATTGGGACCATCCTGGCTGTAGGTAGCGACTACAGTTAGGGGGCACCTAGCATTCAGGCCCTCATCCTCCTCCTTCCCAGCAGGGTGTCACGCTTCTCCGAAGACTGGGTAAGTAATTGATGAGCGTGACGAGACCTCTCGGTCACTGGCTCTCTCCGTTTGCATCCATAAAACTAGAGAAAACCGTGGGGTTTGGGGGTGGGGCAGTGGGGGGACTCAGCGCGATGGAGATGGGCGGAATGGAAGCTTGGCGGGCGGGATGAACGGGAGTGTATATGTCAGATGCTGTAGTGAGGGTAGCTGATGATGATGATGTTAGGACCGACGAGCCTCACTGTCATGCACCTGCAAAGTAGAGCATATAGGGACCACTGAGATGGCCTAAGGGGTTTTCTCTCCGCTACGCGTTGTACACACTTATCTGCCCGCTGCTAGGTGATGGAAGCTCCGGAAATAACATGCACAAAGCACCAGGATTTAAGATTTTTCGAGATTCATAGCTTAAGACTTAAGACCCCCCATAGCATCCTAATGAAACCCTGGGTTCCGTTCCTGGATGAGATCGGGGTGATCCGGGGAGCCTTAGCTGCTAAGGTCCCGCAACTTCCGGACCTTTGTCCCTGGAGTGATTTCTTTTTTTTTCCAGCCGCTTCTCGACCCTGCTAGATGAAGAAAATCCAAGAAAAGCCTGAAGCACTAGCGGTGCTAGCCAGAAGTATTTGCCCTCGGGGCCCGACTCAGCCTCTTGGTCTGAAAGGCCCGCCGGCCCTGTTATTGTTTGGCTCCTTTACGTTTCTGCCGCTTGCAGGAGCATTTCCGGTTTCTTGTTTTCGGAGCAGATCACTGCTCGCCCGGCGACGGGGGAGTAGCGAAAGGGGAGAAATGGATTCTAGGCTGGTTCTGTGGTTTGAGGAGGAAAACTGCTGTCCTCGACATCTTATTTTTCTGGATTACTTGGTTATTGCTTTTGCAAAGGAGGAGGTGTTTATTTAAAAGAGTGCGCCGATAGGTCGTTTCTTCCTGCCGGAAAAGCAAATTACCGAGTATCCGGTTTTAGGGTGAGCCATTCCCTTGCTTAACGCATTCCGCGCGTCCTGAAAGCGGAAGGGGCGGGCTTGGGCCCCGGGCGCCCGGCGGTCCTAAGTGCAGTC",
98
+ "exons": [
99
+ {
100
+ "start": 0,
101
+ "end": 157
102
+ }
103
+ ],
104
  "species_id": "mus_musculus",
105
  "common": "mouse",
106
  "color": "#2c5aa0"
 
113
  "strand": 1,
114
  "length": 1200,
115
  "seq": "GGCCTGGCCCCCGCACAGCACCTCATCCGGGTGGAGGGGAACCCCCAGGCGCGTTACCACGACGACGAGACCACCAAACGGCCACAGCGTCGTCGTCCCCTATGAGCCCCCCGAGGTACCCCCAAATCACACCCCCCCCATAACCCCCCCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAATTCCCCCCCCAAGACCCCATAACCTCCCATTGCCATGACCCCCAATCCCCCCCCCATGACCCCATAACCCCTCAATGACCCCCCCCAATGACCCCCCCAATGCCCCCAGCCCCAACCCCCCCCCCCAGTCCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCACGACCCCATAAGATCCCAATGCCGTGACCCCCCAATCCCCCCCCATGACCCCATAACCCCTCAATGACCCCCCCCAATGACCCCAACCCCAAAATACCCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAATTCCCCCCCCACGACCCCATAACCTCCCATTGCCATGACCCCCAATCCCCCCCCATGACCCTATAACCCCTCAATGACCCCCCCCCAATGACCCCCAACCCCAAAATACCCCCCCAGACCCCCCCTACAGACCCCCACAACCCCAAGACCCCCCCCCACGAACCCATAACTTCCCAATGCTGTGACCCCCAATCCCCCCCCATGACCCCATAACCCCCCAATGAACCCCAACAGCCCAATGACCCCCAGCCCCAAACCCCCCCCCCAGTCCCCTTTAAGGACCCCCCCCCAACCCCAATTCCCCCCCCACGACCCCATAACCTCCCATTGCCATGACCCCCCATCCCCCCTCATGACCCCATAACCCCTCAATGACCCCCCCCCAATGACCCCCAACCCCAAAATACCCCCCAGACCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCCCACGACCCCATAACTTCCCAATGCTGTGACCCCCAATCCCCCCCCCATGACCCCATAACCCTTCACTGACCCCCCCCAATGACCCCCAGCCCCAAACCCCCCCCAGTCCCCCTTAAGGACCCCCCCCCCACCCCATAACCCCTCAATGACACCCCCCCCCAATGACCCCCAACCCCAAAATGCCCCCCCTCAGTCCCCCCTAAAGACCCCCCCAACCCCAAGACCCCCCCCCACGACCCCATAACCTCCCATTGCCGTGACCCCCAA",
116
+ "exons": [
117
+ {
118
+ "start": 0,
119
+ "end": 81
120
+ },
121
+ {
122
+ "start": 82,
123
+ "end": 115
124
+ }
125
+ ],
126
  "species_id": "gallus_gallus",
127
  "common": "chicken",
128
  "color": "#c08030"
 
 
 
 
 
 
 
 
 
 
 
 
129
  }
130
  ]
131
  }
demo.html CHANGED
@@ -804,14 +804,14 @@ print(f"delta = {delta:+.2f} (less likely if negative)")</code></pre></div>
804
  <div class="section-num">§4 · Species</div>
805
  <div class="section-title">It knows who's who</div>
806
  <p class="lede">
807
- The same gene (insulin, p53) exists in mouse, chicken, zebrafish, but the surrounding
808
- sequence has accumulated different mutations along each lineage for hundreds of millions
809
- of years. Feed Carbon the opening ~400 bp of each species' copy of the gene and ask it
810
- to continue. Each continuation should match <em>that species'</em> real DNA better than
811
- another species' would. How well that holds tracks how recently the lineages split: the
812
- model nails closely-related species (mouse, chicken, even though they're ~300 My from
813
- human) and starts to drop on zebrafish, where 450 My of independent evolution has
814
- smudged the patterns the model was trained to expect.
815
  </p>
816
  </div>
817
 
@@ -860,9 +860,9 @@ print(f"delta = {delta:+.2f} (less likely if negative)")</code></pre></div>
860
  given and continues in that species' style; identity to that species' reference often
861
  runs 65–90% on the next 60 bp. Cut the prefix to 200 and the signal collapses to
862
  near-random: a few hundred bases is what it takes to "lock in" on a lineage.
863
- Zebrafish stays the hardest case across the board; ~450 My of independent evolution
864
- is enough divergence that a 400 bp prefix doesn't pin down the genome as confidently
865
- as it does for mammals and birds.
866
  </p>
867
  </div>
868
 
@@ -892,7 +892,7 @@ def continue_species(species_prefix):
892
  )
893
  return r.choices[0].text
894
 
895
- # species_prefixes = { "human": ..., "mouse": ..., "chicken": ..., "zebrafish": ... }
896
  with ThreadPoolExecutor() as pool:
897
  results = dict(zip(species_prefixes, pool.map(continue_species, species_prefixes.values())))
898
 
 
804
  <div class="section-num">§4 · Species</div>
805
  <div class="section-title">It knows who's who</div>
806
  <p class="lede">
807
+ The same gene (insulin, p53) exists in mouse and chicken, but the surrounding sequence
808
+ has accumulated different mutations along each lineage for hundreds of millions of
809
+ years. For each species we hand Carbon up to ~400 bp leading into the 2nd exon and
810
+ ask it to continue inside the exon. Each continuation should match <em>that species'</em>
811
+ real DNA better than another species' would. The model handles closely-related species
812
+ well (mouse, chicken, even though they're ~300 My from human); the further you go back
813
+ in evolutionary time, the more the surrounding sequence drifts and the harder this
814
+ setup becomes.
815
  </p>
816
  </div>
817
 
 
860
  given and continues in that species' style; identity to that species' reference often
861
  runs 65–90% on the next 60 bp. Cut the prefix to 200 and the signal collapses to
862
  near-random: a few hundred bases is what it takes to "lock in" on a lineage.
863
+ The gap between mouse and chicken is where you can read the evolutionary signal: with
864
+ 300+ My since chicken's last common ancestor with mammals, a 400 bp prefix still locks
865
+ Carbon in, but chicken's per-base identity sits a notch below mouse's.
866
  </p>
867
  </div>
868
 
 
892
  )
893
  return r.choices[0].text
894
 
895
+ # species_prefixes = { "human": ..., "mouse": ..., "chicken": ... }
896
  with ThreadPoolExecutor() as pool:
897
  results = dict(zip(species_prefixes, pool.map(continue_species, species_prefixes.values())))
898
 
scripts/fetch_species.py CHANGED
@@ -23,7 +23,9 @@ SPECIES = [
23
  {"id": "homo_sapiens", "common": "human", "color": "#1a1a1a"},
24
  {"id": "mus_musculus", "common": "mouse", "color": "#2c5aa0"},
25
  {"id": "gallus_gallus", "common": "chicken", "color": "#c08030"},
26
- {"id": "danio_rerio", "common": "zebrafish", "color": "#2a8a8a"},
 
 
27
  ]
28
  PREFIX_LEN = 1200 # cap on returned seq length per species (only ~200 will be fed as prompt)
29
 
@@ -56,9 +58,11 @@ def fetch_for(symbol, species_id):
56
 
57
  chrom = g["seq_region_name"]
58
  strand = g["strand"]
59
- t_start = ct["start"]
60
- t_end = ct["end"]
 
61
 
 
62
  # Cap fetched length so the JSON stays small
63
  if t_end - t_start + 1 > PREFIX_LEN:
64
  if strand == 1:
@@ -75,6 +79,24 @@ def fetch_for(symbol, species_id):
75
  else:
76
  seq = revcomp(plus_seq)
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  return {
79
  "ortholog_symbol": g.get("display_name", symbol),
80
  "ensembl_gene": g["id"],
@@ -83,6 +105,7 @@ def fetch_for(symbol, species_id):
83
  "strand": strand,
84
  "length": len(seq),
85
  "seq": seq,
 
86
  }
87
 
88
 
 
23
  {"id": "homo_sapiens", "common": "human", "color": "#1a1a1a"},
24
  {"id": "mus_musculus", "common": "mouse", "color": "#2c5aa0"},
25
  {"id": "gallus_gallus", "common": "chicken", "color": "#c08030"},
26
+ # Zebrafish dropped: ~450 My from human, the model usually can't pick up
27
+ # the lineage from ~400 bp of context and the row looks like noise next
28
+ # to mammals + bird.
29
  ]
30
  PREFIX_LEN = 1200 # cap on returned seq length per species (only ~200 will be fed as prompt)
31
 
 
58
 
59
  chrom = g["seq_region_name"]
60
  strand = g["strand"]
61
+ t_start_full = ct["start"]
62
+ t_end_full = ct["end"]
63
+ exons_genomic = [(e["start"], e["end"]) for e in ct.get("Exon", [])]
64
 
65
+ t_start, t_end = t_start_full, t_end_full
66
  # Cap fetched length so the JSON stays small
67
  if t_end - t_start + 1 > PREFIX_LEN:
68
  if strand == 1:
 
79
  else:
80
  seq = revcomp(plus_seq)
81
 
82
+ # Translate exons from genomic coords into 0-based [start, end) offsets in
83
+ # `seq`. For + strand seq[i] = plus pos t_start + i. For - strand seq is
84
+ # revcomp(plus_seq), so seq[i] = plus pos t_end - i. Exons that fall
85
+ # outside the trimmed window are clipped or dropped.
86
+ exons_seq = []
87
+ seq_len = len(seq)
88
+ for e_start, e_end in exons_genomic:
89
+ if strand == 1:
90
+ s = e_start - t_start
91
+ e = e_end - t_start + 1
92
+ else:
93
+ s = t_end - e_end
94
+ e = t_end - e_start + 1
95
+ s = max(0, s); e = min(seq_len, e)
96
+ if e > s:
97
+ exons_seq.append({"start": s, "end": e})
98
+ exons_seq.sort(key=lambda x: x["start"])
99
+
100
  return {
101
  "ortholog_symbol": g.get("display_name", symbol),
102
  "ensembl_gene": g["id"],
 
105
  "strand": strand,
106
  "length": len(seq),
107
  "seq": seq,
108
+ "exons": exons_seq,
109
  }
110
 
111