Spaces:
Running
CRISPR M2: paste-anything input + fix phantom progress bar
Browse files
Resolver (dee/core/resolve.py, new): one input box auto-detects and
resolves a gene symbol, an accession (ENST…/ENSG…/NM_…/XM_…), or raw
DNA into an editable sequence + a human label.
- gene symbol → Ensembl canonical-transcript CDS (reuses
exon.fetch_gene_structure; human/mouse).
- Ensembl ID → /sequence/id?type=cds (gene IDs resolve to canonical tx).
- RefSeq → NCBI efetch FASTA.
- raw / FASTA → used as-is, makes NO network call (privacy: a pasted
sequence never leaves the Space; only the identifier is sent).
Conservative classifier: a long DNA-dominant blob is always a sequence.
Endpoint POST /api/crispr/resolve (sign-in gated, same as design),
returns {ok, kind, sequence, gene_symbol, label, source}.
Frontend: a resolver row above the paste box (input + Fetch, Enter to
submit) fills the textarea with the resolved sequence, shows a result
chip, and auto-sets the gene-symbol field so base-edit AA consequences
and exon context get the reading frame.
Bug fix surfaced during verification: `.progress-shell { display:flex }`
overrode the UA `[hidden]` rule, so the progress shell stayed visible
showing its default "Designing… 0.0s" text — a permanent phantom bar
under BOTH the CRISPR and directed-evolution Design buttons (live for
the 6 customers). Added `.progress-shell[hidden] { display:none }`.
Tests: tests/test_resolve.py (22, network mocked). Full suite 109 green.
Verified desktop + mobile; TP53 → ENST00000269305 (1182 nt) and NM_000546
resolve live. Cache-buster → 20260529-paste-anything.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
- dee/core/resolve.py +171 -0
- dee/server.py +36 -0
- dee/static/app.css +35 -0
- dee/static/app.js +58 -0
- dee/static/index.html +25 -4
- tests/test_resolve.py +127 -0
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Paste-anything target resolution (Phase 3, M2).
|
| 2 |
+
|
| 3 |
+
One input box, three kinds of input — auto-detected and resolved to an
|
| 4 |
+
editable DNA sequence (plus a human-readable label and, where known, a
|
| 5 |
+
gene symbol that feeds base-edit AA consequences + the structure viewer):
|
| 6 |
+
|
| 7 |
+
* raw DNA / FASTA → used as-is (never leaves the Space).
|
| 8 |
+
* gene symbol → Ensembl canonical-transcript CDS (human / mouse),
|
| 9 |
+
via the existing exon.fetch_gene_structure().
|
| 10 |
+
* accession → Ensembl transcript/gene ID (ENST…/ENSG…) or RefSeq
|
| 11 |
+
(NM_/XM_/NR_/XR_) fetched from Ensembl / NCBI.
|
| 12 |
+
|
| 13 |
+
Privacy: for symbol/accession lookups, ONLY the (organism, identifier)
|
| 14 |
+
leaves the Space — never the user's pasted sequence. The raw-sequence
|
| 15 |
+
path makes no network calls at all.
|
| 16 |
+
|
| 17 |
+
Classification is conservative: a long, DNA-looking blob is always a
|
| 18 |
+
sequence; a short token that matches an ID pattern is an accession; an
|
| 19 |
+
alphanumeric token is a gene symbol (needs an organism). Ambiguous short
|
| 20 |
+
tokens fall through to a symbol lookup, which fails gracefully.
|
| 21 |
+
"""
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import re
|
| 25 |
+
from typing import Dict, Tuple
|
| 26 |
+
|
| 27 |
+
from dee.core import exon as _exon
|
| 28 |
+
|
| 29 |
+
# βββ Identifier patterns βββββββββββββββββββββββββββββββββββββββββββββ
|
| 30 |
+
_ENSEMBL_TX = re.compile(r"^ENS[A-Z]*T\d{6,}(?:\.\d+)?$", re.I) # ENSTβ¦, ENSMUSTβ¦
|
| 31 |
+
_ENSEMBL_GENE = re.compile(r"^ENS[A-Z]*G\d{6,}(?:\.\d+)?$", re.I) # ENSGβ¦, ENSMUSGβ¦
|
| 32 |
+
_REFSEQ = re.compile(r"^[NX][MR]_\d+(?:\.\d+)?$", re.I) # NM_/NR_/XM_/XR_
|
| 33 |
+
_SYMBOL = re.compile(r"^[A-Za-z][A-Za-z0-9._-]{0,19}$") # TP53, BRCA1, β¦
|
| 34 |
+
|
| 35 |
+
_NCBI_EFETCH = ("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
|
| 36 |
+
"?db=nuccore&rettype=fasta&retmode=text&id=")
|
| 37 |
+
|
| 38 |
+
MIN_TARGET_LEN = 23 # shortest usable target (Cas12a spacer+PAM)
|
| 39 |
+
MAX_TARGET_LEN = 1_000_000
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
def _clean_dna(text: str) -> str:
|
| 43 |
+
"""Strip FASTA headers + whitespace + non-ACGTN, uppercase."""
|
| 44 |
+
lines = [ln for ln in text.splitlines() if not ln.strip().startswith(">")]
|
| 45 |
+
return re.sub(r"[^ACGTNacgtn]", "", "".join(lines)).upper()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def classify(text: str) -> Tuple[str, str]:
|
| 49 |
+
"""Return (kind, value). kind β {empty, sequence, symbol, ensembl_tx,
|
| 50 |
+
ensembl_gene, refseq, unknown}. `value` is the cleaned sequence (for
|
| 51 |
+
'sequence') or the identifier token otherwise."""
|
| 52 |
+
t = (text or "").strip()
|
| 53 |
+
if not t:
|
| 54 |
+
return ("empty", "")
|
| 55 |
+
if t.lstrip().startswith(">"):
|
| 56 |
+
return ("sequence", _clean_dna(t))
|
| 57 |
+
|
| 58 |
+
cleaned = _clean_dna(t)
|
| 59 |
+
letters = re.sub(r"[^A-Za-z]", "", re.sub(r"\s", "", t))
|
| 60 |
+
dna_ratio = len(cleaned) / max(1, len(letters))
|
| 61 |
+
has_ws = bool(re.search(r"\s", t))
|
| 62 |
+
# A long, DNA-dominant blob (or any multi-line / spaced DNA) is a sequence.
|
| 63 |
+
if len(cleaned) >= MIN_TARGET_LEN and dna_ratio >= 0.9 and (has_ws or len(cleaned) > 20):
|
| 64 |
+
return ("sequence", cleaned)
|
| 65 |
+
|
| 66 |
+
token = t.split()[0] if t.split() else ""
|
| 67 |
+
if _ENSEMBL_TX.match(token):
|
| 68 |
+
return ("ensembl_tx", token)
|
| 69 |
+
if _ENSEMBL_GENE.match(token):
|
| 70 |
+
return ("ensembl_gene", token)
|
| 71 |
+
if _REFSEQ.match(token):
|
| 72 |
+
return ("refseq", token.upper())
|
| 73 |
+
if _SYMBOL.match(token):
|
| 74 |
+
return ("symbol", token.upper())
|
| 75 |
+
if len(cleaned) >= MIN_TARGET_LEN and dna_ratio >= 0.9:
|
| 76 |
+
return ("sequence", cleaned)
|
| 77 |
+
return ("unknown", token)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def _err(msg: str) -> Dict:
|
| 81 |
+
return {"ok": False, "error": msg, "kind": "", "sequence": "",
|
| 82 |
+
"gene_symbol": "", "label": "", "source": ""}
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _ok(kind: str, sequence: str, label: str, source: str,
|
| 86 |
+
gene_symbol: str = "") -> Dict:
|
| 87 |
+
return {"ok": True, "kind": kind, "sequence": sequence,
|
| 88 |
+
"gene_symbol": gene_symbol, "label": label, "source": source}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _fetch_ensembl_cds(identifier: str, is_gene: bool) -> Tuple[str, str]:
    """Fetch the CDS sequence for an Ensembl transcript or gene ID.

    For a gene ID the gene is looked up first (``/lookup/id/<id>?expand=1``)
    and the transcript flagged ``is_canonical == 1`` is used, falling back to
    the first listed transcript when none carries the flag.

    Returns ``(sequence, label)``, or ``("", "")`` when the lookup returns
    nothing, the gene lists no transcripts, the chosen transcript has no
    ``id``, or no sequence comes back.
    """
    tx_id = identifier
    if is_gene:
        # Resolve gene → canonical transcript first.
        info = _exon._http_get_json(
            f"{_exon.ENSEMBL_BASE}/lookup/id/{identifier}?expand=1")
        transcripts = (info or {}).get("Transcript") or []
        if not transcripts:
            return ("", "")
        canonical = next(
            (tx for tx in transcripts if tx.get("is_canonical") == 1),
            transcripts[0])
        tx_id = canonical.get("id") or ""
        if not tx_id:
            return ("", "")

    fasta = _exon._http_get_text(
        f"{_exon.ENSEMBL_BASE}/sequence/id/{tx_id}?type=cds")
    seq = _exon._fasta_to_seq(fasta) if fasta else ""
    if not seq:
        return ("", "")

    cds_label = f"{tx_id} · CDS {len(seq):,} nt"
    # For a gene ID, show which transcript was actually fetched.
    label = f"{identifier} → {cds_label}" if is_gene else cds_label
    return (seq, label)
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _fetch_refseq(accession: str) -> str:
    """Request *accession* from the NCBI efetch FASTA endpoint and return the
    parsed sequence, or ``""`` when the request yields nothing."""
    response = _exon._http_get_text(_NCBI_EFETCH + accession)
    if not response:
        return ""
    return _exon._fasta_to_seq(response)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def resolve_target(text: str, organism: str = "") -> Dict:
    """Resolve pasted text to an editable sequence.

    *text* is classified with ``classify()`` and dispatched:

    * ``sequence`` – length-checked against ``MIN_TARGET_LEN`` /
      ``MAX_TARGET_LEN`` and returned as-is; no fetch helper is called.
    * ``symbol`` – requires *organism* to be ``"human"`` or ``"mouse"``;
      resolved through ``exon.fetch_gene_structure``.
    * ``ensembl_tx`` / ``ensembl_gene`` – CDS fetched from Ensembl.
    * ``refseq`` – sequence fetched from NCBI.

    Returns a dict: {ok, kind, sequence, gene_symbol, label, source, error?}.
    Failures are reported through ``ok=False`` + ``error``; nothing is raised
    by this function itself.
    """
    organism = (organism or "").lower().strip()
    kind, val = classify(text)

    if kind == "empty":
        return _err("Paste a DNA sequence, a gene symbol, or an accession.")

    if kind == "sequence":
        if len(val) < MIN_TARGET_LEN:
            return _err(f"Sequence is only {len(val)} nt — need at least "
                        f"{MIN_TARGET_LEN} nt. Check you pasted DNA, not protein.")
        if len(val) > MAX_TARGET_LEN:
            return _err("Sequence too long. Cap is 1 Mbp — paste just the "
                        "gene / region you're editing.")
        return _ok("sequence", val, f"pasted sequence · {len(val):,} nt", "input")

    if kind == "symbol":
        if organism not in ("human", "mouse"):
            return _err(f"To look up “{val}” by gene symbol, pick Human or "
                        f"Mouse — or paste the sequence directly.")
        gene = _exon.fetch_gene_structure(organism, val)
        if gene is None:
            return _err(f"Couldn't find “{val}” in {organism}. Check the "
                        f"symbol, or paste the sequence directly.")
        cds = gene.cds_sequence
        if not cds:
            # NOTE(review): fetch_gene_structure may already return None for
            # genes without a CDS; this guard only prevents reporting an
            # empty sequence as a successful lookup.
            return _err(f"“{val}” has no coding sequence in Ensembl "
                        f"({organism}). Paste the sequence directly.")
        return _ok("gene", cds,
                   f"{val} · {gene.transcript_id} · CDS {len(cds):,} nt",
                   "ensembl", gene_symbol=val)

    if kind in ("ensembl_tx", "ensembl_gene"):
        seq, label = _fetch_ensembl_cds(val, is_gene=(kind == "ensembl_gene"))
        if not seq:
            return _err(f"Couldn't fetch “{val}” from Ensembl. Check the ID, "
                        f"or paste the sequence directly.")
        return _ok("ensembl", seq, label, "ensembl")

    if kind == "refseq":
        seq = _fetch_refseq(val)
        if not seq:
            return _err(f"Couldn't fetch “{val}” from NCBI. Check the "
                        f"accession, or paste the sequence directly.")
        return _ok("refseq", seq, f"{val} · {len(seq):,} nt", "ncbi")

    return _err("Unrecognized input. Paste a DNA sequence, a gene symbol "
                "(with Human/Mouse selected), or an accession (ENST…, NM_…).")
|
|
@@ -1103,6 +1103,42 @@ def create_app() -> Flask:
|
|
| 1103 |
],
|
| 1104 |
})
|
| 1105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1106 |
@app.post("/api/crispr/download")
|
| 1107 |
def crispr_download() -> Response:
|
| 1108 |
"""Return the CSV body for a fresh design. We re-design from the
|
|
|
|
| 1103 |
],
|
| 1104 |
})
|
| 1105 |
|
| 1106 |
+
@app.post("/api/crispr/resolve")
def crispr_resolve() -> Response:
    """Paste-anything resolver (Phase 3, M2). Body: {text, organism}.

    Resolves a gene symbol / accession / raw sequence to an editable
    DNA sequence + a human label. Anonymous callers get a 403 with
    ``kind="signin_required"``.

    Status codes: 400 for a missing/non-string/over-long ``text``, 422 when
    the resolver reports ``ok=False``, 500 when the resolver raises, 200
    otherwise.
    """
    auth = _auth.get_auth()
    if auth.anonymous:
        return jsonify({
            "ok": False,
            "error": "Sign in or create a free account to keep going.",
            "kind": "signin_required",
            "signup_url": "https://turingdna.com/signin/?from=crispr",
        }), 403

    body = request.get_json(force=True, silent=True)
    if not isinstance(body, dict):
        # A JSON array/number/string body has no .get(); treat it as empty so
        # the caller gets the 400 below instead of an unhandled AttributeError.
        body = {}
    raw_text = body.get("text")
    # A non-string "text" (number, list, …) is handled the same as a missing one.
    text = raw_text.strip() if isinstance(raw_text, str) else ""
    if not text:
        return jsonify({"ok": False, "error": "missing 'text'"}), 400
    # Bound input: a sequence paste can be up to 1 Mbp; an identifier is
    # tiny. Cap defensively so a giant blob can't tie up the resolver.
    if len(text) > 1_000_000:
        return jsonify({"ok": False, "error": "Input too long (1 Mbp cap)."}), 400
    organism = str(body.get("organism", "")).lower().strip()
    if organism not in ("", "ecoli", "human", "mouse"):
        organism = ""

    from dee.core import resolve as _resolve
    try:
        result = _resolve.resolve_target(text, organism=organism)
    except Exception:  # noqa: BLE001
        # Full traceback goes to the log; the client gets a generic message so
        # exception text (upstream URLs, internals) is not echoed back.
        logger.exception("CRISPR resolve failed.")
        return jsonify({
            "ok": False,
            "error": "Lookup failed unexpectedly. Try again, or paste the "
                     "sequence directly.",
        }), 500
    return jsonify(result), (200 if result.get("ok") else 422)
|
| 1141 |
+
|
| 1142 |
@app.post("/api/crispr/download")
|
| 1143 |
def crispr_download() -> Response:
|
| 1144 |
"""Return the CSV body for a fresh design. We re-design from the
|
|
@@ -1213,6 +1213,12 @@ input:focus, textarea:focus, select:focus {
|
|
| 1213 |
flex-direction: column;
|
| 1214 |
gap: 8px;
|
| 1215 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
.progress-bar {
|
| 1217 |
height: 4px;
|
| 1218 |
background: var(--gray-3);
|
|
@@ -4349,6 +4355,35 @@ h3, h4 {
|
|
| 4349 |
.crispr-opt-disabled { opacity: 0.4; cursor: not-allowed; }
|
| 4350 |
.crispr-opt-disabled input[type="radio"] { cursor: not-allowed; }
|
| 4351 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4352 |
/* βββ Enzyme picker (Phase 1) βββββββββββββββββββββββββββββββββββββββ
|
| 4353 |
* Hairline row above the paste-actions block. Mono label, radio
|
| 4354 |
* controls styled as text-buttons with archival-blue selected state
|
|
|
|
| 1213 |
flex-direction: column;
|
| 1214 |
gap: 8px;
|
| 1215 |
}
|
| 1216 |
+
/* The `display: flex` above overrides the UA `[hidden]` rule, so the
|
| 1217 |
+
* shell stayed visible (showing its default "Designing… 0.0s" text)
|
| 1218 |
+
* even when hidden=true. Restore hide-when-hidden. Fixes a permanent
|
| 1219 |
+
* phantom progress bar under the CRISPR + directed-evolution Design
|
| 1220 |
+
* buttons. */
|
| 1221 |
+
.progress-shell[hidden] { display: none; }
|
| 1222 |
.progress-bar {
|
| 1223 |
height: 4px;
|
| 1224 |
background: var(--gray-3);
|
|
|
|
| 4355 |
.crispr-opt-disabled { opacity: 0.4; cursor: not-allowed; }
|
| 4356 |
.crispr-opt-disabled input[type="radio"] { cursor: not-allowed; }
|
| 4357 |
|
| 4358 |
+
/* βββ Phase 3 (M2): paste-anything resolver βββββββββββββββββββββββββ
|
| 4359 |
+
* A single row above the paste box: text input + Fetch button, with a
|
| 4360 |
+
* result chip below. Flex with min-width:0 so it shrinks cleanly on
|
| 4361 |
+
* mobile rather than overflowing. */
|
| 4362 |
+
.crispr-resolve-row {
|
| 4363 |
+
display: flex;
|
| 4364 |
+
gap: 8px;
|
| 4365 |
+
align-items: stretch;
|
| 4366 |
+
margin-bottom: 10px;
|
| 4367 |
+
}
|
| 4368 |
+
.crispr-resolve-input { flex: 1 1 auto; min-width: 0; }
|
| 4369 |
+
.crispr-resolve-row .ghost { flex: 0 0 auto; white-space: nowrap; }
|
| 4370 |
+
.crispr-resolve-chip {
|
| 4371 |
+
margin: -2px 0 12px;
|
| 4372 |
+
padding: 7px 11px;
|
| 4373 |
+
border-radius: var(--r-1);
|
| 4374 |
+
background: var(--gray-1);
|
| 4375 |
+
border: 1px solid var(--line);
|
| 4376 |
+
font-size: 12px;
|
| 4377 |
+
line-height: 1.45;
|
| 4378 |
+
color: var(--ink-soft);
|
| 4379 |
+
}
|
| 4380 |
+
.crispr-resolve-chip-error {
|
| 4381 |
+
background: #FFFBEB;
|
| 4382 |
+
border-color: #FCD34D;
|
| 4383 |
+
color: #92400E;
|
| 4384 |
+
}
|
| 4385 |
+
.crispr-resolve-ok { color: #047857; font-weight: 700; }
|
| 4386 |
+
|
| 4387 |
/* βββ Enzyme picker (Phase 1) βββββββββββββββββββββββββββββββββββββββ
|
| 4388 |
* Hairline row above the paste-actions block. Mono label, radio
|
| 4389 |
* controls styled as text-buttons with archival-blue selected state
|
|
@@ -3686,6 +3686,64 @@ if (_quitBtn) {
|
|
| 3686 |
});
|
| 3687 |
if (beSelectEl) beSelectEl.addEventListener('change', updateBaseEditorMeta);
|
| 3688 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3689 |
// βββ Phase 2C-1: vector + vendor loader βββββββββββββββββββββββββ
|
| 3690 |
// Fetches /api/crispr/vectors on first design call, caches in memory
|
| 3691 |
// for the rest of the session, populates the picker + vendor row.
|
|
|
|
| 3686 |
});
|
| 3687 |
if (beSelectEl) beSelectEl.addEventListener('change', updateBaseEditorMeta);
|
| 3688 |
|
| 3689 |
+
// βββ Phase 3 (M2): paste-anything resolver ββββββββββββββββββββββ
|
| 3690 |
+
// Type a gene symbol / accession β fill the paste box with the
|
| 3691 |
+
// resolved sequence + set the gene context. Raw paste is unchanged.
|
| 3692 |
+
const resolveInput = document.getElementById('crisprResolveInput');
|
| 3693 |
+
const resolveBtn = document.getElementById('crisprResolveBtn');
|
| 3694 |
+
const resolveChip = document.getElementById('crisprResolveChip');
|
| 3695 |
+
// Reveal the resolver result chip with the given markup; `isError` adds the
// error modifier class. No-op when the chip element is absent from the page.
function _showResolveChip(html, isError) {
  if (!resolveChip) return;
  const classes = ['crispr-resolve-chip'];
  if (isError) classes.push('crispr-resolve-chip-error');
  resolveChip.className = classes.join(' ');
  resolveChip.innerHTML = html;
  resolveChip.hidden = false;
}
|
| 3701 |
+
// POST the resolver input to /api/crispr/resolve and, on success, fill the
// paste box with the returned sequence, set the gene-symbol field for named
// genes, and show the result chip. Errors are shown in the chip; a 403
// `signin_required` response opens the sign-in modal instead.
async function _doResolve() {
  const text = (resolveInput && resolveInput.value.trim()) || '';
  if (!text) return;
  // Enter in the input bypasses the disabled button, so guard re-entry here:
  // two in-flight lookups could otherwise race to overwrite the paste box.
  if (_doResolve.inFlight) return;
  _doResolve.inFlight = true;
  const organismEl = document.getElementById('crisprOrganism');
  const organism = (organismEl && organismEl.value) || '';
  if (resolveBtn) { resolveBtn.disabled = true; resolveBtn.textContent = 'Fetching…'; }
  try {
    const res = await fetch('/api/crispr/resolve', {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ text, organism }),
    });
    let data;
    try {
      data = await res.json();
    } catch (parseErr) {
      // Non-JSON body (e.g. a proxy error page): report the HTTP status
      // rather than a raw JSON-parser message.
      data = { ok: false, error: 'Server returned an unexpected response (HTTP ' + res.status + ').' };
    }
    if (res.status === 403 && data.kind === 'signin_required') {
      _openSigninModal();
      return;
    }
    if (!data.ok) {
      _showResolveChip(escapeHtml(data.error || 'Could not resolve that input.'), true);
      return;
    }
    // Fill the paste box + refresh gutter / stats / button state.
    if (ta) {
      ta.value = data.sequence;
      ta.dispatchEvent(new Event('input', { bubbles: true }));
    }
    // Resolved a named gene → set the gene-symbol field so exon
    // context + base-edit AA consequences get the reading frame.
    if (data.kind === 'gene' && data.gene_symbol) {
      const geneEl = document.getElementById('crisprGeneSymbol');
      if (geneEl) geneEl.value = data.gene_symbol;
    }
    _showResolveChip(
      '<span class="crispr-resolve-ok">✓</span> ' + escapeHtml(data.label),
      false);
  } catch (err) {
    _showResolveChip(escapeHtml(err.message || 'Network error.'), true);
  } finally {
    _doResolve.inFlight = false;
    if (resolveBtn) { resolveBtn.disabled = false; resolveBtn.textContent = 'Fetch'; }
  }
}
|
| 3742 |
+
if (resolveBtn) resolveBtn.addEventListener('click', _doResolve);
|
| 3743 |
+
if (resolveInput) resolveInput.addEventListener('keydown', (e) => {
|
| 3744 |
+
if (e.key === 'Enter') { e.preventDefault(); _doResolve(); }
|
| 3745 |
+
});
|
| 3746 |
+
|
| 3747 |
// βββ Phase 2C-1: vector + vendor loader βββββββββββββββββββββββββ
|
| 3748 |
// Fetches /api/crispr/vectors on first design call, caches in memory
|
| 3749 |
// for the rest of the session, populates the picker + vendor row.
|
|
@@ -7,7 +7,7 @@
|
|
| 7 |
<!-- ?v= query bumps invalidate browser + iframe asset caches when app.css /
|
| 8 |
app.js change. Bump these numbers whenever you ship a frontend update β
|
| 9 |
without them, users keep getting the stale file for up to a week. -->
|
| 10 |
-
<link rel="stylesheet" href="/static/app.css?v=20260529-
|
| 11 |
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg?v=2" />
|
| 12 |
<link rel="apple-touch-icon" href="/static/favicon.svg?v=2" />
|
| 13 |
<!-- Mol* (PDBe) viewer for AlphaFold structure embed. Loaded async; the
|
|
@@ -472,6 +472,27 @@
|
|
| 472 |
</p>
|
| 473 |
</header>
|
| 474 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
<!-- Same line-numbered gutter pattern as the
|
| 476 |
directed-evolution paste textarea (see #pasteGutter).
|
| 477 |
Gutter spans render via app.js renderCrisprGutter()
|
|
@@ -1046,10 +1067,10 @@
|
|
| 1046 |
Authorization header on every same-origin /api/* call. Without
|
| 1047 |
this, signed-in users would still hit the anonymous-quota
|
| 1048 |
gate. -->
|
| 1049 |
-
<script src="/static/auth.js?v=20260529-
|
| 1050 |
<!-- Cloning reference data must load before app.js so the Designer
|
| 1051 |
can read VECTORS / ENZYMES / CLONING_METHODS / TAGS / LINKERS. -->
|
| 1052 |
-
<script src="/static/cloning_db.js?v=20260529-
|
| 1053 |
-
<script src="/static/app.js?v=20260529-
|
| 1054 |
</body>
|
| 1055 |
</html>
|
|
|
|
| 7 |
<!-- ?v= query bumps invalidate browser + iframe asset caches when app.css /
|
| 8 |
app.js change. Bump these numbers whenever you ship a frontend update β
|
| 9 |
without them, users keep getting the stale file for up to a week. -->
|
| 10 |
+
<link rel="stylesheet" href="/static/app.css?v=20260529-paste-anything" />
|
| 11 |
<link rel="icon" type="image/svg+xml" href="/static/favicon.svg?v=2" />
|
| 12 |
<link rel="apple-touch-icon" href="/static/favicon.svg?v=2" />
|
| 13 |
<!-- Mol* (PDBe) viewer for AlphaFold structure embed. Loaded async; the
|
|
|
|
| 472 |
</p>
|
| 473 |
</header>
|
| 474 |
|
| 475 |
+
<!-- Phase 3 (M2): paste-anything resolver. Type a gene
|
| 476 |
+
symbol (TP53), an accession (ENSTβ¦ / NM_β¦), or just
|
| 477 |
+
paste DNA below. Resolving a symbol/accession fills
|
| 478 |
+
the box and sets the gene context that base-edit
|
| 479 |
+
amino-acid consequences and the structure view use.
|
| 480 |
+
Only the (organism, identifier) is sent for lookups β
|
| 481 |
+
a pasted sequence never leaves the Space. -->
|
| 482 |
+
<div class="crispr-resolve-row">
|
| 483 |
+
<input
|
| 484 |
+
id="crisprResolveInput"
|
| 485 |
+
class="crispr-context-input crispr-resolve-input"
|
| 486 |
+
type="text"
|
| 487 |
+
maxlength="64"
|
| 488 |
+
autocomplete="off"
|
| 489 |
+
autocapitalize="characters"
|
| 490 |
+
spellcheck="false"
|
| 491 |
+
placeholder="Gene symbol (TP53), accession (ENSTβ¦ / NM_β¦), or paste DNA below" />
|
| 492 |
+
<button id="crisprResolveBtn" type="button" class="ghost">Fetch</button>
|
| 493 |
+
</div>
|
| 494 |
+
<div class="crispr-resolve-chip" id="crisprResolveChip" hidden></div>
|
| 495 |
+
|
| 496 |
<!-- Same line-numbered gutter pattern as the
|
| 497 |
directed-evolution paste textarea (see #pasteGutter).
|
| 498 |
Gutter spans render via app.js renderCrisprGutter()
|
|
|
|
| 1067 |
Authorization header on every same-origin /api/* call. Without
|
| 1068 |
this, signed-in users would still hit the anonymous-quota
|
| 1069 |
gate. -->
|
| 1070 |
+
<script src="/static/auth.js?v=20260529-paste-anything"></script>
|
| 1071 |
<!-- Cloning reference data must load before app.js so the Designer
|
| 1072 |
can read VECTORS / ENZYMES / CLONING_METHODS / TAGS / LINKERS. -->
|
| 1073 |
+
<script src="/static/cloning_db.js?v=20260529-paste-anything" defer></script>
|
| 1074 |
+
<script src="/static/app.js?v=20260529-paste-anything" defer></script>
|
| 1075 |
</body>
|
| 1076 |
</html>
|
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Paste-anything target resolution (dee/core/resolve.py).
|
| 2 |
+
|
| 3 |
+
NO NETWORK β Ensembl/NCBI fetchers are monkeypatched. Locks the input
|
| 4 |
+
classifier (sequence vs symbol vs accession) and the resolve dispatch,
|
| 5 |
+
including the privacy-preserving rule that raw sequences make no calls.
|
| 6 |
+
"""
|
| 7 |
+
import pytest
|
| 8 |
+
|
| 9 |
+
from dee.core import resolve as R
|
| 10 |
+
from dee.core import exon as E
|
| 11 |
+
from dee.core.exon import Exon, GeneStructure
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# βββββββββββββββββββββββββββ classifier βββββββββββββββββββββββββββ
|
| 15 |
+
|
| 16 |
+
@pytest.mark.parametrize("text,kind", [
|
| 17 |
+
("", "empty"),
|
| 18 |
+
("ACGT" * 10, "sequence"), # 40 nt one-liner
|
| 19 |
+
(">hdr\nACGTACGTACGTACGTACGTACGT", "sequence"), # FASTA
|
| 20 |
+
("TP53", "symbol"),
|
| 21 |
+
("BRCA1", "symbol"),
|
| 22 |
+
("ENST00000269305", "ensembl_tx"),
|
| 23 |
+
("ENSMUST00000000001", "ensembl_tx"),
|
| 24 |
+
("ENSG00000141510", "ensembl_gene"),
|
| 25 |
+
("NM_000546", "refseq"),
|
| 26 |
+
("NM_000546.6", "refseq"),
|
| 27 |
+
("XM_011535573", "refseq"),
|
| 28 |
+
("!!!", "unknown"),
|
| 29 |
+
])
|
| 30 |
+
def test_classify(text, kind):
|
| 31 |
+
assert R.classify(text)[0] == kind
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_classify_sequence_strips_to_clean_dna():
|
| 35 |
+
kind, val = R.classify("acgt ACGT\nnnNN gtca gtca gtca")
|
| 36 |
+
assert kind == "sequence"
|
| 37 |
+
assert val == "ACGTACGTNNNNGTCAGTCAGTCA"
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# βββββββββββββββββββββββββββ raw sequence (offline) βββββββββββββββββββββββββββ
|
| 41 |
+
|
| 42 |
+
def test_resolve_raw_sequence_makes_no_network_call(monkeypatch):
|
| 43 |
+
# Any network helper firing would be a privacy bug for raw input.
|
| 44 |
+
def _boom(*a, **k):
|
| 45 |
+
raise AssertionError("network call on raw-sequence path!")
|
| 46 |
+
monkeypatch.setattr(E, "_http_get_text", _boom)
|
| 47 |
+
monkeypatch.setattr(E, "_http_get_json", _boom)
|
| 48 |
+
monkeypatch.setattr(E, "fetch_gene_structure", _boom)
|
| 49 |
+
out = R.resolve_target("ACGT" * 20)
|
| 50 |
+
assert out["ok"] and out["kind"] == "sequence"
|
| 51 |
+
assert out["sequence"] == "ACGT" * 20
|
| 52 |
+
assert "pasted sequence" in out["label"]
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_resolve_short_sequence_errors():
|
| 56 |
+
out = R.resolve_target("ACGTACGT") # < 23 nt β symbol attempt β fails
|
| 57 |
+
assert out["ok"] is False
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# βββββββββββββββββββββββββββ gene symbol βββββββββββββββββββββββββββ
|
| 61 |
+
|
| 62 |
+
def _fake_gene():
|
| 63 |
+
return GeneStructure(
|
| 64 |
+
organism="human", gene_symbol="TP53", transcript_id="ENST00000269305",
|
| 65 |
+
strand=1, cds_sequence="ATG" + "GCT" * 50 + "TAA",
|
| 66 |
+
exons=[Exon(1, 1, 99, 0, 156)], last_junction_cds_pos=0,
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def test_resolve_symbol_requires_organism():
|
| 71 |
+
out = R.resolve_target("TP53", organism="")
|
| 72 |
+
assert out["ok"] is False and "Human or" in out["error"]
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def test_resolve_symbol_with_organism(monkeypatch):
|
| 76 |
+
monkeypatch.setattr(E, "fetch_gene_structure", lambda org, sym: _fake_gene())
|
| 77 |
+
out = R.resolve_target("TP53", organism="human")
|
| 78 |
+
assert out["ok"] and out["kind"] == "gene"
|
| 79 |
+
assert out["gene_symbol"] == "TP53"
|
| 80 |
+
assert out["sequence"].startswith("ATG")
|
| 81 |
+
assert "ENST00000269305" in out["label"]
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def test_resolve_symbol_not_found(monkeypatch):
|
| 85 |
+
monkeypatch.setattr(E, "fetch_gene_structure", lambda org, sym: None)
|
| 86 |
+
out = R.resolve_target("ZZZ9", organism="mouse")
|
| 87 |
+
assert out["ok"] is False and "Couldn't find" in out["error"]
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
# βββββββββββββββββββββββββββ accessions βββββββββββββββββββββββββββ
|
| 91 |
+
|
| 92 |
+
def test_resolve_ensembl_transcript(monkeypatch):
|
| 93 |
+
monkeypatch.setattr(E, "_http_get_text",
|
| 94 |
+
lambda url, **k: ">ENST\nATGAAACCCGGGTTTACGTACGTACGT")
|
| 95 |
+
out = R.resolve_target("ENST00000269305")
|
| 96 |
+
assert out["ok"] and out["kind"] == "ensembl"
|
| 97 |
+
assert out["sequence"] == "ATGAAACCCGGGTTTACGTACGTACGT"
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def test_resolve_ensembl_gene_resolves_canonical(monkeypatch):
|
| 101 |
+
monkeypatch.setattr(E, "_http_get_json", lambda url, **k: {
|
| 102 |
+
"Transcript": [
|
| 103 |
+
{"id": "ENST_other", "is_canonical": 0},
|
| 104 |
+
{"id": "ENST_canon", "is_canonical": 1},
|
| 105 |
+
]})
|
| 106 |
+
captured = {}
|
| 107 |
+
def _seq(url, **k):
|
| 108 |
+
captured["url"] = url
|
| 109 |
+
return ">x\nATGCGTACGTACGTACGTACGTACG"
|
| 110 |
+
monkeypatch.setattr(E, "_http_get_text", _seq)
|
| 111 |
+
out = R.resolve_target("ENSG00000141510")
|
| 112 |
+
assert out["ok"] and out["kind"] == "ensembl"
|
| 113 |
+
assert "ENST_canon" in captured["url"] # used the canonical transcript
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
def test_resolve_refseq(monkeypatch):
|
| 117 |
+
monkeypatch.setattr(E, "_http_get_text",
|
| 118 |
+
lambda url, **k: ">NM_000546\nATGGAGGAGCCGCAGTCAGAT")
|
| 119 |
+
out = R.resolve_target("NM_000546")
|
| 120 |
+
assert out["ok"] and out["kind"] == "refseq"
|
| 121 |
+
assert out["sequence"] == "ATGGAGGAGCCGCAGTCAGAT"
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def test_resolve_ensembl_fetch_failure(monkeypatch):
|
| 125 |
+
monkeypatch.setattr(E, "_http_get_text", lambda url, **k: None)
|
| 126 |
+
out = R.resolve_target("ENST00000269305")
|
| 127 |
+
assert out["ok"] is False and "Ensembl" in out["error"]
|