darkfrostx committed on
Commit
7a4453c
·
verified ·
1 Parent(s): 16593bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -536
app.py CHANGED
@@ -1,22 +1,19 @@
1
  from fastapi import FastAPI, Query, Path, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.responses import RedirectResponse, JSONResponse, FileResponse, StreamingResponse
4
- from typing import Dict, Any, Tuple, Optional, List, Literal
5
- import httpx, asyncio, time, os, hashlib, json, gzip, math
6
- from pathlib import Path as _Path
7
- from datetime import datetime
8
 
9
  APP_NAME = "neuro-mechanism-backend"
10
- CALLER_ID = "neuro-mech-backend-demo" # appears in STRING logs
11
- DATA_DIR = _Path("/tmp/neuro_mech_jobs")
12
- DATA_DIR.mkdir(parents=True, exist_ok=True)
13
 
14
  app = FastAPI(title=APP_NAME)
15
 
16
  app.add_middleware(
17
  CORSMiddleware,
18
  allow_origins=["*"], allow_credentials=True,
19
- allow_methods=["*"], allow_headers=["*"],
20
  )
21
 
22
  @app.get("/", include_in_schema=False)
@@ -31,17 +28,11 @@ def health():
31
  def endpoints():
32
  return JSONResponse({
33
  "GET": [
34
- "/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy&species=9606",
35
- "/mechanism_graph/nodes?job_id=<id>&page=1&page_size=200",
36
- "/mechanism_graph/edges?job_id=<id>&page=1&page_size=200",
37
- "/mechanism_graph/literature?job_id=<id>&page=1&page_size=50",
38
- "/mechanism_graph/regions?job_id=<id>&page=1&page_size=50",
39
- "/download/<job_id>/nodes (gz)",
40
- "/download/<job_id>/edges (gz)",
41
- "/download/<job_id>/literature (gz)",
42
- "/download/<job_id>/regions (gz)",
43
- "/util/synonyms?term=apathy&kind=phenotype",
44
- "/heuristics/regions_from_string?receptor=HTR2A&symptom=apathy&limit=40",
45
  "/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
46
  "/string/network?identifiers=HTR2A&species=9606",
47
  "/gpcrdb/protein?entry=htr2a_human",
@@ -53,8 +44,6 @@ def endpoints():
53
  ]
54
  })
55
 
56
- UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
57
-
58
  # ----------------- tiny in-memory TTL cache -----------------
59
  class TTLCache:
60
  def __init__(self, max_items=512):
@@ -75,12 +64,7 @@ class TTLCache:
75
  async with httpx.AsyncClient(headers=UA, timeout=30) as client:
76
  r = await client.get(url, params=params)
77
  r.raise_for_status()
78
- # Some third-party APIs return plain text/HTML on error;
79
- # Fast path: try JSON, else wrap as text.
80
- try:
81
- data = r.json()
82
- except Exception:
83
- data = {"text": r.text, "status_code": r.status_code}
84
  async with self._lock:
85
  if len(self.store) > self.max_items:
86
  self.store.pop(next(iter(self.store)))
@@ -89,10 +73,10 @@ class TTLCache:
89
 
90
  CACHE = TTLCache()
91
 
92
- # ----------------- polite throttling for STRING ------------------
93
  _last_string_call = 0.0
94
  async def throttle_string():
95
- """Be nice to STRING; ~1 req/sec as a courtesy."""
96
  global _last_string_call
97
  now = time.time()
98
  wait = 1.05 - (now - _last_string_call)
@@ -100,26 +84,29 @@ async def throttle_string():
100
  await asyncio.sleep(wait)
101
  _last_string_call = time.time()
102
 
103
- # ----------------- helpers -----------------
104
  async def get_json_cached(url: str, params: Optional[dict], ttl: int):
105
- return await CACHE.get(url, params, ttl)
106
-
107
- def _safe_float(x, default=0.0):
108
  try:
109
- return float(x)
110
- except Exception:
111
- return default
 
 
 
 
112
 
113
- def _hash_params(d: dict) -> str:
114
- return hashlib.sha1(json.dumps(d, sort_keys=True).encode()).hexdigest()
 
 
 
 
115
 
116
- # ----------------- base connectors -----------------
117
  @app.get("/lit/eupmc")
118
- async def europe_pmc_search(query: str, pageSize: int = 5, page: int = 1):
119
- # Europe PMC REST search (JSON)
120
- # docs: https://europepmc.org/RestfulWebService ; client vignette: europepmc R pkg
121
  url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
122
- params = {"query": query, "format": "json", "pageSize": pageSize, "page": page}
123
  return await get_json_cached(url, params, ttl=600)
124
 
125
  @app.get("/lit/pubmed_esearch")
@@ -164,576 +151,316 @@ async def gpcrdb_protein(entry: str):
164
 
165
  @app.get("/string/network")
166
  async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
167
- # STRING JSON network endpoint
168
  await throttle_string()
169
  url = "https://string-db.org/api/json/network"
170
  params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
171
  return await get_json_cached(url, params, ttl=3600)
172
 
173
- # ----------------- synonym utilities -----------------
174
- # curated region slang/aliases (additive to OLS)
175
- CURATED_REGION_SYNONYMS = {
176
- "prefrontal cortex": ["PFC", "frontal cortex", "dorsolateral prefrontal cortex", "dlPFC",
177
- "ventromedial prefrontal cortex", "vmPFC", "orbitofrontal cortex", "OFC"],
178
- "anterior cingulate cortex": ["ACC", "dorsal ACC", "dACC", "rostral ACC", "rACC"],
179
- "nucleus accumbens": ["NAc", "ventral striatum"],
180
  "ventral tegmental area": ["VTA"],
181
- "substantia nigra": ["SN", "pars compacta", "SNc"],
182
- "hippocampus": ["hippocampal formation", "CA1", "CA3", "dentate gyrus"],
183
- "amygdala": ["basolateral amygdala", "BLA", "central amygdala"]
 
 
 
 
184
  }
185
 
186
- async def _ols_synonyms(term: str, ontologies: Optional[List[str]] = None) -> List[str]:
187
- # OLS4 search; aggregate synonyms for top hits containing the term
188
  url = "https://www.ebi.ac.uk/ols4/api/search"
189
- params = {"q": term}
190
- if ontologies:
191
- # OLS4 supports multiple ontology filters as repeated params
192
- # We'll just join as comma-separated for brevity (works for OLS4)
193
- params["ontology"] = ",".join(ontologies)
194
  data = await get_json_cached(url, params, ttl=86400)
195
- syns = set()
196
  try:
197
- docs = data.get("response", {}).get("docs", [])
198
- for d in docs[:5]:
199
- for s in d.get("synonyms", []) or []:
200
- if isinstance(s, str):
201
- syns.add(s)
 
202
  except Exception:
203
  pass
204
- return list(syns)
205
-
206
- async def _mygene_aliases(symbol: str) -> List[str]:
207
- # MyGene.info v3; pull aliases/other names for the main focus gene
 
 
 
 
 
 
 
 
208
  url = "https://mygene.info/v3/query"
209
- params = {"q": f"symbol:{symbol}", "fields": "symbol,name,alias,alias_symbol,other_names", "size": 1, "species": "human"}
210
  data = await get_json_cached(url, params, ttl=86400)
211
- syns = set()
212
  try:
213
- hits = data.get("hits", [])
214
- if hits:
215
- h = hits[0]
216
- for fld in ("symbol","name"):
217
- v = h.get(fld)
218
- if isinstance(v, str):
219
- syns.add(v)
220
- for fld in ("alias","alias_symbol","other_names"):
221
- v = h.get(fld)
222
- if isinstance(v, list):
223
- for x in v:
224
- if isinstance(x, str):
225
- syns.add(x)
226
  except Exception:
227
  pass
228
- return list(syns)
 
 
 
 
 
 
229
 
230
  @app.get("/util/synonyms")
231
- async def util_synonyms(term: str, kind: Literal["region","gene","phenotype","auto"]="auto"):
232
- """
233
- Fetch synonyms for a term.
234
- region: OLS4 (UBERON,HBP/HPO where applicable) + curated slang
235
- gene: MyGene.info aliases
236
- phenotype: OLS4(HPO)
237
- auto: choose gene if ALLCAPS letters+digits, else phenotype->region fallback.
238
- """
239
- k = kind
240
- if k == "auto":
241
- k = "gene" if term.isupper() else "phenotype"
242
- syns = set([term])
243
-
244
- if k == "region":
245
- syns.update(CURATED_REGION_SYNONYMS.get(term.lower(), []))
246
- syns.update(await _ols_synonyms(term, ontologies=["uberon","hbp","hpo","ncit"]))
247
- elif k == "gene":
248
- syns.update(await _mygene_aliases(term))
249
- elif k == "phenotype":
250
- syns.update(await _ols_synonyms(term, ontologies=["hpo","efo","mondo"]))
251
-
252
- return {"term": term, "kind": k, "synonyms": sorted({s for s in syns if isinstance(s, str) and len(s) <= 60})}
253
-
254
- # ----------------- region heuristic (upgraded) -----------------
255
  REGION_TERMS_DEFAULT = [
256
- "prefrontal cortex","anterior cingulate cortex","mPFC","ACC","nucleus accumbens","ventral striatum",
257
  "dorsal striatum","caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
258
- "insula","ventral tegmental area","VTA","substantia nigra","cerebellum"
259
  ]
260
 
 
 
 
 
 
 
 
 
 
261
  def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
262
  genes = set()
263
  f = focus.upper()
264
  for e in edges or []:
265
  for k in ("preferredName_A","preferredName_B"):
266
  g = e.get(k)
267
- if g and isinstance(g,str) and g.upper() != f:
268
  genes.add(g)
269
  return list(genes)
270
 
271
- async def _eupmc_hitcount(q: str) -> int:
272
- # Europe PMC search hitCount (pageSize=0)
273
- url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
274
- params = {"query": q, "format": "json", "pageSize": 0}
275
- data = await get_json_cached(url, params, ttl=3600)
276
- try:
277
- return int(data.get("hitCount", 0))
278
- except Exception:
279
- return 0
280
-
281
  @app.get("/heuristics/regions_from_string")
282
  async def regions_from_string(
283
  receptor: str = Query(..., description="e.g., HTR2A"),
284
  species: int = 9606,
285
  limit: int = 40,
286
- regions: Optional[str] = Query(None, description="comma-separated region terms (optional)"),
287
- symptom: Optional[str] = Query(None, description="optional phenotype/symptom to weight co-mentions (e.g., apathy)")
 
288
  ):
289
  """
290
- Heuristic: rank brain regions by STRING neighbors + Europe PMC co-mentions, with synonyms & tiered fallbacks.
291
- Tiers (all unquoted for flexible match):
292
- T1: (region_syns) AND ((receptor_syns) OR neighbors) AND (symptom_syns?) weight 1.0
293
- T2: (region_syns) AND (receptor_syns OR neighbors) weight 0.6
294
- T3: (region_syns) AND (receptor_syns) weight 0.5
295
- T4: (region_syns) AND (symptom_syns) weight 0.3
296
- Final score = log10(weighted_hits+1) * mean_top_STRING_conf
297
  """
298
  # 1) STRING neighbors
299
  edges = await string_network(receptor, species=species, limit=limit)
300
  neighbors = collect_gene_symbols_from_string(edges, receptor)
301
 
302
- # STRING confidences
303
- conf: Dict[str, float] = {}
304
- for e in edges or []:
305
- a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), _safe_float(e.get("score", 0))
306
- if a and a.upper() != receptor.upper():
307
- conf[a] = max(conf.get(a, 0.0), score)
308
- if b and b.upper() != receptor.upper():
309
- conf[b] = max(conf.get(b, 0.0), score)
310
- mean_conf = sum(conf.values())/max(len(conf),1) if conf else 0.2
311
-
312
  # 2) synonyms
313
- receptor_syns = await _mygene_aliases(receptor)
314
- symptom_syns = []
315
- if symptom:
316
- s = await util_synonyms(symptom, kind="phenotype")
317
- symptom_syns = s["synonyms"]
318
-
319
  region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
320
- # Build clauses (unquoted OR lists)
321
- gene_clause = " OR ".join(sorted({receptor} | set(receptor_syns) | set(neighbors[:25])))
322
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  results = []
324
- tasks = []
325
- tier_defs = []
 
 
326
  for region in region_list:
327
- # region synonyms
328
- rs = await util_synonyms(region, kind="region")
329
- region_syns = rs["synonyms"]
330
- region_clause = " OR ".join(region_syns)
331
 
332
- # tiers
333
  # T1
334
- if symptom and symptom_syns:
335
- t1 = f"({region_clause}) AND (({gene_clause})) AND ({' OR '.join(symptom_syns)})"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  else:
337
- t1 = f"({region_clause}) AND (({gene_clause}))"
338
- t2 = f"({region_clause}) AND (({gene_clause}))"
339
- t3 = f"({region_clause}) AND ({' OR '.join(sorted(set([receptor] + receptor_syns)))})"
340
- t4 = f"({region_clause}) AND ({' OR '.join(symptom_syns)})" if symptom_syns else None
341
-
342
- tiers = [("t1",1.0,t1), ("t2",0.6,t2), ("t3",0.5,t3)]
343
- if t4: tiers.append(("t4",0.3,t4))
344
-
345
- # schedule hitCount calls
346
- tier_defs.append((region, tiers))
347
- for _,_,q in tiers:
348
- tasks.append(_eupmc_hitcount(q))
349
-
350
- # gather all counts in-order
351
- counts_all = await asyncio.gather(*tasks)
352
- # fold back into regions
353
- idx = 0
354
- for region, tiers in tier_defs:
355
- weighted = 0.0
356
- tier_counts = {}
357
- for name, weight, _q in tiers:
358
- hc = counts_all[idx]; idx += 1
359
- tier_counts[name] = hc
360
- weighted += weight * hc
361
- score = math.log10(weighted + 1.0) * mean_conf
362
- results.append({"region": region, "tiers": tier_counts, "weighted_hits": int(round(weighted)),
363
- "weighted_score": round(score, 4)})
364
 
365
  results.sort(key=lambda x: x["weighted_score"], reverse=True)
366
  return {
367
  "focus": receptor,
368
  "neighbors_considered": neighbors[:25],
369
  "regions_ranked": results,
370
- "notes": "STRING + Europe PMC with synonyms and tiered fallbacks (unquoted)."
371
  }
372
 
373
- # ----------------- MANIFEST + PAGED SECTIONS + DOWNLOAD -----------------
374
- def _job_dir(job_id: str) -> _Path:
375
- d = DATA_DIR / job_id
376
- d.mkdir(parents=True, exist_ok=True)
377
- return d
378
-
379
- def _write_gz_jsonl(path: _Path, items: List[dict]):
380
- with gzip.open(path, "wt", encoding="utf-8") as gz:
381
- for it in items:
382
- gz.write(json.dumps(it, ensure_ascii=False) + "\n")
383
-
384
- def _read_gz_page(path: _Path, page: int, page_size: int) -> Tuple[int, List[dict]]:
385
- total = 0
386
- start = (page - 1) * page_size
387
- end = start + page_size
388
- out = []
389
- with gzip.open(path, "rt", encoding="utf-8") as gz:
390
- for i, line in enumerate(gz):
391
- if not line.strip():
392
- continue
393
- if i >= start and i < end:
394
- out.append(json.loads(line))
395
- total += 1
396
- return total, out
397
-
398
- async def _build_mech_job(params: dict) -> dict:
399
- """
400
- Build nodes/edges/literature/regions; write gz NDJSON + meta.
401
- """
402
- receptor = params["receptor"]
403
- species = int(params.get("species", 9606))
404
- symptom = params.get("symptom")
405
- string_limit = int(params.get("string_limit", 200))
406
- eupmc_page_size = int(params.get("eupmc_page_size", 100))
407
- eupmc_max_pages = int(params.get("eupmc_max_pages", 3))
408
-
409
- job_id = _hash_params(params)
410
- d = _job_dir(job_id)
411
- meta_path = d / "meta.json"
412
- if meta_path.exists():
413
- return json.loads(meta_path.read_text("utf-8"))
414
-
415
- # 1) STRING edges + nodes
416
- edges = await string_network(receptor, species=species, limit=string_limit)
417
- edge_items = []
418
- nodes = set([receptor])
419
- for e in edges or []:
420
- a = e.get("preferredName_A"); b = e.get("preferredName_B")
421
- score = _safe_float(e.get("score", 0))
422
- if a and b:
423
- edge_items.append({"a": a, "b": b, "score": score})
424
- nodes.add(a); nodes.add(b)
425
- node_items = [{"symbol": n, "seed": (n.upper()==receptor.upper())} for n in sorted(nodes)]
426
-
427
- _write_gz_jsonl(d / "edges.jsonl.gz", edge_items)
428
- _write_gz_jsonl(d / "nodes.jsonl.gz", node_items)
429
-
430
- # 2) Europe PMC literature for (receptor AND symptom?) else receptor
431
- lit_items = []
432
- base_q = f"{receptor} AND {symptom}" if symptom else receptor
433
- for page in range(1, eupmc_max_pages+1):
434
- res = await europe_pmc_search(base_q, pageSize=eupmc_page_size, page=page)
435
- hits = res.get("resultList", {}).get("result", []) or []
436
- for h in hits:
437
- lit_items.append({
438
- "id": h.get("id"),
439
- "source": h.get("source"), "title": h.get("title"),
440
- "pubYear": h.get("pubYear"), "authorString": h.get("authorString"),
441
- "journalTitle": h.get("journalTitle"), "doi": h.get("doi")
442
- })
443
- # stop early if last page
444
- if len(hits) < eupmc_page_size:
445
- break
446
- _write_gz_jsonl(d / "literature.jsonl.gz", lit_items)
447
-
448
- # 3) Regions heuristic (with symptom)
449
- reg = await regions_from_string(receptor=receptor, species=species, limit=min(100, string_limit), regions=None, symptom=symptom)
450
- reg_items = []
451
- for r in reg.get("regions_ranked", []):
452
- reg_items.append(r)
453
- _write_gz_jsonl(d / "regions.jsonl.gz", reg_items)
454
-
455
- meta = {
456
- "job_id": job_id,
457
- "created": datetime.utcnow().isoformat() + "Z",
458
- "params": params,
459
- "counts": {
460
- "nodes": len(node_items),
461
- "edges": len(edge_items),
462
- "literature": len(lit_items),
463
- "regions": len(reg_items)
464
- },
465
- "sections": ["nodes","edges","literature","regions"]
466
- }
467
- meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
468
- return meta
469
 
470
  @app.get("/mechanism_graph_manifest")
471
  async def mechanism_graph_manifest(
472
- receptor: str = Query(...),
 
473
  species: int = 9606,
474
- symptom: Optional[str] = None,
475
- string_limit: int = 200,
476
- eupmc_page_size: int = 100,
477
- eupmc_max_pages: int = 3
478
  ):
479
  """
480
- Build the full mechanism dataset server-side and return a manifest with job_id + counts.
481
- The actual data is stored as gzipped NDJSON and can be:
482
- - paged via /mechanism_graph/{section}?job_id=...&page=1&page_size=...
483
- - or downloaded as a single gz file via /download/{job_id}/{section}
484
  """
485
- params = {
486
- "receptor": receptor, "species": species, "symptom": symptom,
487
- "string_limit": string_limit, "eupmc_page_size": eupmc_page_size, "eupmc_max_pages": eupmc_max_pages
488
- }
489
- meta = await _build_mech_job(params)
490
- return meta
491
 
492
- @app.get("/mechanism_graph/{section}")
493
- async def mechanism_graph_section(
494
- section: Literal["nodes","edges","literature","regions"] = Path(...),
495
- job_id: str = Query(...),
496
- page: int = 1,
497
- page_size: int = 100
498
- ):
499
- """
500
- Return a single page from a section (nodes|edges|literature|regions).
501
- """
502
- d = _job_dir(job_id)
503
- p = d / f"{section}.jsonl.gz"
504
- if not p.exists():
505
- raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
506
 
507
- total, items = _read_gz_page(p, page=page, page_size=page_size)
508
- return {
509
- "job_id": job_id,
510
- "section": section,
511
- "page": page, "page_size": page_size,
512
- "total": total,
513
- "items": items
514
- }
515
-
516
- @app.get("/download/{job_id}/{section}")
517
- async def download_section(job_id: str, section: Literal["nodes","edges","literature","regions"]):
518
- """
519
- Download the full gzipped NDJSON for a section.
520
- """
521
- d = _job_dir(job_id)
522
- p = d / f"{section}.jsonl.gz"
523
- if not p.exists():
524
- raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
525
- return FileResponse(
526
- path=str(p),
527
- filename=f"{APP_NAME}-{job_id}-{section}.jsonl.gz",
528
- media_type="application/gzip"
529
- )
530
-
531
- # ===================== ADD BELOW YOUR EXISTING CODE =====================
532
- from fastapi.responses import StreamingResponse, FileResponse
533
- import gzip, io, secrets, math, pathlib, datetime
534
-
535
- # -------- small in-memory job store (sections kept per job) ----------
536
- JOBS: Dict[str, Dict[str, Any]] = {}
537
- JOB_TTL_SECONDS = 3600
538
-
539
- def _mk_job_id() -> str:
540
- return secrets.token_hex(8)
541
-
542
- def _save_job(sections: Dict[str, Any]) -> str:
543
- # prune old
544
- now = time.time()
545
- for k, v in list(JOBS.items()):
546
- if now - v.get("_ts", now) > JOB_TTL_SECONDS:
547
- JOBS.pop(k, None)
548
- jid = _mk_job_id()
549
- JOBS[jid] = {"_ts": now, **sections}
550
- return jid
551
-
552
- def _get_job(jid: str) -> Optional[Dict[str, Any]]:
553
- job = JOBS.get(jid)
554
- if not job:
555
- return None
556
- if time.time() - job.get("_ts", 0) > JOB_TTL_SECONDS:
557
- JOBS.pop(jid, None)
558
- return None
559
- return job
560
-
561
- def _gzipped_json_bytes(obj: Any) -> bytes:
562
- raw = orjson.dumps(obj) # fast & small
563
- buf = io.BytesIO()
564
- with gzip.GzipFile(fileobj=buf, mode="wb", compresslevel=6) as z:
565
- z.write(raw)
566
- return buf.getvalue()
567
-
568
- # --------------------- Synonym utilities ------------------------------
569
- async def _ols4_synonyms(term: str, size: int = 20) -> List[str]:
570
- """Region/ontology synonyms via OLS4 search."""
571
- url = "https://www.ebi.ac.uk/ols4/api/search"
572
- params = {"q": term, "size": size}
573
- data = await get_json_cached(url, params, ttl=86400)
574
- syns = set()
575
- for hit in data.get("response", {}).get("docs", []):
576
- for k in ("synonym", "label"):
577
- val = hit.get(k)
578
- if isinstance(val, list):
579
- syns.update([s for s in val if isinstance(s, str)])
580
- elif isinstance(val, str):
581
- syns.add(val)
582
- return sorted({s for s in syns if s.lower() != term.lower()})
583
-
584
- async def _mygene_synonyms(gene: str, size: int = 5) -> List[str]:
585
- """Gene symbol/name/alias via MyGene.info."""
586
- url = "https://mygene.info/v3/query"
587
- params = {"q": gene, "fields": "symbol,name,alias", "species": "human", "size": size}
588
- data = await get_json_cached(url, params, ttl=86400)
589
- syns = set()
590
- for h in data.get("hits", []):
591
- for k in ("symbol", "name", "alias"):
592
- v = h.get(k)
593
- if isinstance(v, list):
594
- syns.update([s for s in v if isinstance(s, str)])
595
- elif isinstance(v, str):
596
- syns.add(v)
597
- return sorted({s for s in syns if s.lower() != gene.lower()})
598
-
599
- @app.get("/util/synonyms")
600
- async def util_synonyms(term: str = Query(...), kind: str = Query("region", description="region|gene|phenotype"), size: int = 20):
601
  try:
602
- if kind == "gene":
603
- syns = await _mygene_synonyms(term, size=min(size, 20))
604
- else:
605
- syns = await _ols4_synonyms(term, size=min(size, 50))
606
- return {"term": term, "kind": kind, "synonyms": syns}
607
- except Exception as e:
608
- return {"term": term, "kind": kind, "synonyms": [], "error": str(e)}
609
-
610
- # ------ improved regions heuristic: synonyms + unquoted + fallbacks -----
611
- REGION_SYNONYM_OVERRIDES = {
612
- "prefrontal cortex": ["PFC", "mPFC", "vmPFC", "dorsolateral prefrontal cortex", "DLPFC", "ventromedial prefrontal cortex"],
613
- "anterior cingulate cortex": ["ACC", "dACC", "pregenual ACC", "subgenual ACC", "sgACC"],
614
- "nucleus accumbens": ["NAc", "ventral striatum", "accumbens"]
615
- }
616
-
617
- async def _region_terms_with_synonyms(base_terms: List[str]) -> Dict[str, List[str]]:
618
- out: Dict[str, List[str]] = {}
619
- for term in base_terms:
620
- # manual seeds + OLS4 expansion
621
- syns = set(REGION_SYNONYM_OVERRIDES.get(term, []))
622
- try:
623
- syns.update(await _ols4_synonyms(term, size=20))
624
- except Exception:
625
- pass
626
- # keep short list to control URL size
627
- out[term] = sorted(list(syns))[:12]
628
- return out
629
-
630
- @app.get("/heuristics/regions_from_string")
631
- async def regions_from_string(
632
- receptor: str = Query(..., description="e.g., HTR2A"),
633
- species: int = 9606,
634
- limit: int = 40,
635
- regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
636
- expand: int = Query(1, description="if 1, use OLS4 synonyms and manual aliases"),
637
- ):
638
- # 1) STRING neighbors (cached)
639
- edges = await string_network(receptor, species=species, limit=limit)
640
- neighbors = collect_gene_symbols_from_string(edges, receptor)
641
- conf: Dict[str, float] = {}
642
- for e in edges:
643
- a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), float(e.get("score", 0))
644
- if a and a.upper() != receptor.upper(): conf[a] = max(conf.get(a, 0.0), score)
645
- if b and b.upper() != receptor.upper(): conf[b] = max(conf.get(b, 0.0), score)
646
 
647
- region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
648
- syn_map = await _region_terms_with_synonyms(region_list) if expand else {t: [] for t in region_list}
 
649
 
650
- # 2) Europe PMC hitCount with broad, unquoted ORs; fallback sequence
651
- gene_clause = " OR ".join([receptor] + neighbors[:25])
652
- results = []
653
- for region in region_list:
654
- terms = [region] + syn_map.get(region, [])
655
- # broad query (no quotes)
656
- q1 = f'({ " OR ".join(terms) }) AND ({gene_clause})'
657
- h1 = await eupmc_hitcount(q1)
658
- if h1 == 0:
659
- # fallback 1: region only with receptor
660
- q2 = f'({ " OR ".join(terms) }) AND ({receptor})'
661
- h2 = await eupmc_hitcount(q2)
662
- hc = h2
663
- else:
664
- hc = h1
665
 
666
- mean_conf = sum(conf.values())/max(len(conf),1)
667
- score = (math.log10(hc+1.0)) * (mean_conf if conf else 0.2)
668
- results.append({"region": region, "synonyms_used": terms[1:], "hits": hc, "weighted_score": round(score, 4)})
 
 
 
669
 
670
- results.sort(key=lambda x: x["weighted_score"], reverse=True)
671
- return {
672
- "focus": receptor,
673
- "neighbors_considered": neighbors[:25],
674
- "regions_ranked": results,
675
- "notes": "Heuristic: STRING neighbors + EuropePMC co-occurrence, with synonyms, broad match, and fallbacks."
676
- }
677
 
678
- # ------------------- MANIFEST / SECTION / DOWNLOAD ---------------------
679
- @app.get("/mechanism_graph_manifest")
680
- async def mechanism_graph_manifest(
681
- receptor: str = Query(...),
682
- symptom: str = Query("apathy"),
683
  species: int = 9606,
684
- max_neighbors: int = 50,
685
- max_hits: int = 25
 
686
  ):
687
- """Prepare big graph in sections; return a manifest + job_id."""
688
- gpcr_entry = f"{receptor.lower()}_human" if not receptor.lower().endswith("_human") else receptor.lower()
689
-
690
- # prefetch pieces (cached)
691
- gpcr = await get_json_cached(f"https://gpcrdb.org/services/protein/{gpcr_entry}", None, ttl=86400)
692
- string_r = await get_json_cached("https://string-db.org/api/json/network",
693
- {"identifiers": receptor, "species": species, "caller_identity": CALLER_ID, "limit": max_neighbors},
694
- ttl=3600)
695
- lit_r = await get_json_cached("https://www.ebi.ac.uk/europepmc/webservices/rest/search",
696
- {"query": f"{receptor} AND {symptom}", "format": "json", "pageSize": max_hits},
697
- ttl=900)
698
- regions_r = await regions_from_string.__wrapped__(receptor=receptor, species=species, limit=40, regions=None, expand=1)
699
-
700
- sections = {
701
- "nodes": {"receptor": receptor, "gpcrdb": gpcr},
702
- "edges": {"string": string_r},
703
- "literature": {"eupmc": lit_r},
704
- "regions": regions_r,
705
- "provenance": {
706
- "built_at": datetime.datetime.utcnow().isoformat() + "Z",
707
- "params": {"receptor": receptor, "symptom": symptom, "species": species,
708
- "max_neighbors": max_neighbors, "max_hits": max_hits}
709
- }
710
- }
711
- jid = _save_job(sections)
712
- counts = {
713
- "edges": len(sections["edges"].get("string", [])),
714
- "literature_hits": int(sections["literature"].get("eupmc", {}).get("hitCount", 0)),
715
- "regions": len(sections["regions"].get("regions_ranked", []))
716
- }
717
- return {"job_id": jid, "sections": list(sections.keys()), "counts": counts}
718
-
719
- @app.get("/mechanism_graph/{section}")
720
- async def mechanism_graph_section(section: str, job_id: str = Query(...)):
721
- """Return one section to keep payloads small."""
722
- job = _get_job(job_id)
723
- if not job or section not in job:
724
- return JSONResponse({"error": "missing job or section"}, status_code=404)
725
- return job[section]
 
 
 
 
 
 
 
 
726
 
727
  @app.get("/download/{job_id}/{section}")
728
  async def download_section(job_id: str, section: str):
729
- """Download a section as gzipped JSON (useful for huge payloads)."""
730
- job = _get_job(job_id)
731
- if not job or section not in job:
732
- return JSONResponse({"error": "missing job or section"}, status_code=404)
733
- data = _gzipped_json_bytes(job[section])
734
- filename = f"{APP_NAME}-{job_id}-{section}.json.gz"
735
- return StreamingResponse(io.BytesIO(data),
 
736
  media_type="application/gzip",
737
- headers={"Content-Disposition": f'attachment; filename="{filename}"'})
738
- # ===================== END ADD-ON BLOCK =====================
739
-
 
1
  from fastapi import FastAPI, Query, Path, HTTPException
2
  from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import RedirectResponse, JSONResponse, StreamingResponse, FileResponse
4
+ import httpx, asyncio, time, os, hashlib, json, io, gzip, math
5
+ from typing import Dict, Any, Tuple, Optional, List
 
 
6
 
7
  APP_NAME = "neuro-mechanism-backend"
8
+ CALLER_ID = "neuro-mech-backend-demo" # shows in STRING logs / rate fairness
9
+ UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
 
10
 
11
  app = FastAPI(title=APP_NAME)
12
 
13
  app.add_middleware(
14
  CORSMiddleware,
15
  allow_origins=["*"], allow_credentials=True,
16
+ allow_methods=["*"], allow_headers=["*"]
17
  )
18
 
19
  @app.get("/", include_in_schema=False)
 
28
  def endpoints():
29
  return JSONResponse({
30
  "GET": [
31
+ "/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy",
32
+ "/mechanism_graph/regions?receptor=HTR2A&symptom=apathy",
33
+ "/download/{job_id}/{section}",
34
+ "/heuristics/regions_from_string?receptor=HTR2A",
35
+ "/util/synonyms?term=ACC&kind=region",
 
 
 
 
 
 
36
  "/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
37
  "/string/network?identifiers=HTR2A&species=9606",
38
  "/gpcrdb/protein?entry=htr2a_human",
 
44
  ]
45
  })
46
 
 
 
47
  # ----------------- tiny in-memory TTL cache -----------------
48
  class TTLCache:
49
  def __init__(self, max_items=512):
 
64
  async with httpx.AsyncClient(headers=UA, timeout=30) as client:
65
  r = await client.get(url, params=params)
66
  r.raise_for_status()
67
+ data = r.json()
 
 
 
 
 
68
  async with self._lock:
69
  if len(self.store) > self.max_items:
70
  self.store.pop(next(iter(self.store)))
 
73
 
74
  CACHE = TTLCache()
75
 
76
+ # --------------- polite throttling for STRING ----------------
77
  _last_string_call = 0.0
78
  async def throttle_string():
79
+ """Be nice to STRING; ~1 req/sec is a good courtesy."""
80
  global _last_string_call
81
  now = time.time()
82
  wait = 1.05 - (now - _last_string_call)
 
84
  await asyncio.sleep(wait)
85
  _last_string_call = time.time()
86
 
87
+ # ----------------- Helpers -----------------
88
  async def get_json_cached(url: str, params: Optional[dict], ttl: int):
 
 
 
89
  try:
90
+ return await CACHE.get(url, params, ttl)
91
+ except Exception as e:
92
+ return {"error": str(e), "url": url, "params": params}
93
+
94
def job_key(receptor: str, symptom: str) -> str:
    """Return a 16-character hex key for a (receptor, symptom) job.

    The key is the first 16 hex digits of the SHA-1 of
    ``"<receptor>|<symptom>|<unix seconds>"``, so identical inputs hashed
    within the same second produce the same key.
    """
    stamp = int(time.time())
    digest = hashlib.sha1(f"{receptor}|{symptom}|{stamp}".encode())
    return digest.hexdigest()[:16]
97
 
98
def gz_json_bytes(obj: Any) -> bytes:
    """Serialize *obj* to JSON and return it gzip-compressed.

    The JSON text keeps non-ASCII characters as-is (``ensure_ascii=False``)
    and is encoded as UTF-8 before compression.

    Raises:
        TypeError: if *obj* is not JSON-serializable (from ``json.dumps``).
    """
    payload = json.dumps(obj, ensure_ascii=False).encode("utf-8")
    # One-shot stdlib helper replaces the manual BytesIO + GzipFile dance;
    # the default compression level (9) matches GzipFile's default.
    return gzip.compress(payload)
104
 
105
+ # ----------------- External API wrappers -----------------
106
  @app.get("/lit/eupmc")
107
+ async def europe_pmc_search(query: str, pageSize: int = 5):
 
 
108
  url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
109
+ params = {"query": query, "format": "json", "pageSize": pageSize}
110
  return await get_json_cached(url, params, ttl=600)
111
 
112
  @app.get("/lit/pubmed_esearch")
 
151
 
152
  @app.get("/string/network")
153
  async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
 
154
  await throttle_string()
155
  url = "https://string-db.org/api/json/network"
156
  params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
157
  return await get_json_cached(url, params, ttl=3600)
158
 
159
# ----------------- Synonyms (regions/genes/phenotypes) --------------
# Hand-curated seed aliases per brain region; an empty list means the
# region has no built-in aliases.
REGION_SEED_SYNONYMS: Dict[str, List[str]] = {
    "prefrontal cortex": [
        "PFC",
        "mPFC",
        "vmPFC",
        "dlPFC",
        "dorsolateral prefrontal cortex",
        "ventromedial prefrontal cortex",
    ],
    "anterior cingulate cortex": [
        "ACC",
        "dACC",
        "pgACC",
        "sgACC",
        "subgenual cingulate",
    ],
    "nucleus accumbens": ["NAc", "ventral striatum", "accumbens"],
    "ventral tegmental area": ["VTA"],
    "substantia nigra": ["SN", "SNc", "pars compacta"],
    "hippocampus": ["HC"],
    "amygdala": [],
    "insula": ["insular cortex"],
    "thalamus": [],
    "hypothalamus": [],
    "cerebellum": [],
}
174
 
async def ols4_synonyms(term: str, ontology: Optional[str] = None) -> List[str]:
    """Collect synonyms and labels for *term* from the OLS4 search API.

    Best-effort: any response that does not have the expected
    ``response.docs`` list shape (including the error dict returned by
    ``get_json_cached``) yields an empty list.

    Args:
        term: Free-text search term.
        ontology: Optional ontology id used to restrict the search.

    Returns:
        Up to 50 stripped, non-empty strings, de-duplicated
        case-insensitively in first-seen order.
    """
    url = "https://www.ebi.ac.uk/ols4/api/search"
    params = {"q": term, "rows": 20}
    if ontology:
        params["ontology"] = ontology
    data = await get_json_cached(url, params, ttl=86400)

    response = data.get("response") if isinstance(data, dict) else None
    docs = response.get("docs") if isinstance(response, dict) else None
    if not isinstance(docs, list):
        docs = []

    candidates = []
    for doc in docs:
        if not isinstance(doc, dict):
            continue
        synonyms = doc.get("synonym")
        if isinstance(synonyms, (list, tuple)):
            candidates.extend(synonyms)
        elif synonyms:
            # A lone string must be appended whole, not extended char by char.
            candidates.append(synonyms)
        if "label" in doc:
            candidates.append(doc["label"])

    # Dedup (case-insensitive) while keeping the first spelling seen.
    out = []
    seen = set()
    for cand in candidates:
        if cand is None:
            continue
        text = str(cand).strip()
        folded = text.lower()
        if text and folded not in seen:
            seen.add(folded)
            out.append(text)
    return out[:50]
201
+
async def mygene_synonyms(symbol: str) -> List[str]:
    """Collect gene symbols, names and aliases for *symbol* from MyGene.info.

    Best-effort: a response without a ``hits`` list (including the error
    dict returned by ``get_json_cached``) yields an empty list.

    Returns:
        Up to 50 stripped, non-empty strings, de-duplicated
        case-insensitively in first-seen order.
    """
    url = "https://mygene.info/v3/query"
    params = {"q": symbol, "fields": "symbol,name,alias,other_names", "size": 5}
    data = await get_json_cached(url, params, ttl=86400)

    hits = data.get("hits") if isinstance(data, dict) else None
    if not isinstance(hits, list):
        hits = []

    candidates = []
    for hit in hits:
        if not isinstance(hit, dict):
            continue
        for field in ("symbol", "name"):
            if field in hit:
                candidates.append(hit[field])
        for field in ("alias", "other_names"):
            value = hit.get(field)
            if isinstance(value, list):
                candidates.extend(value)
            elif isinstance(value, str):
                # A single alias may arrive as a bare string, not a list.
                candidates.append(value)

    # unique (case-insensitive), first spelling wins
    out, seen = [], set()
    for cand in candidates:
        if cand is None:
            continue
        text = str(cand).strip()
        folded = text.lower()
        if text and folded not in seen:
            seen.add(folded)
            out.append(text)
    return out[:50]
223
 
@app.get("/util/synonyms")
async def util_synonyms(term: str, kind: str = Query("region", enum=["region","gene","phenotype"])):
    """Return a sorted, de-duplicated synonym list for *term*.

    Args:
        term: Term to expand; surrounding whitespace is stripped.
        kind: ``"region"`` (built-in seeds + OLS ``uberon``), ``"gene"``
            (MyGene.info); any other value is handled as a phenotype
            (OLS ``hp``). Pass it explicitly when calling this function
            directly instead of through the route.

    Returns:
        ``{"term", "kind", "synonyms"}`` where ``synonyms`` always contains
        the stripped term itself.
    """
    term_norm = term.strip()
    if kind == "region":
        seeds = REGION_SEED_SYNONYMS.get(term_norm.lower(), [])
        ols = await ols4_synonyms(term_norm, ontology="uberon")
        expansions = seeds + ols
    elif kind == "gene":
        expansions = await mygene_synonyms(term_norm)
    else:
        # phenotype via OLS (HPO)
        expansions = await ols4_synonyms(term_norm, ontology="hp")
    return {"term": term_norm, "kind": kind, "synonyms": sorted(set([term_norm] + expansions))}
238
+
239
+ # ----------------- Regions heuristic (improved) -----------------
 
 
 
 
 
 
 
 
 
# Region terms ranked when the caller does not supply its own list.
REGION_TERMS_DEFAULT = [
    "prefrontal cortex","anterior cingulate cortex","nucleus accumbens","ventral striatum",
    "dorsal striatum","caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
    "insula","ventral tegmental area","substantia nigra","cerebellum"
]
245
 
async def eupmc_hitcount(q: str) -> int:
    """Return the Europe PMC ``hitCount`` for query *q*, or 0 on any failure.

    The request asks for ``pageSize`` 0 because only the count is read;
    responses are cached for 1800 s.
    """
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    params = {"query": q, "format": "json", "pageSize": 0}
    data = await get_json_cached(url, params, ttl=1800)
    try:
        return int(data.get("hitCount", 0))
    except (AttributeError, TypeError, ValueError):
        # Non-dict payload or a hitCount that is not an integer.
        return 0
254
+
def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
    """Return the distinct partner gene symbols named in STRING edges.

    Args:
        edges: STRING network edges; each dict may carry ``preferredName_A``
            and ``preferredName_B``. Anything that is not a list (``None`` or
            an error dict from a failed fetch) is treated as no edges.
        focus: Symbol to exclude; compared case-insensitively.

    Returns:
        Symbols in first-seen order, so that slicing the result is
        deterministic across runs.
    """
    if not isinstance(edges, list):
        return []
    focus_upper = focus.upper()
    seen = set()
    genes = []
    for edge in edges:
        if not isinstance(edge, dict):
            continue
        for key in ("preferredName_A", "preferredName_B"):
            gene = edge.get(key)
            if not isinstance(gene, str) or not gene:
                continue
            if gene.upper() != focus_upper and gene not in seen:
                seen.add(gene)
                genes.append(gene)
    return genes
264
 
 
 
 
 
 
 
 
 
 
 
@app.get("/heuristics/regions_from_string")
async def regions_from_string(
    receptor: str = Query(..., description="e.g., HTR2A"),
    species: int = 9606,
    limit: int = 40,
    regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
    use_synonyms: bool = True,
    symptom: Optional[str] = None
):
    """
    Rank brain regions by co-mention with (receptor OR STRING neighbors OR synonyms), with fallbacks.
    Tiered search:
    T1: (region_syns) AND (receptor OR neighbors OR gene_syns)
    T2: (region_syns) AND (receptor)
    T3: (region) AND (receptor)
    Unquoted broad matches are used to avoid exact-phrase misses.

    A region is reported at the first tier with a non-zero Europe PMC hit
    count; if every tier is empty it is reported as T3 with 0 hits. When
    ``symptom`` is given, `` AND (symptom)`` is appended to every tier query.
    When calling this function directly (not via HTTP), pass every argument
    explicitly.

    Returns a dict with ``focus``, ``neighbors_considered``,
    ``regions_ranked`` (sorted by ``weighted_score`` = log10(hits + 1),
    descending) and ``notes``.
    """
    def _unique(terms):
        # Drop falsy entries and exact duplicates, keeping first-seen order so
        # the generated queries (and their cache keys) are reproducible.
        seen = set()
        ordered = []
        for t in terms:
            if t and t not in seen:
                seen.add(t)
                ordered.append(t)
        return ordered

    # 1) STRING neighbors
    edges = await string_network(receptor, species=species, limit=limit)
    neighbors = collect_gene_symbols_from_string(edges, receptor)

    # 2) synonyms
    region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
    region_syns_map: Dict[str, List[str]] = {}
    gene_syns: List[str] = []
    if use_synonyms:
        # run as local function calls (not HTTP)
        syn_results = await asyncio.gather(*(util_synonyms(r, "region") for r in region_list))
        for r, syn in zip(region_list, syn_results):
            region_syns_map[r] = syn.get("synonyms", [])[:10] or [r]
        # gene synonyms for top neighbors (cap 20)
        for g in neighbors[:20]:
            gs = await util_synonyms(g, "gene")
            gene_syns.extend(gs.get("synonyms", [])[:5])
        gene_syns = _unique(gene_syns)
    else:
        for r in region_list:
            region_syns_map[r] = [r]

    # 3) Europe PMC hits per region, tiered
    # build RHS (receptor OR neighbors OR gene_syns)
    rhs = " OR ".join(_unique([receptor] + neighbors[:25] + gene_syns[:25]))
    symptom_clause = f" AND ({symptom})" if symptom else ""

    results = []
    for region in region_list:
        lhs = " OR ".join(region_syns_map.get(region, [region]))
        tier_queries = (
            ("T1", f"({lhs}) AND ({rhs}){symptom_clause}"),
            ("T2", f"({lhs}) AND ({receptor}){symptom_clause}"),
            ("T3", f"({region}) AND ({receptor}){symptom_clause}"),
        )
        tier, hits = "T3", 0
        for tier, query in tier_queries:
            hits = await eupmc_hitcount(query)
            if hits != 0:
                break  # first tier with evidence wins
        score = math.log10(hits + 1.0)
        results.append({"region": region, "hits": hits, "tier": tier, "weighted_score": round(score, 4)})

    results.sort(key=lambda x: x["weighted_score"], reverse=True)
    return {
        "focus": receptor,
        "neighbors_considered": neighbors[:25],
        "regions_ranked": results,
        "notes": "Heuristic uses STRING neighbors + Europe PMC co-mentions with synonyms and fallbacks."
    }
344
 
345
+ # ----------------- Manifest / Section / Download -----------------
346
+
# ephemeral in-memory store of assembled sections (by job_id)
JOBS: Dict[str, Dict[str, Any]] = {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
 
@app.get("/mechanism_graph_manifest")
async def mechanism_graph_manifest(
    receptor: str = Query(..., description="e.g., HTR2A"),
    symptom: str = Query("apathy"),
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10
):
    """
    Returns a job_id and the list of available sections with approximate sizes.

    Side effect: stores the request context (``_meta``) and the computed
    ``overview`` counts in ``JOBS`` under the new job id.
    """
    jid = job_key(receptor, symptom)

    # Pre-compute lightweight counts; store minimal context for later sections
    # STRING count (a non-list payload, e.g. an error dict, counts as 0)
    sdata = await string_network(receptor, species=species, limit=string_limit)
    s_count = len(sdata) if isinstance(sdata, list) else 0

    # Literature hitCount
    ldata = await europe_pmc_search(f"{receptor} AND {symptom}", pageSize=0)
    try:
        lit_hits = int(ldata.get("hitCount", 0))
    except (AttributeError, TypeError, ValueError):
        lit_hits = 0

    # Regions heuristic preview (no synonyms parameter here; section can recalc)
    rdata = await regions_from_string(
        receptor=receptor, species=species, limit=40,
        regions=None, use_synonyms=True, symptom=symptom,
    )
    r_count = len(rdata.get("regions_ranked", [])) if isinstance(rdata, dict) else 0

    JOBS[jid] = {
        "_meta": {"receptor": receptor, "symptom": symptom, "species": species},
        "overview": {
            "receptor": receptor, "symptom": symptom,
            "counts": {"string_edges": s_count, "literature_hits": lit_hits, "regions": r_count}
        }
        # other sections are built on request by the section endpoint
    }

    sections = [
        {"name": "overview", "approx_size": "small"},
        {"name": "network", "approx_size": f"{s_count} edges (limit={string_limit})"},
        {"name": "literature", "approx_size": f"{lit_hits} hits (pageSize={lit_page_size})"},
        {"name": "regions", "approx_size": f"{r_count} entries"}
    ]

    return {"job_id": jid, "sections": sections}
 
 
 
 
 
 
396
 
@app.get("/mechanism_graph/{section}")
async def mechanism_graph_section(
    section: str = Path(..., description="one of: overview, network, literature, regions"),
    receptor: Optional[str] = None,
    symptom: Optional[str] = None,
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10,
    job_id: Optional[str] = Query(None, description="optional; use manifest if you want stable ids")
):
    """
    Returns one section. If job_id is missing or unknown, builds on the fly.

    Raises HTTPException 422 when no receptor can be determined from the
    query or the stored job, and 404 for an unknown section name. Only the
    ``overview`` section is stored in ``JOBS``; the others are fetched on
    every call.
    """
    # pull context from job if available
    if job_id and job_id in JOBS:
        ctx = JOBS[job_id].get("_meta", {})
        receptor = receptor or ctx.get("receptor")
        symptom = symptom or ctx.get("symptom")
        # NOTE(review): species defaults to 9606 (truthy), so the stored
        # species is only used when the caller passes species=0 - confirm
        # whether the job's species should take precedence.
        species = species or ctx.get("species")

    if not receptor:
        raise HTTPException(status_code=422, detail="receptor is required (query param)")

    # Without a symptom, search on the receptor alone instead of sending the
    # literal text "None" to Europe PMC.
    lit_query = f"{receptor} AND {symptom}" if symptom else receptor

    if section == "overview":
        if not job_id or job_id not in JOBS:
            jid = job_key(receptor, symptom or "")
            JOBS.setdefault(jid, {"_meta": {"receptor": receptor, "symptom": symptom or "", "species": species}})
            job_id = jid
        # ensure overview exists
        if "overview" not in JOBS[job_id]:
            sdata = await string_network(receptor, species=species, limit=string_limit)
            s_count = len(sdata) if isinstance(sdata, list) else 0
            ldata = await europe_pmc_search(lit_query, pageSize=0)
            lit_hits = int(ldata.get("hitCount", 0)) if isinstance(ldata, dict) else 0
            rdata = await regions_from_string(
                receptor=receptor, species=species, limit=40,
                regions=None, use_synonyms=True, symptom=symptom,
            )
            r_count = len(rdata.get("regions_ranked", [])) if isinstance(rdata, dict) else 0
            JOBS[job_id]["overview"] = {
                "receptor": receptor, "symptom": symptom,
                "counts": {"string_edges": s_count, "literature_hits": lit_hits, "regions": r_count}
            }
        return {"job_id": job_id, "section": "overview", "data": JOBS[job_id]["overview"]}

    if section == "network":
        net = await string_network(receptor, species=species, limit=string_limit)
        return {"job_id": job_id, "section": "network", "data": net}

    if section == "literature":
        lit = await europe_pmc_search(lit_query, pageSize=lit_page_size)
        return {"job_id": job_id, "section": "literature", "data": lit}

    if section == "regions":
        reg = await regions_from_string(
            receptor=receptor, species=species, limit=40,
            regions=None, use_synonyms=True, symptom=symptom,
        )
        return {"job_id": job_id, "section": "regions", "data": reg}

    raise HTTPException(status_code=404, detail=f"unknown section: {section}")
454
 
@app.get("/download/{job_id}/{section}")
async def download_section(job_id: str, section: str):
    """
    Gzipped JSON download of a section; if section not built yet, tries to return what's there.

    "What's there" is the job's ``_meta`` context, returned under the
    requested section name. Raises HTTPException 404 when neither the
    section nor ``_meta`` holds data for this job id.
    """
    job = JOBS.get(job_id, {})
    data = job.get(section) or job.get("_meta")
    if not data:
        raise HTTPException(status_code=404, detail="job/section not found")

    def _filename_part(text: str) -> str:
        # Both values come from the URL; keep only characters that are safe
        # inside a quoted Content-Disposition filename.
        return "".join(
            ch if (ch.isascii() and ch.isalnum()) or ch in "-_." else "_"
            for ch in text
        )

    gz = gz_json_bytes({"job_id": job_id, "section": section, "data": data})
    filename = f"{_filename_part(job_id)}_{_filename_part(section)}.json.gz"
    return StreamingResponse(
        io.BytesIO(gz),
        media_type="application/gzip",
        headers={"Content-Disposition": f'attachment; filename="{filename}"'},
    )