Update app.py
Browse files
app.py
CHANGED
|
@@ -1,22 +1,19 @@
|
|
| 1 |
from fastapi import FastAPI, Query, Path, HTTPException
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
-
from fastapi.responses import RedirectResponse, JSONResponse,
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
from pathlib import Path as _Path
|
| 7 |
-
from datetime import datetime
|
| 8 |
|
| 9 |
APP_NAME = "neuro-mechanism-backend"
|
| 10 |
-
CALLER_ID = "neuro-mech-backend-demo" #
|
| 11 |
-
|
| 12 |
-
DATA_DIR.mkdir(parents=True, exist_ok=True)
|
| 13 |
|
| 14 |
app = FastAPI(title=APP_NAME)
|
| 15 |
|
| 16 |
app.add_middleware(
|
| 17 |
CORSMiddleware,
|
| 18 |
allow_origins=["*"], allow_credentials=True,
|
| 19 |
-
allow_methods=["*"], allow_headers=["*"]
|
| 20 |
)
|
| 21 |
|
| 22 |
@app.get("/", include_in_schema=False)
|
|
@@ -31,17 +28,11 @@ def health():
|
|
| 31 |
def endpoints():
|
| 32 |
return JSONResponse({
|
| 33 |
"GET": [
|
| 34 |
-
"/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy
|
| 35 |
-
"/mechanism_graph/
|
| 36 |
-
"/
|
| 37 |
-
"/
|
| 38 |
-
"/
|
| 39 |
-
"/download/<job_id>/nodes (gz)",
|
| 40 |
-
"/download/<job_id>/edges (gz)",
|
| 41 |
-
"/download/<job_id>/literature (gz)",
|
| 42 |
-
"/download/<job_id>/regions (gz)",
|
| 43 |
-
"/util/synonyms?term=apathy&kind=phenotype",
|
| 44 |
-
"/heuristics/regions_from_string?receptor=HTR2A&symptom=apathy&limit=40",
|
| 45 |
"/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
|
| 46 |
"/string/network?identifiers=HTR2A&species=9606",
|
| 47 |
"/gpcrdb/protein?entry=htr2a_human",
|
|
@@ -53,8 +44,6 @@ def endpoints():
|
|
| 53 |
]
|
| 54 |
})
|
| 55 |
|
| 56 |
-
UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
|
| 57 |
-
|
| 58 |
# ----------------- tiny in-memory TTL cache -----------------
|
| 59 |
class TTLCache:
|
| 60 |
def __init__(self, max_items=512):
|
|
@@ -75,12 +64,7 @@ class TTLCache:
|
|
| 75 |
async with httpx.AsyncClient(headers=UA, timeout=30) as client:
|
| 76 |
r = await client.get(url, params=params)
|
| 77 |
r.raise_for_status()
|
| 78 |
-
|
| 79 |
-
# Fast path: try JSON, else wrap as text.
|
| 80 |
-
try:
|
| 81 |
-
data = r.json()
|
| 82 |
-
except Exception:
|
| 83 |
-
data = {"text": r.text, "status_code": r.status_code}
|
| 84 |
async with self._lock:
|
| 85 |
if len(self.store) > self.max_items:
|
| 86 |
self.store.pop(next(iter(self.store)))
|
|
@@ -89,10 +73,10 @@ class TTLCache:
|
|
| 89 |
|
| 90 |
CACHE = TTLCache()
|
| 91 |
|
| 92 |
-
#
|
| 93 |
_last_string_call = 0.0
|
| 94 |
async def throttle_string():
|
| 95 |
-
"""Be nice to STRING; ~1 req/sec
|
| 96 |
global _last_string_call
|
| 97 |
now = time.time()
|
| 98 |
wait = 1.05 - (now - _last_string_call)
|
|
@@ -100,26 +84,29 @@ async def throttle_string():
|
|
| 100 |
await asyncio.sleep(wait)
|
| 101 |
_last_string_call = time.time()
|
| 102 |
|
| 103 |
-
# -----------------
|
| 104 |
async def get_json_cached(url: str, params: Optional[dict], ttl: int):
|
| 105 |
-
return await CACHE.get(url, params, ttl)
|
| 106 |
-
|
| 107 |
-
def _safe_float(x, default=0.0):
|
| 108 |
try:
|
| 109 |
-
return
|
| 110 |
-
except Exception:
|
| 111 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
def
|
| 114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
|
| 116 |
-
# -----------------
|
| 117 |
@app.get("/lit/eupmc")
|
| 118 |
-
async def europe_pmc_search(query: str, pageSize: int = 5
|
| 119 |
-
# Europe PMC REST search (JSON)
|
| 120 |
-
# docs: https://europepmc.org/RestfulWebService ; client vignette: europepmc R pkg
|
| 121 |
url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 122 |
-
params = {"query": query, "format": "json", "pageSize": pageSize
|
| 123 |
return await get_json_cached(url, params, ttl=600)
|
| 124 |
|
| 125 |
@app.get("/lit/pubmed_esearch")
|
|
@@ -164,576 +151,316 @@ async def gpcrdb_protein(entry: str):
|
|
| 164 |
|
| 165 |
@app.get("/string/network")
|
| 166 |
async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
|
| 167 |
-
# STRING JSON network endpoint
|
| 168 |
await throttle_string()
|
| 169 |
url = "https://string-db.org/api/json/network"
|
| 170 |
params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
|
| 171 |
return await get_json_cached(url, params, ttl=3600)
|
| 172 |
|
| 173 |
-
# -----------------
|
| 174 |
-
#
|
| 175 |
-
|
| 176 |
-
"prefrontal cortex": ["PFC",
|
| 177 |
-
|
| 178 |
-
"
|
| 179 |
-
"nucleus accumbens": ["NAc", "ventral striatum"],
|
| 180 |
"ventral tegmental area": ["VTA"],
|
| 181 |
-
"substantia nigra": ["SN",
|
| 182 |
-
"hippocampus": ["
|
| 183 |
-
"amygdala": [
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
}
|
| 185 |
|
| 186 |
-
async def
|
| 187 |
-
# OLS4 search
|
| 188 |
url = "https://www.ebi.ac.uk/ols4/api/search"
|
| 189 |
-
params = {"q": term}
|
| 190 |
-
if
|
| 191 |
-
|
| 192 |
-
# We'll just join as comma-separated for brevity (works for OLS4)
|
| 193 |
-
params["ontology"] = ",".join(ontologies)
|
| 194 |
data = await get_json_cached(url, params, ttl=86400)
|
| 195 |
-
syns =
|
| 196 |
try:
|
| 197 |
-
docs = data.get("response", {}).get("docs", [])
|
| 198 |
-
for d in docs
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
|
|
|
| 202 |
except Exception:
|
| 203 |
pass
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
url = "https://mygene.info/v3/query"
|
| 209 |
-
params = {"q":
|
| 210 |
data = await get_json_cached(url, params, ttl=86400)
|
| 211 |
-
syns =
|
| 212 |
try:
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
for
|
| 217 |
-
|
| 218 |
-
if isinstance(v, str):
|
| 219 |
-
syns.add(v)
|
| 220 |
-
for fld in ("alias","alias_symbol","other_names"):
|
| 221 |
-
v = h.get(fld)
|
| 222 |
-
if isinstance(v, list):
|
| 223 |
-
for x in v:
|
| 224 |
-
if isinstance(x, str):
|
| 225 |
-
syns.add(x)
|
| 226 |
except Exception:
|
| 227 |
pass
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
|
| 230 |
@app.get("/util/synonyms")
|
| 231 |
-
async def util_synonyms(term: str, kind:
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
syns.update(await _ols_synonyms(term, ontologies=["uberon","hbp","hpo","ncit"]))
|
| 247 |
-
elif k == "gene":
|
| 248 |
-
syns.update(await _mygene_aliases(term))
|
| 249 |
-
elif k == "phenotype":
|
| 250 |
-
syns.update(await _ols_synonyms(term, ontologies=["hpo","efo","mondo"]))
|
| 251 |
-
|
| 252 |
-
return {"term": term, "kind": k, "synonyms": sorted({s for s in syns if isinstance(s, str) and len(s) <= 60})}
|
| 253 |
-
|
| 254 |
-
# ----------------- region heuristic (upgraded) -----------------
|
| 255 |
REGION_TERMS_DEFAULT = [
|
| 256 |
-
"prefrontal cortex","anterior cingulate cortex","
|
| 257 |
"dorsal striatum","caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
|
| 258 |
-
"insula","ventral tegmental area","
|
| 259 |
]
|
| 260 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 261 |
def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
|
| 262 |
genes = set()
|
| 263 |
f = focus.upper()
|
| 264 |
for e in edges or []:
|
| 265 |
for k in ("preferredName_A","preferredName_B"):
|
| 266 |
g = e.get(k)
|
| 267 |
-
if g and
|
| 268 |
genes.add(g)
|
| 269 |
return list(genes)
|
| 270 |
|
| 271 |
-
async def _eupmc_hitcount(q: str) -> int:
|
| 272 |
-
# Europe PMC search hitCount (pageSize=0)
|
| 273 |
-
url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
|
| 274 |
-
params = {"query": q, "format": "json", "pageSize": 0}
|
| 275 |
-
data = await get_json_cached(url, params, ttl=3600)
|
| 276 |
-
try:
|
| 277 |
-
return int(data.get("hitCount", 0))
|
| 278 |
-
except Exception:
|
| 279 |
-
return 0
|
| 280 |
-
|
| 281 |
@app.get("/heuristics/regions_from_string")
|
| 282 |
async def regions_from_string(
|
| 283 |
receptor: str = Query(..., description="e.g., HTR2A"),
|
| 284 |
species: int = 9606,
|
| 285 |
limit: int = 40,
|
| 286 |
-
regions: Optional[str] = Query(None, description="comma-separated region terms
|
| 287 |
-
|
|
|
|
| 288 |
):
|
| 289 |
"""
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
T1: (region_syns) AND (
|
| 293 |
-
T2: (region_syns) AND (
|
| 294 |
-
T3: (
|
| 295 |
-
|
| 296 |
-
Final score = log10(weighted_hits+1) * mean_top_STRING_conf
|
| 297 |
"""
|
| 298 |
# 1) STRING neighbors
|
| 299 |
edges = await string_network(receptor, species=species, limit=limit)
|
| 300 |
neighbors = collect_gene_symbols_from_string(edges, receptor)
|
| 301 |
|
| 302 |
-
# STRING confidences
|
| 303 |
-
conf: Dict[str, float] = {}
|
| 304 |
-
for e in edges or []:
|
| 305 |
-
a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), _safe_float(e.get("score", 0))
|
| 306 |
-
if a and a.upper() != receptor.upper():
|
| 307 |
-
conf[a] = max(conf.get(a, 0.0), score)
|
| 308 |
-
if b and b.upper() != receptor.upper():
|
| 309 |
-
conf[b] = max(conf.get(b, 0.0), score)
|
| 310 |
-
mean_conf = sum(conf.values())/max(len(conf),1) if conf else 0.2
|
| 311 |
-
|
| 312 |
# 2) synonyms
|
| 313 |
-
receptor_syns = await _mygene_aliases(receptor)
|
| 314 |
-
symptom_syns = []
|
| 315 |
-
if symptom:
|
| 316 |
-
s = await util_synonyms(symptom, kind="phenotype")
|
| 317 |
-
symptom_syns = s["synonyms"]
|
| 318 |
-
|
| 319 |
region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
results = []
|
| 324 |
-
|
| 325 |
-
|
|
|
|
|
|
|
| 326 |
for region in region_list:
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
region_clause = " OR ".join(region_syns)
|
| 331 |
|
| 332 |
-
# tiers
|
| 333 |
# T1
|
| 334 |
-
|
| 335 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 336 |
else:
|
| 337 |
-
|
| 338 |
-
t2 = f"({region_clause}) AND (({gene_clause}))"
|
| 339 |
-
t3 = f"({region_clause}) AND ({' OR '.join(sorted(set([receptor] + receptor_syns)))})"
|
| 340 |
-
t4 = f"({region_clause}) AND ({' OR '.join(symptom_syns)})" if symptom_syns else None
|
| 341 |
-
|
| 342 |
-
tiers = [("t1",1.0,t1), ("t2",0.6,t2), ("t3",0.5,t3)]
|
| 343 |
-
if t4: tiers.append(("t4",0.3,t4))
|
| 344 |
-
|
| 345 |
-
# schedule hitCount calls
|
| 346 |
-
tier_defs.append((region, tiers))
|
| 347 |
-
for _,_,q in tiers:
|
| 348 |
-
tasks.append(_eupmc_hitcount(q))
|
| 349 |
-
|
| 350 |
-
# gather all counts in-order
|
| 351 |
-
counts_all = await asyncio.gather(*tasks)
|
| 352 |
-
# fold back into regions
|
| 353 |
-
idx = 0
|
| 354 |
-
for region, tiers in tier_defs:
|
| 355 |
-
weighted = 0.0
|
| 356 |
-
tier_counts = {}
|
| 357 |
-
for name, weight, _q in tiers:
|
| 358 |
-
hc = counts_all[idx]; idx += 1
|
| 359 |
-
tier_counts[name] = hc
|
| 360 |
-
weighted += weight * hc
|
| 361 |
-
score = math.log10(weighted + 1.0) * mean_conf
|
| 362 |
-
results.append({"region": region, "tiers": tier_counts, "weighted_hits": int(round(weighted)),
|
| 363 |
-
"weighted_score": round(score, 4)})
|
| 364 |
|
| 365 |
results.sort(key=lambda x: x["weighted_score"], reverse=True)
|
| 366 |
return {
|
| 367 |
"focus": receptor,
|
| 368 |
"neighbors_considered": neighbors[:25],
|
| 369 |
"regions_ranked": results,
|
| 370 |
-
"notes": "STRING + Europe PMC with synonyms and
|
| 371 |
}
|
| 372 |
|
| 373 |
-
# -----------------
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
|
| 377 |
-
return d
|
| 378 |
-
|
| 379 |
-
def _write_gz_jsonl(path: _Path, items: List[dict]):
|
| 380 |
-
with gzip.open(path, "wt", encoding="utf-8") as gz:
|
| 381 |
-
for it in items:
|
| 382 |
-
gz.write(json.dumps(it, ensure_ascii=False) + "\n")
|
| 383 |
-
|
| 384 |
-
def _read_gz_page(path: _Path, page: int, page_size: int) -> Tuple[int, List[dict]]:
|
| 385 |
-
total = 0
|
| 386 |
-
start = (page - 1) * page_size
|
| 387 |
-
end = start + page_size
|
| 388 |
-
out = []
|
| 389 |
-
with gzip.open(path, "rt", encoding="utf-8") as gz:
|
| 390 |
-
for i, line in enumerate(gz):
|
| 391 |
-
if not line.strip():
|
| 392 |
-
continue
|
| 393 |
-
if i >= start and i < end:
|
| 394 |
-
out.append(json.loads(line))
|
| 395 |
-
total += 1
|
| 396 |
-
return total, out
|
| 397 |
-
|
| 398 |
-
async def _build_mech_job(params: dict) -> dict:
|
| 399 |
-
"""
|
| 400 |
-
Build nodes/edges/literature/regions; write gz NDJSON + meta.
|
| 401 |
-
"""
|
| 402 |
-
receptor = params["receptor"]
|
| 403 |
-
species = int(params.get("species", 9606))
|
| 404 |
-
symptom = params.get("symptom")
|
| 405 |
-
string_limit = int(params.get("string_limit", 200))
|
| 406 |
-
eupmc_page_size = int(params.get("eupmc_page_size", 100))
|
| 407 |
-
eupmc_max_pages = int(params.get("eupmc_max_pages", 3))
|
| 408 |
-
|
| 409 |
-
job_id = _hash_params(params)
|
| 410 |
-
d = _job_dir(job_id)
|
| 411 |
-
meta_path = d / "meta.json"
|
| 412 |
-
if meta_path.exists():
|
| 413 |
-
return json.loads(meta_path.read_text("utf-8"))
|
| 414 |
-
|
| 415 |
-
# 1) STRING edges + nodes
|
| 416 |
-
edges = await string_network(receptor, species=species, limit=string_limit)
|
| 417 |
-
edge_items = []
|
| 418 |
-
nodes = set([receptor])
|
| 419 |
-
for e in edges or []:
|
| 420 |
-
a = e.get("preferredName_A"); b = e.get("preferredName_B")
|
| 421 |
-
score = _safe_float(e.get("score", 0))
|
| 422 |
-
if a and b:
|
| 423 |
-
edge_items.append({"a": a, "b": b, "score": score})
|
| 424 |
-
nodes.add(a); nodes.add(b)
|
| 425 |
-
node_items = [{"symbol": n, "seed": (n.upper()==receptor.upper())} for n in sorted(nodes)]
|
| 426 |
-
|
| 427 |
-
_write_gz_jsonl(d / "edges.jsonl.gz", edge_items)
|
| 428 |
-
_write_gz_jsonl(d / "nodes.jsonl.gz", node_items)
|
| 429 |
-
|
| 430 |
-
# 2) Europe PMC literature for (receptor AND symptom?) else receptor
|
| 431 |
-
lit_items = []
|
| 432 |
-
base_q = f"{receptor} AND {symptom}" if symptom else receptor
|
| 433 |
-
for page in range(1, eupmc_max_pages+1):
|
| 434 |
-
res = await europe_pmc_search(base_q, pageSize=eupmc_page_size, page=page)
|
| 435 |
-
hits = res.get("resultList", {}).get("result", []) or []
|
| 436 |
-
for h in hits:
|
| 437 |
-
lit_items.append({
|
| 438 |
-
"id": h.get("id"),
|
| 439 |
-
"source": h.get("source"), "title": h.get("title"),
|
| 440 |
-
"pubYear": h.get("pubYear"), "authorString": h.get("authorString"),
|
| 441 |
-
"journalTitle": h.get("journalTitle"), "doi": h.get("doi")
|
| 442 |
-
})
|
| 443 |
-
# stop early if last page
|
| 444 |
-
if len(hits) < eupmc_page_size:
|
| 445 |
-
break
|
| 446 |
-
_write_gz_jsonl(d / "literature.jsonl.gz", lit_items)
|
| 447 |
-
|
| 448 |
-
# 3) Regions heuristic (with symptom)
|
| 449 |
-
reg = await regions_from_string(receptor=receptor, species=species, limit=min(100, string_limit), regions=None, symptom=symptom)
|
| 450 |
-
reg_items = []
|
| 451 |
-
for r in reg.get("regions_ranked", []):
|
| 452 |
-
reg_items.append(r)
|
| 453 |
-
_write_gz_jsonl(d / "regions.jsonl.gz", reg_items)
|
| 454 |
-
|
| 455 |
-
meta = {
|
| 456 |
-
"job_id": job_id,
|
| 457 |
-
"created": datetime.utcnow().isoformat() + "Z",
|
| 458 |
-
"params": params,
|
| 459 |
-
"counts": {
|
| 460 |
-
"nodes": len(node_items),
|
| 461 |
-
"edges": len(edge_items),
|
| 462 |
-
"literature": len(lit_items),
|
| 463 |
-
"regions": len(reg_items)
|
| 464 |
-
},
|
| 465 |
-
"sections": ["nodes","edges","literature","regions"]
|
| 466 |
-
}
|
| 467 |
-
meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
|
| 468 |
-
return meta
|
| 469 |
|
| 470 |
@app.get("/mechanism_graph_manifest")
|
| 471 |
async def mechanism_graph_manifest(
|
| 472 |
-
receptor: str = Query(
|
|
|
|
| 473 |
species: int = 9606,
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
eupmc_page_size: int = 100,
|
| 477 |
-
eupmc_max_pages: int = 3
|
| 478 |
):
|
| 479 |
"""
|
| 480 |
-
|
| 481 |
-
The actual data is stored as gzipped NDJSON and can be:
|
| 482 |
-
- paged via /mechanism_graph/{section}?job_id=...&page=1&page_size=...
|
| 483 |
-
- or downloaded as a single gz file via /download/{job_id}/{section}
|
| 484 |
"""
|
| 485 |
-
|
| 486 |
-
"receptor": receptor, "species": species, "symptom": symptom,
|
| 487 |
-
"string_limit": string_limit, "eupmc_page_size": eupmc_page_size, "eupmc_max_pages": eupmc_max_pages
|
| 488 |
-
}
|
| 489 |
-
meta = await _build_mech_job(params)
|
| 490 |
-
return meta
|
| 491 |
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
page: int = 1,
|
| 497 |
-
page_size: int = 100
|
| 498 |
-
):
|
| 499 |
-
"""
|
| 500 |
-
Return a single page from a section (nodes|edges|literature|regions).
|
| 501 |
-
"""
|
| 502 |
-
d = _job_dir(job_id)
|
| 503 |
-
p = d / f"{section}.jsonl.gz"
|
| 504 |
-
if not p.exists():
|
| 505 |
-
raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
|
| 506 |
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
"job_id": job_id,
|
| 510 |
-
"section": section,
|
| 511 |
-
"page": page, "page_size": page_size,
|
| 512 |
-
"total": total,
|
| 513 |
-
"items": items
|
| 514 |
-
}
|
| 515 |
-
|
| 516 |
-
@app.get("/download/{job_id}/{section}")
|
| 517 |
-
async def download_section(job_id: str, section: Literal["nodes","edges","literature","regions"]):
|
| 518 |
-
"""
|
| 519 |
-
Download the full gzipped NDJSON for a section.
|
| 520 |
-
"""
|
| 521 |
-
d = _job_dir(job_id)
|
| 522 |
-
p = d / f"{section}.jsonl.gz"
|
| 523 |
-
if not p.exists():
|
| 524 |
-
raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
|
| 525 |
-
return FileResponse(
|
| 526 |
-
path=str(p),
|
| 527 |
-
filename=f"{APP_NAME}-{job_id}-{section}.jsonl.gz",
|
| 528 |
-
media_type="application/gzip"
|
| 529 |
-
)
|
| 530 |
-
|
| 531 |
-
# ===================== ADD BELOW YOUR EXISTING CODE =====================
|
| 532 |
-
from fastapi.responses import StreamingResponse, FileResponse
|
| 533 |
-
import gzip, io, secrets, math, pathlib, datetime
|
| 534 |
-
|
| 535 |
-
# -------- small in-memory job store (sections kept per job) ----------
|
| 536 |
-
JOBS: Dict[str, Dict[str, Any]] = {}
|
| 537 |
-
JOB_TTL_SECONDS = 3600
|
| 538 |
-
|
| 539 |
-
def _mk_job_id() -> str:
|
| 540 |
-
return secrets.token_hex(8)
|
| 541 |
-
|
| 542 |
-
def _save_job(sections: Dict[str, Any]) -> str:
|
| 543 |
-
# prune old
|
| 544 |
-
now = time.time()
|
| 545 |
-
for k, v in list(JOBS.items()):
|
| 546 |
-
if now - v.get("_ts", now) > JOB_TTL_SECONDS:
|
| 547 |
-
JOBS.pop(k, None)
|
| 548 |
-
jid = _mk_job_id()
|
| 549 |
-
JOBS[jid] = {"_ts": now, **sections}
|
| 550 |
-
return jid
|
| 551 |
-
|
| 552 |
-
def _get_job(jid: str) -> Optional[Dict[str, Any]]:
|
| 553 |
-
job = JOBS.get(jid)
|
| 554 |
-
if not job:
|
| 555 |
-
return None
|
| 556 |
-
if time.time() - job.get("_ts", 0) > JOB_TTL_SECONDS:
|
| 557 |
-
JOBS.pop(jid, None)
|
| 558 |
-
return None
|
| 559 |
-
return job
|
| 560 |
-
|
| 561 |
-
def _gzipped_json_bytes(obj: Any) -> bytes:
|
| 562 |
-
raw = orjson.dumps(obj) # fast & small
|
| 563 |
-
buf = io.BytesIO()
|
| 564 |
-
with gzip.GzipFile(fileobj=buf, mode="wb", compresslevel=6) as z:
|
| 565 |
-
z.write(raw)
|
| 566 |
-
return buf.getvalue()
|
| 567 |
-
|
| 568 |
-
# --------------------- Synonym utilities ------------------------------
|
| 569 |
-
async def _ols4_synonyms(term: str, size: int = 20) -> List[str]:
|
| 570 |
-
"""Region/ontology synonyms via OLS4 search."""
|
| 571 |
-
url = "https://www.ebi.ac.uk/ols4/api/search"
|
| 572 |
-
params = {"q": term, "size": size}
|
| 573 |
-
data = await get_json_cached(url, params, ttl=86400)
|
| 574 |
-
syns = set()
|
| 575 |
-
for hit in data.get("response", {}).get("docs", []):
|
| 576 |
-
for k in ("synonym", "label"):
|
| 577 |
-
val = hit.get(k)
|
| 578 |
-
if isinstance(val, list):
|
| 579 |
-
syns.update([s for s in val if isinstance(s, str)])
|
| 580 |
-
elif isinstance(val, str):
|
| 581 |
-
syns.add(val)
|
| 582 |
-
return sorted({s for s in syns if s.lower() != term.lower()})
|
| 583 |
-
|
| 584 |
-
async def _mygene_synonyms(gene: str, size: int = 5) -> List[str]:
|
| 585 |
-
"""Gene symbol/name/alias via MyGene.info."""
|
| 586 |
-
url = "https://mygene.info/v3/query"
|
| 587 |
-
params = {"q": gene, "fields": "symbol,name,alias", "species": "human", "size": size}
|
| 588 |
-
data = await get_json_cached(url, params, ttl=86400)
|
| 589 |
-
syns = set()
|
| 590 |
-
for h in data.get("hits", []):
|
| 591 |
-
for k in ("symbol", "name", "alias"):
|
| 592 |
-
v = h.get(k)
|
| 593 |
-
if isinstance(v, list):
|
| 594 |
-
syns.update([s for s in v if isinstance(s, str)])
|
| 595 |
-
elif isinstance(v, str):
|
| 596 |
-
syns.add(v)
|
| 597 |
-
return sorted({s for s in syns if s.lower() != gene.lower()})
|
| 598 |
-
|
| 599 |
-
@app.get("/util/synonyms")
|
| 600 |
-
async def util_synonyms(term: str = Query(...), kind: str = Query("region", description="region|gene|phenotype"), size: int = 20):
|
| 601 |
try:
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
syns = await _ols4_synonyms(term, size=min(size, 50))
|
| 606 |
-
return {"term": term, "kind": kind, "synonyms": syns}
|
| 607 |
-
except Exception as e:
|
| 608 |
-
return {"term": term, "kind": kind, "synonyms": [], "error": str(e)}
|
| 609 |
-
|
| 610 |
-
# ------ improved regions heuristic: synonyms + unquoted + fallbacks -----
|
| 611 |
-
REGION_SYNONYM_OVERRIDES = {
|
| 612 |
-
"prefrontal cortex": ["PFC", "mPFC", "vmPFC", "dorsolateral prefrontal cortex", "DLPFC", "ventromedial prefrontal cortex"],
|
| 613 |
-
"anterior cingulate cortex": ["ACC", "dACC", "pregenual ACC", "subgenual ACC", "sgACC"],
|
| 614 |
-
"nucleus accumbens": ["NAc", "ventral striatum", "accumbens"]
|
| 615 |
-
}
|
| 616 |
-
|
| 617 |
-
async def _region_terms_with_synonyms(base_terms: List[str]) -> Dict[str, List[str]]:
|
| 618 |
-
out: Dict[str, List[str]] = {}
|
| 619 |
-
for term in base_terms:
|
| 620 |
-
# manual seeds + OLS4 expansion
|
| 621 |
-
syns = set(REGION_SYNONYM_OVERRIDES.get(term, []))
|
| 622 |
-
try:
|
| 623 |
-
syns.update(await _ols4_synonyms(term, size=20))
|
| 624 |
-
except Exception:
|
| 625 |
-
pass
|
| 626 |
-
# keep short list to control URL size
|
| 627 |
-
out[term] = sorted(list(syns))[:12]
|
| 628 |
-
return out
|
| 629 |
-
|
| 630 |
-
@app.get("/heuristics/regions_from_string")
|
| 631 |
-
async def regions_from_string(
|
| 632 |
-
receptor: str = Query(..., description="e.g., HTR2A"),
|
| 633 |
-
species: int = 9606,
|
| 634 |
-
limit: int = 40,
|
| 635 |
-
regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
|
| 636 |
-
expand: int = Query(1, description="if 1, use OLS4 synonyms and manual aliases"),
|
| 637 |
-
):
|
| 638 |
-
# 1) STRING neighbors (cached)
|
| 639 |
-
edges = await string_network(receptor, species=species, limit=limit)
|
| 640 |
-
neighbors = collect_gene_symbols_from_string(edges, receptor)
|
| 641 |
-
conf: Dict[str, float] = {}
|
| 642 |
-
for e in edges:
|
| 643 |
-
a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), float(e.get("score", 0))
|
| 644 |
-
if a and a.upper() != receptor.upper(): conf[a] = max(conf.get(a, 0.0), score)
|
| 645 |
-
if b and b.upper() != receptor.upper(): conf[b] = max(conf.get(b, 0.0), score)
|
| 646 |
|
| 647 |
-
|
| 648 |
-
|
|
|
|
| 649 |
|
| 650 |
-
|
| 651 |
-
|
| 652 |
-
|
| 653 |
-
|
| 654 |
-
|
| 655 |
-
|
| 656 |
-
|
| 657 |
-
|
| 658 |
-
if h1 == 0:
|
| 659 |
-
# fallback 1: region only with receptor
|
| 660 |
-
q2 = f'({ " OR ".join(terms) }) AND ({receptor})'
|
| 661 |
-
h2 = await eupmc_hitcount(q2)
|
| 662 |
-
hc = h2
|
| 663 |
-
else:
|
| 664 |
-
hc = h1
|
| 665 |
|
| 666 |
-
|
| 667 |
-
|
| 668 |
-
|
|
|
|
|
|
|
|
|
|
| 669 |
|
| 670 |
-
|
| 671 |
-
return {
|
| 672 |
-
"focus": receptor,
|
| 673 |
-
"neighbors_considered": neighbors[:25],
|
| 674 |
-
"regions_ranked": results,
|
| 675 |
-
"notes": "Heuristic: STRING neighbors + EuropePMC co-occurrence, with synonyms, broad match, and fallbacks."
|
| 676 |
-
}
|
| 677 |
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
receptor: str =
|
| 682 |
-
symptom: str =
|
| 683 |
species: int = 9606,
|
| 684 |
-
|
| 685 |
-
|
|
|
|
| 686 |
):
|
| 687 |
-
"""
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
#
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
|
| 718 |
-
|
| 719 |
-
|
| 720 |
-
|
| 721 |
-
|
| 722 |
-
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
|
| 727 |
@app.get("/download/{job_id}/{section}")
|
| 728 |
async def download_section(job_id: str, section: str):
|
| 729 |
-
"""
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
|
|
|
| 736 |
media_type="application/gzip",
|
| 737 |
-
headers={"Content-Disposition": f'attachment; filename="{
|
| 738 |
-
# ===================== END ADD-ON BLOCK =====================
|
| 739 |
-
|
|
|
|
| 1 |
from fastapi import FastAPI, Query, Path, HTTPException
|
| 2 |
from fastapi.middleware.cors import CORSMiddleware
|
| 3 |
+
from fastapi.responses import RedirectResponse, JSONResponse, StreamingResponse, FileResponse
|
| 4 |
+
import httpx, asyncio, time, os, hashlib, json, io, gzip, math
|
| 5 |
+
from typing import Dict, Any, Tuple, Optional, List
|
|
|
|
|
|
|
| 6 |
|
| 7 |
# Service identity, reused for the FastAPI title and outgoing request headers.
APP_NAME = "neuro-mechanism-backend"
CALLER_ID = "neuro-mech-backend-demo"  # shows in STRING logs / rate fairness
UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}

app = FastAPI(title=APP_NAME)

# Wildcard origins must not be combined with credentialed requests: the
# FastAPI CORS documentation rules out allow_origins=["*"] together with
# allow_credentials=True, so credentials are disabled for the open wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
| 18 |
|
| 19 |
@app.get("/", include_in_schema=False)
|
|
|
|
| 28 |
def endpoints():
|
| 29 |
return JSONResponse({
|
| 30 |
"GET": [
|
| 31 |
+
"/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy",
|
| 32 |
+
"/mechanism_graph/regions?receptor=HTR2A&symptom=apathy",
|
| 33 |
+
"/download/{job_id}/{section}",
|
| 34 |
+
"/heuristics/regions_from_string?receptor=HTR2A",
|
| 35 |
+
"/util/synonyms?term=ACC&kind=region",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
"/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
|
| 37 |
"/string/network?identifiers=HTR2A&species=9606",
|
| 38 |
"/gpcrdb/protein?entry=htr2a_human",
|
|
|
|
| 44 |
]
|
| 45 |
})
|
| 46 |
|
|
|
|
|
|
|
| 47 |
# ----------------- tiny in-memory TTL cache -----------------
|
| 48 |
class TTLCache:
|
| 49 |
def __init__(self, max_items=512):
|
|
|
|
| 64 |
async with httpx.AsyncClient(headers=UA, timeout=30) as client:
|
| 65 |
r = await client.get(url, params=params)
|
| 66 |
r.raise_for_status()
|
| 67 |
+
data = r.json()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
async with self._lock:
|
| 69 |
if len(self.store) > self.max_items:
|
| 70 |
self.store.pop(next(iter(self.store)))
|
|
|
|
| 73 |
|
| 74 |
CACHE = TTLCache()
|
| 75 |
|
| 76 |
+
# --------------- polite throttling for STRING ----------------
|
| 77 |
_last_string_call = 0.0
|
| 78 |
async def throttle_string():
|
| 79 |
+
"""Be nice to STRING; ~1 req/sec is a good courtesy."""
|
| 80 |
global _last_string_call
|
| 81 |
now = time.time()
|
| 82 |
wait = 1.05 - (now - _last_string_call)
|
|
|
|
| 84 |
await asyncio.sleep(wait)
|
| 85 |
_last_string_call = time.time()
|
| 86 |
|
| 87 |
+
# ----------------- Helpers -----------------
|
| 88 |
async def get_json_cached(url: str, params: Optional[dict], ttl: int):
    """Fetch ``url`` through the shared ``CACHE`` without ever raising.

    Args:
        url: Endpoint to request.
        params: Query parameters forwarded to ``CACHE.get`` (may be ``None``).
        ttl: Forwarded unchanged to ``CACHE.get`` as its lifetime argument.

    Returns:
        Whatever ``CACHE.get`` returns on success. On any failure, a dict of
        the form ``{"error": <message>, "url": url, "params": params}`` so
        callers can keep treating the result as a mapping.
    """
    try:
        return await CACHE.get(url, params, ttl)
    except Exception as exc:  # best-effort boundary: degrade, never crash
        # Local import keeps this block self-contained; the failure used to be
        # swallowed without any trace, which made upstream outages invisible.
        import logging

        logging.getLogger(__name__).warning(
            "cached GET failed for %s: %s", url, exc
        )
        return {"error": str(exc), "url": url, "params": params}
|
| 93 |
+
|
| 94 |
+
def job_key(receptor: str, symptom: str) -> str:
    """Return a 16-character hexadecimal key for a receptor/symptom job.

    The key is the first 16 hex digits of the SHA-1 of
    ``"<receptor>|<symptom>|<unix-seconds>"``. Because the current time (whole
    seconds) is part of the hashed text, the same pair yields a different key
    whenever it is requested in a different second.
    """
    stamp = int(time.time())
    raw = f"{receptor}|{symptom}|{stamp}"
    return hashlib.sha1(raw.encode()).hexdigest()[:16]
|
| 97 |
|
| 98 |
+
def gz_json_bytes(obj: Any) -> bytes:
    """Serialize ``obj`` to UTF-8 JSON and return it gzip-compressed.

    Non-ASCII characters are written as-is (``ensure_ascii=False``) rather
    than as ``\\uXXXX`` escapes.

    Raises:
        TypeError: If ``obj`` contains a value ``json.dumps`` cannot encode.
    """
    payload = json.dumps(obj, ensure_ascii=False).encode("utf-8")
    # gzip.compress replaces the hand-rolled BytesIO + GzipFile pair.
    return gzip.compress(payload)
|
| 104 |
|
| 105 |
+
# ----------------- External API wrappers -----------------
|
| 106 |
@app.get("/lit/eupmc")
async def europe_pmc_search(query: str, pageSize: int = 5):
    """Proxy a Europe PMC REST search and return its JSON payload.

    Args:
        query: Search expression sent as the ``query`` parameter.
        pageSize: Sent as the ``pageSize`` parameter.

    Returns:
        The result of ``get_json_cached`` for the search URL, requested with
        ``format=json`` and a ``ttl`` of 600.
    """
    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    params = {"query": query, "format": "json", "pageSize": pageSize}
    return await get_json_cached(url, params, ttl=600)
|
| 111 |
|
| 112 |
@app.get("/lit/pubmed_esearch")
|
|
|
|
| 151 |
|
| 152 |
@app.get("/string/network")
async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
    """Proxy the STRING JSON network endpoint.

    ``throttle_string`` is awaited before every call, then the request goes
    through ``get_json_cached`` with a ``ttl`` of 3600.

    Args:
        identifiers: Sent as the ``identifiers`` parameter.
        species: Sent as the ``species`` parameter.
        limit: Sent as the ``limit`` parameter.

    Returns:
        The result of ``get_json_cached`` for the network URL.
    """
    await throttle_string()
    url = "https://string-db.org/api/json/network"
    params = {
        "identifiers": identifiers,
        "species": species,
        "caller_identity": CALLER_ID,
        "limit": limit,
    }
    return await get_json_cached(url, params, ttl=3600)
|
| 158 |
|
| 159 |
+
# ----------------- Synonyms (regions/genes/phenotypes) --------------
|
| 160 |
+
# Simple built-in expansions + OLS/MyGene lookups.
|
| 161 |
+
# Hand-curated abbreviations/aliases per region, keyed by the lower-case
# canonical region name. An empty list means no built-in alias for that region.
REGION_SEED_SYNONYMS = {
    "prefrontal cortex": [
        "PFC",
        "mPFC",
        "vmPFC",
        "dlPFC",
        "dorsolateral prefrontal cortex",
        "ventromedial prefrontal cortex",
    ],
    "anterior cingulate cortex": ["ACC", "dACC", "pgACC", "sgACC", "subgenual cingulate"],
    "nucleus accumbens": ["NAc", "ventral striatum", "accumbens"],
    "ventral tegmental area": ["VTA"],
    "substantia nigra": ["SN", "SNc", "pars compacta"],
    "hippocampus": ["HC"],
    "amygdala": [],
    "insula": ["insular cortex"],
    "thalamus": [],
    "hypothalamus": [],
    "cerebellum": [],
}
|
| 174 |
|
| 175 |
+
async def ols4_synonyms(term: str, ontology: Optional[str] = None) -> List[str]:
    """Collect synonyms and labels for ``term`` from an OLS4 search (best effort).

    Args:
        term: Text sent as the ``q`` parameter.
        ontology: When truthy, sent as the ``ontology`` parameter.

    Returns:
        Up to 50 stripped, non-empty strings in first-seen order, de-duplicated
        case-insensitively (the first spelling wins). An unexpected or error
        payload yields an empty list instead of raising.
    """
    url = "https://www.ebi.ac.uk/ols4/api/search"
    params = {"q": term, "rows": 20}
    if ontology:
        params["ontology"] = ontology
    data = await get_json_cached(url, params, ttl=86400)

    # Walk the payload defensively: get_json_cached may hand back an error
    # dict, and nothing here guarantees the shape of a successful response.
    response = data.get("response") if isinstance(data, dict) else None
    docs = response.get("docs") if isinstance(response, dict) else None

    candidates: list = []
    for doc in docs if isinstance(docs, list) else []:
        if not isinstance(doc, dict):
            continue
        synonyms = doc.get("synonym")
        if isinstance(synonyms, str):
            # A bare string must be kept whole, not split into characters.
            candidates.append(synonyms)
        elif isinstance(synonyms, list):
            candidates.extend(synonyms)
        if "label" in doc:
            candidates.append(doc["label"])

    unique: List[str] = []
    seen = set()
    for cand in candidates:
        if not isinstance(cand, str):
            continue  # non-string entries cannot be stripped or compared
        text = cand.strip()
        key = text.lower()
        if text and key not in seen:
            unique.append(text)
            seen.add(key)
    return unique[:50]
|
| 201 |
+
|
| 202 |
+
async def mygene_synonyms(symbol: str) -> List[str]:
    """Collect symbol, name and alias strings for ``symbol`` from MyGene.info.

    Args:
        symbol: Text sent as the ``q`` parameter.

    Returns:
        Up to 50 stripped, non-empty strings in first-seen order, de-duplicated
        case-insensitively. An unexpected or error payload yields an empty list
        instead of raising.
    """
    url = "https://mygene.info/v3/query"
    params = {"q": symbol, "fields": "symbol,name,alias,other_names", "size": 5}
    data = await get_json_cached(url, params, ttl=86400)

    hits = data.get("hits") if isinstance(data, dict) else None

    collected: list = []
    for hit in hits if isinstance(hits, list) else []:
        if not isinstance(hit, dict):
            continue
        for field in ("symbol", "name"):
            if field in hit:
                collected.append(hit[field])
        for field in ("alias", "other_names"):
            value = hit.get(field)
            if isinstance(value, list):
                collected.extend(value)
            elif isinstance(value, str):
                # A lone alias can arrive as a plain string rather than a
                # one-element list; previously that value was dropped.
                collected.append(value)

    unique: List[str] = []
    seen = set()
    for item in collected:
        if item is None:
            continue  # avoid emitting the literal text "None"
        text = str(item).strip()
        key = text.lower()
        if text and key not in seen:
            unique.append(text)
            seen.add(key)
    return unique[:50]
|
| 223 |
|
| 224 |
@app.get("/util/synonyms")
async def util_synonyms(term: str, kind: str = Query("region", enum=["region", "gene", "phenotype"])):
    """Return the sorted, de-duplicated synonym set for a term.

    The source depends on ``kind``:
      * ``"region"``: seed synonyms plus an OLS4 lookup in the "uberon" ontology
      * ``"gene"``: MyGene.info lookup
      * anything else (phenotype): OLS4 lookup in the "hp" ontology

    The stripped input term is always part of the returned set.
    """
    cleaned = term.strip()

    if kind == "gene":
        extra = await mygene_synonyms(cleaned)
    elif kind == "region":
        seeded = REGION_SEED_SYNONYMS.get(cleaned.lower(), [])
        extra = seeded + await ols4_synonyms(cleaned, ontology="uberon")
    else:
        # phenotype via OLS (HPO)
        extra = await ols4_synonyms(cleaned, ontology="hp")

    return {"term": cleaned, "kind": kind, "synonyms": sorted({cleaned, *extra})}
|
| 238 |
+
|
| 239 |
+
# ----------------- Regions heuristic (improved) -----------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
# Region terms used by the regions heuristic when the caller supplies none.
REGION_TERMS_DEFAULT = [
    "prefrontal cortex",
    "anterior cingulate cortex",
    "nucleus accumbens",
    "ventral striatum",
    "dorsal striatum",
    "caudate",
    "putamen",
    "amygdala",
    "hippocampus",
    "thalamus",
    "hypothalamus",
    "insula",
    "ventral tegmental area",
    "substantia nigra",
    "cerebellum",
]
|
| 245 |
|
| 246 |
+
async def eupmc_hitcount(q: str) -> int:
    """Return the Europe PMC ``hitCount`` for query ``q``.

    The response is fetched through the shared cache with a 1800 s TTL.
    Any failure to read or convert ``hitCount`` yields 0.
    """
    payload = await get_json_cached(
        "https://www.ebi.ac.uk/europepmc/webservices/rest/search",
        {"query": q, "format": "json", "pageSize": 0},
        ttl=1800,
    )
    try:
        hits = int(payload.get("hitCount", 0))
    except Exception:
        # Best effort: a malformed payload counts as "no hits".
        hits = 0
    return hits
|
| 254 |
+
|
| 255 |
def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
    """Extract the distinct partner gene symbols from STRING edge records.

    Args:
        edges: Edge dicts carrying ``preferredName_A`` / ``preferredName_B``.
            ``None``, non-list payloads and non-dict entries are ignored.
        focus: Symbol of the query gene; it is excluded from the result
            (compared case-insensitively).

    Returns:
        Unique symbols in first-seen order. The order is deterministic so that
        callers slicing the list (e.g. ``[:25]``) pick the same genes on every
        run, which a ``set``-derived list does not guarantee.
    """
    if not isinstance(edges, (list, tuple)):
        return []

    focus_key = (focus or "").upper()
    # A dict keeps insertion order, giving ordered de-duplication.
    ordered = {}
    for edge in edges:
        if not isinstance(edge, dict):
            continue
        for key in ("preferredName_A", "preferredName_B"):
            gene = edge.get(key)
            if isinstance(gene, str) and gene and gene.upper() != focus_key:
                ordered.setdefault(gene, None)
    return list(ordered)
|
| 264 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
@app.get("/heuristics/regions_from_string")
async def regions_from_string(
    receptor: str = Query(..., description="e.g., HTR2A"),
    species: int = 9606,
    limit: int = 40,
    regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
    use_synonyms: bool = True,
    symptom: Optional[str] = None
):
    """
    Rank brain regions by co-mention with (receptor OR STRING neighbors OR synonyms), with fallbacks.

    Tiered search (the first tier with a non-zero hit count wins; if every
    tier is empty the region is reported as T3 with 0 hits):
      T1: (region_syns) AND (receptor OR neighbors OR gene_syns)
      T2: (region_syns) AND (receptor)
      T3: (region) AND (receptor)
    Unquoted broad matches are used to avoid exact-phrase misses.

    Args:
        receptor: Focus gene symbol.
        species: Taxon id forwarded to the STRING lookup.
        limit: Limit forwarded to the STRING lookup.
        regions: Comma-separated region terms; falls back to REGION_TERMS_DEFAULT.
        use_synonyms: When true, expand regions and neighbor genes with synonyms.
        symptom: Optional extra term AND-ed onto every query.

    Returns:
        Dict with ``focus``, ``neighbors_considered``, ``regions_ranked``
        (sorted by ``weighted_score`` = log10(hits + 1), descending) and ``notes``.
    """
    # 1) STRING neighbors
    edges = await string_network(receptor, species=species, limit=limit)
    # Other callers in this file check isinstance(..., list) on this result, so
    # a non-list payload is presumably possible; treat it as "no edges".
    neighbors = collect_gene_symbols_from_string(edges if isinstance(edges, list) else [], receptor)

    # 2) synonyms
    region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
    region_syns_map: Dict[str, List[str]] = {}
    gene_syns: List[str] = []
    if use_synonyms:
        # run as local function calls (not HTTP)
        syn_results = await asyncio.gather(*(util_synonyms(r, "region") for r in region_list))
        for r, syn in zip(region_list, syn_results):
            # The synonym list arrives sorted, so a plain [:10] cut could drop
            # the region's own term; pin it first and cap the total at 10.
            extras = [s for s in syn.get("synonyms", []) if s and s != r]
            region_syns_map[r] = [r] + extras[:9]
        # gene synonyms for top neighbors (cap 20), fetched concurrently like
        # the region synonyms above
        gene_results = await asyncio.gather(*(util_synonyms(g, "gene") for g in neighbors[:20]))
        for gs in gene_results:
            gene_syns.extend(gs.get("synonyms", [])[:5])
        # Ordered de-duplication keeps the later [:25] slice reproducible.
        gene_syns = list(dict.fromkeys(s for s in gene_syns if s))
    else:
        for r in region_list:
            region_syns_map[r] = [r]

    # 3) Europe PMC hits per region, tiered
    # build RHS (receptor OR neighbors OR gene_syns); de-duplicate in order so
    # the query text (and therefore any cache keyed on it) is stable for a
    # given neighbor order instead of depending on set iteration order
    rhs_terms = [receptor] + neighbors[:25] + gene_syns[:25]
    rhs = " OR ".join(dict.fromkeys(t for t in rhs_terms if t))
    symptom_clause = f" AND ({symptom})" if symptom else ""

    results = []
    for region in region_list:
        lhs = " OR ".join(region_syns_map.get(region, [region]))
        tiered_queries = (
            ("T1", f"({lhs}) AND ({rhs}){symptom_clause}"),
            ("T2", f"({lhs}) AND ({receptor}){symptom_clause}"),
            ("T3", f"({region}) AND ({receptor}){symptom_clause}"),
        )
        tier, hits = "T3", 0
        for tier, query in tiered_queries:
            hits = await eupmc_hitcount(query)
            if hits:
                break  # first tier with evidence wins; broader tiers are skipped
        score = math.log10(hits + 1.0)
        results.append({"region": region, "hits": hits, "tier": tier, "weighted_score": round(score, 4)})

    results.sort(key=lambda x: x["weighted_score"], reverse=True)
    return {
        "focus": receptor,
        "neighbors_considered": neighbors[:25],
        "regions_ranked": results,
        "notes": "Heuristic uses STRING neighbors + Europe PMC co-mentions with synonyms and fallbacks."
    }
|
| 344 |
|
| 345 |
+
# ----------------- Manifest / Section / Download -----------------
|
| 346 |
+
|
| 347 |
+
# Ephemeral in-memory store of assembled sections, keyed by job_id.
# Entries written in this file hold a "_meta" dict (receptor / symptom / species)
# and, once computed, an "overview" section.
# NOTE(review): no eviction is visible in this part of the file, so entries
# presumably live for the lifetime of the process — confirm.
JOBS: Dict[str, Dict[str, Any]] = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
|
| 350 |
@app.get("/mechanism_graph_manifest")
async def mechanism_graph_manifest(
    receptor: str = Query(..., description="e.g., HTR2A"),
    symptom: str = Query("apathy"),
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10
):
    """
    Build the manifest for a receptor/symptom pair.

    Computes lightweight counts (STRING edges, Europe PMC hits, ranked
    regions), stores them together with the request context under the job id
    in JOBS, and returns the job_id plus the available sections with
    approximate sizes.
    """
    jid = job_key(receptor, symptom)

    # STRING edge count (anything that is not a list counts as zero edges)
    network = await string_network(receptor, species=species, limit=string_limit)
    s_count = len(network) if isinstance(network, list) else 0

    # Literature hit count; pageSize=0 because only the total is needed
    lit_probe = await europe_pmc_search(f"{receptor} AND {symptom}", pageSize=0)
    try:
        lit_hits = int(lit_probe.get("hitCount", 0))
    except Exception:
        lit_hits = 0

    # Regions heuristic preview with default regions; the section endpoint can recalculate
    ranked = await regions_from_string(
        receptor=receptor,
        species=species,
        limit=40,
        regions=None,
        use_synonyms=True,
        symptom=symptom,
    )
    r_count = len(ranked.get("regions_ranked", [])) if isinstance(ranked, dict) else 0

    counts = {"string_edges": s_count, "literature_hits": lit_hits, "regions": r_count}
    # Only the context and the overview are stored here; other sections are built lazily.
    JOBS[jid] = {
        "_meta": {"receptor": receptor, "symptom": symptom, "species": species},
        "overview": {"receptor": receptor, "symptom": symptom, "counts": counts},
    }

    return {
        "job_id": jid,
        "sections": [
            {"name": "overview", "approx_size": "small"},
            {"name": "network", "approx_size": f"{s_count} edges (limit={string_limit})"},
            {"name": "literature", "approx_size": f"{lit_hits} hits (pageSize={lit_page_size})"},
            {"name": "regions", "approx_size": f"{r_count} entries"},
        ],
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
|
| 397 |
+
@app.get("/mechanism_graph/{section}")
async def mechanism_graph_section(
    section: str = Path(..., description="one of: overview, network, literature, regions"),
    receptor: Optional[str] = None,
    symptom: Optional[str] = None,
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10,
    job_id: Optional[str] = Query(None, description="optional; use manifest if you want stable ids")
):
    """
    Returns one section. If job_id is missing or unknown, builds on the fly.

    When ``job_id`` names a known job, missing ``receptor`` / ``symptom``
    values are filled in from that job's stored context.

    Raises:
        HTTPException(422): no receptor was given and none could be recovered
            from the job context.
        HTTPException(404): ``section`` is not one of the known section names.
    """
    # pull context from job if available
    if job_id and job_id in JOBS:
        ctx = JOBS[job_id].get("_meta", {})
        receptor = receptor or ctx.get("receptor")
        symptom = symptom or ctx.get("symptom")
        # NOTE(review): `species` defaults to 9606 and is therefore always
        # truthy, so the job's stored species is only used when the caller
        # passes 0. Honouring it needs an Optional default (signature change).
        species = species or ctx.get("species")

    if not receptor:
        raise HTTPException(status_code=422, detail="receptor is required (query param)")

    # Without a symptom, search on the receptor alone; formatting None into the
    # query would send the literal text "<receptor> AND None" to Europe PMC.
    lit_query = f"{receptor} AND {symptom}" if symptom else receptor

    if section == "overview":
        if not job_id or job_id not in JOBS:
            jid = job_key(receptor, symptom or "")
            JOBS.setdefault(jid, {"_meta": {"receptor": receptor, "symptom": symptom or "", "species": species}})
            job_id = jid
        # ensure overview exists
        if "overview" not in JOBS[job_id]:
            sdata = await string_network(receptor, species=species, limit=string_limit)
            s_count = len(sdata) if isinstance(sdata, list) else 0
            ldata = await europe_pmc_search(lit_query, pageSize=0)
            lit_hits = int(ldata.get("hitCount", 0)) if isinstance(ldata, dict) else 0
            rdata = await regions_from_string(receptor=receptor, species=species, limit=40, regions=None, use_synonyms=True, symptom=symptom)
            r_count = len(rdata.get("regions_ranked", [])) if isinstance(rdata, dict) else 0
            JOBS[job_id]["overview"] = {
                "receptor": receptor, "symptom": symptom,
                "counts": {"string_edges": s_count, "literature_hits": lit_hits, "regions": r_count}
            }
        return {"job_id": job_id, "section": "overview", "data": JOBS[job_id]["overview"]}

    elif section == "network":
        net = await string_network(receptor, species=species, limit=string_limit)
        return {"job_id": job_id, "section": "network", "data": net}

    elif section == "literature":
        lit = await europe_pmc_search(lit_query, pageSize=lit_page_size)
        return {"job_id": job_id, "section": "literature", "data": lit}

    elif section == "regions":
        reg = await regions_from_string(receptor=receptor, species=species, limit=40, regions=None, use_synonyms=True, symptom=symptom)
        return {"job_id": job_id, "section": "regions", "data": reg}

    else:
        raise HTTPException(status_code=404, detail=f"unknown section: {section}")
|
| 454 |
|
| 455 |
@app.get("/download/{job_id}/{section}")
async def download_section(job_id: str, section: str):
    """
    Gzipped JSON download of a section; if section not built yet, tries to return what's there.

    "What's there" is the job's stored ``_meta`` context, which is returned
    when the requested section is missing or empty.

    Raises:
        HTTPException(404): the job is unknown or holds neither the section
            nor any ``_meta`` context.
    """
    import re  # local import: only needed to sanitise the download filename

    job = JOBS.get(job_id, {})
    data = job.get(section) or job.get("_meta")
    if not data:
        raise HTTPException(status_code=404, detail="job/section not found")

    gz = gz_json_bytes({"job_id": job_id, "section": section, "data": data})

    # Both path parameters are caller-controlled. Reduce them to a conservative
    # character set so quotes or control characters cannot break or inject into
    # the Content-Disposition header.
    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "_", f"{job_id}_{section}")
    return StreamingResponse(
        io.BytesIO(gz),
        media_type="application/gzip",
        headers={"Content-Disposition": f'attachment; filename="{safe_name}.json.gz"'},
    )
|
|
|
|
|
|