Spaces:

darkfrostx
/

neuro-mechanism-backend

Running

App Files Community

darkfrostx committed on Sep 6

Commit

7a4453c

verified ·

1 Parent(s): 16593bf

Update app.py

Browse files

Files changed (1) hide show

app.py +263 -536

app.py CHANGED Viewed

@@ -1,22 +1,19 @@
 from fastapi import FastAPI, Query, Path, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import RedirectResponse, JSONResponse, FileResponse, StreamingResponse
-from typing import Dict, Any, Tuple, Optional, List, Literal
-import httpx, asyncio, time, os, hashlib, json, gzip, math
-from pathlib import Path as _Path
-from datetime import datetime
 APP_NAME = "neuro-mechanism-backend"
-CALLER_ID = "neuro-mech-backend-demo"   # appears in STRING logs
-DATA_DIR = _Path("/tmp/neuro_mech_jobs")
-DATA_DIR.mkdir(parents=True, exist_ok=True)
 app = FastAPI(title=APP_NAME)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"], allow_credentials=True,
-    allow_methods=["*"], allow_headers=["*"],
 )
 @app.get("/", include_in_schema=False)
@@ -31,17 +28,11 @@ def health():
 def endpoints():
     return JSONResponse({
         "GET": [
-            "/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy&species=9606",
-            "/mechanism_graph/nodes?job_id=<id>&page=1&page_size=200",
-            "/mechanism_graph/edges?job_id=<id>&page=1&page_size=200",
-            "/mechanism_graph/literature?job_id=<id>&page=1&page_size=50",
-            "/mechanism_graph/regions?job_id=<id>&page=1&page_size=50",
-            "/download/<job_id>/nodes  (gz)",
-            "/download/<job_id>/edges  (gz)",
-            "/download/<job_id>/literature  (gz)",
-            "/download/<job_id>/regions  (gz)",
-            "/util/synonyms?term=apathy&kind=phenotype",
-            "/heuristics/regions_from_string?receptor=HTR2A&symptom=apathy&limit=40",
             "/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
             "/string/network?identifiers=HTR2A&species=9606",
             "/gpcrdb/protein?entry=htr2a_human",
@@ -53,8 +44,6 @@ def endpoints():
         ]
     })
-UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
 # ----------------- tiny in-memory TTL cache -----------------
 class TTLCache:
     def __init__(self, max_items=512):
@@ -75,12 +64,7 @@ class TTLCache:
         async with httpx.AsyncClient(headers=UA, timeout=30) as client:
             r = await client.get(url, params=params)
             r.raise_for_status()
-            # Some third-party APIs return plain text/HTML on error;
-            # Fast path: try JSON, else wrap as text.
-            try:
-                data = r.json()
-            except Exception:
-                data = {"text": r.text, "status_code": r.status_code}
         async with self._lock:
             if len(self.store) > self.max_items:
                 self.store.pop(next(iter(self.store)))
@@ -89,10 +73,10 @@ class TTLCache:
 CACHE = TTLCache()
-# ----------------- polite throttling for STRING ------------------
 _last_string_call = 0.0
 async def throttle_string():
-    """Be nice to STRING; ~1 req/sec as a courtesy."""
     global _last_string_call
     now = time.time()
     wait = 1.05 - (now - _last_string_call)
@@ -100,26 +84,29 @@ async def throttle_string():
         await asyncio.sleep(wait)
     _last_string_call = time.time()
-# ----------------- helpers -----------------
 async def get_json_cached(url: str, params: Optional[dict], ttl: int):
-    return await CACHE.get(url, params, ttl)
-def _safe_float(x, default=0.0):
     try:
-        return float(x)
-    except Exception:
-        return default
-def _hash_params(d: dict) -> str:
-    return hashlib.sha1(json.dumps(d, sort_keys=True).encode()).hexdigest()
-# ----------------- base connectors -----------------
 @app.get("/lit/eupmc")
-async def europe_pmc_search(query: str, pageSize: int = 5, page: int = 1):
-    # Europe PMC REST search (JSON)
-    # docs: https://europepmc.org/RestfulWebService ; client vignette: europepmc R pkg
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
-    params = {"query": query, "format": "json", "pageSize": pageSize, "page": page}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/lit/pubmed_esearch")
@@ -164,576 +151,316 @@ async def gpcrdb_protein(entry: str):
 @app.get("/string/network")
 async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
-    # STRING JSON network endpoint
     await throttle_string()
     url = "https://string-db.org/api/json/network"
     params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
     return await get_json_cached(url, params, ttl=3600)
-# ----------------- synonym utilities -----------------
-# curated region slang/aliases (additive to OLS)
-CURATED_REGION_SYNONYMS = {
-    "prefrontal cortex": ["PFC", "frontal cortex", "dorsolateral prefrontal cortex", "dlPFC",
-                          "ventromedial prefrontal cortex", "vmPFC", "orbitofrontal cortex", "OFC"],
-    "anterior cingulate cortex": ["ACC", "dorsal ACC", "dACC", "rostral ACC", "rACC"],
-    "nucleus accumbens": ["NAc", "ventral striatum"],
     "ventral tegmental area": ["VTA"],
-    "substantia nigra": ["SN", "pars compacta", "SNc"],
-    "hippocampus": ["hippocampal formation", "CA1", "CA3", "dentate gyrus"],
-    "amygdala": ["basolateral amygdala", "BLA", "central amygdala"]
 }
-async def _ols_synonyms(term: str, ontologies: Optional[List[str]] = None) -> List[str]:
-    # OLS4 search; aggregate synonyms for top hits containing the term
     url = "https://www.ebi.ac.uk/ols4/api/search"
-    params = {"q": term}
-    if ontologies:
-        # OLS4 supports multiple ontology filters as repeated params
-        # We'll just join as comma-separated for brevity (works for OLS4)
-        params["ontology"] = ",".join(ontologies)
     data = await get_json_cached(url, params, ttl=86400)
-    syns = set()
     try:
-        docs = data.get("response", {}).get("docs", [])
-        for d in docs[:5]:
-            for s in d.get("synonyms", []) or []:
-                if isinstance(s, str):
-                    syns.add(s)
     except Exception:
         pass
-    return list(syns)
-async def _mygene_aliases(symbol: str) -> List[str]:
-    # MyGene.info v3; pull aliases/other names for the main focus gene
     url = "https://mygene.info/v3/query"
-    params = {"q": f"symbol:{symbol}", "fields": "symbol,name,alias,alias_symbol,other_names", "size": 1, "species": "human"}
     data = await get_json_cached(url, params, ttl=86400)
-    syns = set()
     try:
-        hits = data.get("hits", [])
-        if hits:
-            h = hits[0]
-            for fld in ("symbol","name"):
-                v = h.get(fld)
-                if isinstance(v, str):
-                    syns.add(v)
-            for fld in ("alias","alias_symbol","other_names"):
-                v = h.get(fld)
-                if isinstance(v, list):
-                    for x in v:
-                        if isinstance(x, str):
-                            syns.add(x)
     except Exception:
         pass
-    return list(syns)
 @app.get("/util/synonyms")
-async def util_synonyms(term: str, kind: Literal["region","gene","phenotype","auto"]="auto"):
-    """
-    Fetch synonyms for a term.
-      region: OLS4 (UBERON,HBP/HPO where applicable) + curated slang
-      gene:   MyGene.info aliases
-      phenotype: OLS4(HPO)
-      auto: choose gene if ALLCAPS letters+digits, else phenotype->region fallback.
-    """
-    k = kind
-    if k == "auto":
-        k = "gene" if term.isupper() else "phenotype"
-    syns = set([term])
-    if k == "region":
-        syns.update(CURATED_REGION_SYNONYMS.get(term.lower(), []))
-        syns.update(await _ols_synonyms(term, ontologies=["uberon","hbp","hpo","ncit"]))
-    elif k == "gene":
-        syns.update(await _mygene_aliases(term))
-    elif k == "phenotype":
-        syns.update(await _ols_synonyms(term, ontologies=["hpo","efo","mondo"]))
-    return {"term": term, "kind": k, "synonyms": sorted({s for s in syns if isinstance(s, str) and len(s) <= 60})}
-# ----------------- region heuristic (upgraded) -----------------
 REGION_TERMS_DEFAULT = [
-    "prefrontal cortex","anterior cingulate cortex","mPFC","ACC","nucleus accumbens","ventral striatum",
     "dorsal striatum","caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
-    "insula","ventral tegmental area","VTA","substantia nigra","cerebellum"
 ]
 def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
     genes = set()
     f = focus.upper()
     for e in edges or []:
         for k in ("preferredName_A","preferredName_B"):
             g = e.get(k)
-            if g and isinstance(g,str) and g.upper() != f:
                 genes.add(g)
     return list(genes)
-async def _eupmc_hitcount(q: str) -> int:
-    # Europe PMC search hitCount (pageSize=0)
-    url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
-    params = {"query": q, "format": "json", "pageSize": 0}
-    data = await get_json_cached(url, params, ttl=3600)
-    try:
-        return int(data.get("hitCount", 0))
-    except Exception:
-        return 0
 @app.get("/heuristics/regions_from_string")
 async def regions_from_string(
     receptor: str = Query(..., description="e.g., HTR2A"),
     species: int = 9606,
     limit: int = 40,
-    regions: Optional[str] = Query(None, description="comma-separated region terms (optional)"),
-    symptom: Optional[str] = Query(None, description="optional phenotype/symptom to weight co-mentions (e.g., apathy)")
 ):
     """
-    Heuristic: rank brain regions by STRING neighbors + Europe PMC co-mentions, with synonyms & tiered fallbacks.
-    Tiers (all unquoted for flexible match):
-      T1: (region_syns) AND ((receptor_syns) OR neighbors) AND (symptom_syns?)   weight 1.0
-      T2: (region_syns) AND (receptor_syns OR neighbors)                         weight 0.6
-      T3: (region_syns) AND (receptor_syns)                                      weight 0.5
-      T4: (region_syns) AND (symptom_syns)                                       weight 0.3
-    Final score = log10(weighted_hits+1) * mean_top_STRING_conf
     """
     # 1) STRING neighbors
     edges = await string_network(receptor, species=species, limit=limit)
     neighbors = collect_gene_symbols_from_string(edges, receptor)
-    # STRING confidences
-    conf: Dict[str, float] = {}
-    for e in edges or []:
-        a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), _safe_float(e.get("score", 0))
-        if a and a.upper() != receptor.upper():
-            conf[a] = max(conf.get(a, 0.0), score)
-        if b and b.upper() != receptor.upper():
-            conf[b] = max(conf.get(b, 0.0), score)
-    mean_conf = sum(conf.values())/max(len(conf),1) if conf else 0.2
     # 2) synonyms
-    receptor_syns = await _mygene_aliases(receptor)
-    symptom_syns = []
-    if symptom:
-        s = await util_synonyms(symptom, kind="phenotype")
-        symptom_syns = s["synonyms"]
     region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
-    # Build clauses (unquoted OR lists)
-    gene_clause = " OR ".join(sorted({receptor} | set(receptor_syns) | set(neighbors[:25])))
     results = []
-    tasks = []
-    tier_defs = []
     for region in region_list:
-        # region synonyms
-        rs = await util_synonyms(region, kind="region")
-        region_syns = rs["synonyms"]
-        region_clause = " OR ".join(region_syns)
-        # tiers
         # T1
-        if symptom and symptom_syns:
-            t1 = f"({region_clause}) AND (({gene_clause})) AND ({' OR '.join(symptom_syns)})"
         else:
-            t1 = f"({region_clause}) AND (({gene_clause}))"
-        t2 = f"({region_clause}) AND (({gene_clause}))"
-        t3 = f"({region_clause}) AND ({' OR '.join(sorted(set([receptor] + receptor_syns)))})"
-        t4 = f"({region_clause}) AND ({' OR '.join(symptom_syns)})" if symptom_syns else None
-        tiers = [("t1",1.0,t1), ("t2",0.6,t2), ("t3",0.5,t3)]
-        if t4: tiers.append(("t4",0.3,t4))
-        # schedule hitCount calls
-        tier_defs.append((region, tiers))
-        for _,_,q in tiers:
-            tasks.append(_eupmc_hitcount(q))
-    # gather all counts in-order
-    counts_all = await asyncio.gather(*tasks)
-    # fold back into regions
-    idx = 0
-    for region, tiers in tier_defs:
-        weighted = 0.0
-        tier_counts = {}
-        for name, weight, _q in tiers:
-            hc = counts_all[idx]; idx += 1
-            tier_counts[name] = hc
-            weighted += weight * hc
-        score = math.log10(weighted + 1.0) * mean_conf
-        results.append({"region": region, "tiers": tier_counts, "weighted_hits": int(round(weighted)),
-                        "weighted_score": round(score, 4)})
     results.sort(key=lambda x: x["weighted_score"], reverse=True)
     return {
         "focus": receptor,
         "neighbors_considered": neighbors[:25],
         "regions_ranked": results,
-        "notes": "STRING + Europe PMC with synonyms and tiered fallbacks (unquoted)."
     }
-# ----------------- MANIFEST + PAGED SECTIONS + DOWNLOAD -----------------
-def _job_dir(job_id: str) -> _Path:
-    d = DATA_DIR / job_id
-    d.mkdir(parents=True, exist_ok=True)
-    return d
-def _write_gz_jsonl(path: _Path, items: List[dict]):
-    with gzip.open(path, "wt", encoding="utf-8") as gz:
-        for it in items:
-            gz.write(json.dumps(it, ensure_ascii=False) + "\n")
-def _read_gz_page(path: _Path, page: int, page_size: int) -> Tuple[int, List[dict]]:
-    total = 0
-    start = (page - 1) * page_size
-    end = start + page_size
-    out = []
-    with gzip.open(path, "rt", encoding="utf-8") as gz:
-        for i, line in enumerate(gz):
-            if not line.strip():
-                continue
-            if i >= start and i < end:
-                out.append(json.loads(line))
-            total += 1
-    return total, out
-async def _build_mech_job(params: dict) -> dict:
-    """
-    Build nodes/edges/literature/regions; write gz NDJSON + meta.
-    """
-    receptor = params["receptor"]
-    species = int(params.get("species", 9606))
-    symptom = params.get("symptom")
-    string_limit = int(params.get("string_limit", 200))
-    eupmc_page_size = int(params.get("eupmc_page_size", 100))
-    eupmc_max_pages = int(params.get("eupmc_max_pages", 3))
-    job_id = _hash_params(params)
-    d = _job_dir(job_id)
-    meta_path = d / "meta.json"
-    if meta_path.exists():
-        return json.loads(meta_path.read_text("utf-8"))
-    # 1) STRING edges + nodes
-    edges = await string_network(receptor, species=species, limit=string_limit)
-    edge_items = []
-    nodes = set([receptor])
-    for e in edges or []:
-        a = e.get("preferredName_A"); b = e.get("preferredName_B")
-        score = _safe_float(e.get("score", 0))
-        if a and b:
-            edge_items.append({"a": a, "b": b, "score": score})
-            nodes.add(a); nodes.add(b)
-    node_items = [{"symbol": n, "seed": (n.upper()==receptor.upper())} for n in sorted(nodes)]
-    _write_gz_jsonl(d / "edges.jsonl.gz", edge_items)
-    _write_gz_jsonl(d / "nodes.jsonl.gz", node_items)
-    # 2) Europe PMC literature for (receptor AND symptom?) else receptor
-    lit_items = []
-    base_q = f"{receptor} AND {symptom}" if symptom else receptor
-    for page in range(1, eupmc_max_pages+1):
-        res = await europe_pmc_search(base_q, pageSize=eupmc_page_size, page=page)
-        hits = res.get("resultList", {}).get("result", []) or []
-        for h in hits:
-            lit_items.append({
-                "id": h.get("id"),
-                "source": h.get("source"), "title": h.get("title"),
-                "pubYear": h.get("pubYear"), "authorString": h.get("authorString"),
-                "journalTitle": h.get("journalTitle"), "doi": h.get("doi")
-            })
-        # stop early if last page
-        if len(hits) < eupmc_page_size:
-            break
-    _write_gz_jsonl(d / "literature.jsonl.gz", lit_items)
-    # 3) Regions heuristic (with symptom)
-    reg = await regions_from_string(receptor=receptor, species=species, limit=min(100, string_limit), regions=None, symptom=symptom)
-    reg_items = []
-    for r in reg.get("regions_ranked", []):
-        reg_items.append(r)
-    _write_gz_jsonl(d / "regions.jsonl.gz", reg_items)
-    meta = {
-        "job_id": job_id,
-        "created": datetime.utcnow().isoformat() + "Z",
-        "params": params,
-        "counts": {
-            "nodes": len(node_items),
-            "edges": len(edge_items),
-            "literature": len(lit_items),
-            "regions": len(reg_items)
-        },
-        "sections": ["nodes","edges","literature","regions"]
-    }
-    meta_path.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")
-    return meta
 @app.get("/mechanism_graph_manifest")
 async def mechanism_graph_manifest(
-    receptor: str = Query(...),
     species: int = 9606,
-    symptom: Optional[str] = None,
-    string_limit: int = 200,
-    eupmc_page_size: int = 100,
-    eupmc_max_pages: int = 3
 ):
     """
-    Build the full mechanism dataset server-side and return a manifest with job_id + counts.
-    The actual data is stored as gzipped NDJSON and can be:
-      - paged via /mechanism_graph/{section}?job_id=...&page=1&page_size=...
-      - or downloaded as a single gz file via /download/{job_id}/{section}
     """
-    params = {
-        "receptor": receptor, "species": species, "symptom": symptom,
-        "string_limit": string_limit, "eupmc_page_size": eupmc_page_size, "eupmc_max_pages": eupmc_max_pages
-    }
-    meta = await _build_mech_job(params)
-    return meta
-@app.get("/mechanism_graph/{section}")
-async def mechanism_graph_section(
-    section: Literal["nodes","edges","literature","regions"] = Path(...),
-    job_id: str = Query(...),
-    page: int = 1,
-    page_size: int = 100
-):
-    """
-    Return a single page from a section (nodes|edges|literature|regions).
-    """
-    d = _job_dir(job_id)
-    p = d / f"{section}.jsonl.gz"
-    if not p.exists():
-        raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
-    total, items = _read_gz_page(p, page=page, page_size=page_size)
-    return {
-        "job_id": job_id,
-        "section": section,
-        "page": page, "page_size": page_size,
-        "total": total,
-        "items": items
-    }
-@app.get("/download/{job_id}/{section}")
-async def download_section(job_id: str, section: Literal["nodes","edges","literature","regions"]):
-    """
-    Download the full gzipped NDJSON for a section.
-    """
-    d = _job_dir(job_id)
-    p = d / f"{section}.jsonl.gz"
-    if not p.exists():
-        raise HTTPException(status_code=404, detail=f"section {section} not found for job {job_id}")
-    return FileResponse(
-        path=str(p),
-        filename=f"{APP_NAME}-{job_id}-{section}.jsonl.gz",
-        media_type="application/gzip"
-    )
-    # ===================== ADD BELOW YOUR EXISTING CODE =====================
-from fastapi.responses import StreamingResponse, FileResponse
-import gzip, io, secrets, math, pathlib, datetime
-# -------- small in-memory job store (sections kept per job) ----------
-JOBS: Dict[str, Dict[str, Any]] = {}
-JOB_TTL_SECONDS = 3600
-def _mk_job_id() -> str:
-    return secrets.token_hex(8)
-def _save_job(sections: Dict[str, Any]) -> str:
-    # prune old
-    now = time.time()
-    for k, v in list(JOBS.items()):
-        if now - v.get("_ts", now) > JOB_TTL_SECONDS:
-            JOBS.pop(k, None)
-    jid = _mk_job_id()
-    JOBS[jid] = {"_ts": now, **sections}
-    return jid
-def _get_job(jid: str) -> Optional[Dict[str, Any]]:
-    job = JOBS.get(jid)
-    if not job:
-        return None
-    if time.time() - job.get("_ts", 0) > JOB_TTL_SECONDS:
-        JOBS.pop(jid, None)
-        return None
-    return job
-def _gzipped_json_bytes(obj: Any) -> bytes:
-    raw = orjson.dumps(obj)  # fast & small
-    buf = io.BytesIO()
-    with gzip.GzipFile(fileobj=buf, mode="wb", compresslevel=6) as z:
-        z.write(raw)
-    return buf.getvalue()
-# --------------------- Synonym utilities ------------------------------
-async def _ols4_synonyms(term: str, size: int = 20) -> List[str]:
-    """Region/ontology synonyms via OLS4 search."""
-    url = "https://www.ebi.ac.uk/ols4/api/search"
-    params = {"q": term, "size": size}
-    data = await get_json_cached(url, params, ttl=86400)
-    syns = set()
-    for hit in data.get("response", {}).get("docs", []):
-        for k in ("synonym", "label"):
-            val = hit.get(k)
-            if isinstance(val, list):
-                syns.update([s for s in val if isinstance(s, str)])
-            elif isinstance(val, str):
-                syns.add(val)
-    return sorted({s for s in syns if s.lower() != term.lower()})
-async def _mygene_synonyms(gene: str, size: int = 5) -> List[str]:
-    """Gene symbol/name/alias via MyGene.info."""
-    url = "https://mygene.info/v3/query"
-    params = {"q": gene, "fields": "symbol,name,alias", "species": "human", "size": size}
-    data = await get_json_cached(url, params, ttl=86400)
-    syns = set()
-    for h in data.get("hits", []):
-        for k in ("symbol", "name", "alias"):
-            v = h.get(k)
-            if isinstance(v, list):
-                syns.update([s for s in v if isinstance(s, str)])
-            elif isinstance(v, str):
-                syns.add(v)
-    return sorted({s for s in syns if s.lower() != gene.lower()})
-@app.get("/util/synonyms")
-async def util_synonyms(term: str = Query(...), kind: str = Query("region", description="region|gene|phenotype"), size: int = 20):
     try:
-        if kind == "gene":
-            syns = await _mygene_synonyms(term, size=min(size, 20))
-        else:
-            syns = await _ols4_synonyms(term, size=min(size, 50))
-        return {"term": term, "kind": kind, "synonyms": syns}
-    except Exception as e:
-        return {"term": term, "kind": kind, "synonyms": [], "error": str(e)}
-# ------ improved regions heuristic: synonyms + unquoted + fallbacks -----
-REGION_SYNONYM_OVERRIDES = {
-    "prefrontal cortex": ["PFC", "mPFC", "vmPFC", "dorsolateral prefrontal cortex", "DLPFC", "ventromedial prefrontal cortex"],
-    "anterior cingulate cortex": ["ACC", "dACC", "pregenual ACC", "subgenual ACC", "sgACC"],
-    "nucleus accumbens": ["NAc", "ventral striatum", "accumbens"]
-}
-async def _region_terms_with_synonyms(base_terms: List[str]) -> Dict[str, List[str]]:
-    out: Dict[str, List[str]] = {}
-    for term in base_terms:
-        # manual seeds + OLS4 expansion
-        syns = set(REGION_SYNONYM_OVERRIDES.get(term, []))
-        try:
-            syns.update(await _ols4_synonyms(term, size=20))
-        except Exception:
-            pass
-        # keep short list to control URL size
-        out[term] = sorted(list(syns))[:12]
-    return out
-@app.get("/heuristics/regions_from_string")
-async def regions_from_string(
-    receptor: str = Query(..., description="e.g., HTR2A"),
-    species: int = 9606,
-    limit: int = 40,
-    regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
-    expand: int = Query(1, description="if 1, use OLS4 synonyms and manual aliases"),
-):
-    # 1) STRING neighbors (cached)
-    edges = await string_network(receptor, species=species, limit=limit)
-    neighbors = collect_gene_symbols_from_string(edges, receptor)
-    conf: Dict[str, float] = {}
-    for e in edges:
-        a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), float(e.get("score", 0))
-        if a and a.upper() != receptor.upper(): conf[a] = max(conf.get(a, 0.0), score)
-        if b and b.upper() != receptor.upper(): conf[b] = max(conf.get(b, 0.0), score)
-    region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
-    syn_map = await _region_terms_with_synonyms(region_list) if expand else {t: [] for t in region_list}
-    # 2) Europe PMC hitCount with broad, unquoted ORs; fallback sequence
-    gene_clause = " OR ".join([receptor] + neighbors[:25])
-    results = []
-    for region in region_list:
-        terms = [region] + syn_map.get(region, [])
-        # broad query (no quotes)
-        q1 = f'({ " OR ".join(terms) }) AND ({gene_clause})'
-        h1 = await eupmc_hitcount(q1)
-        if h1 == 0:
-            # fallback 1: region only with receptor
-            q2 = f'({ " OR ".join(terms) }) AND ({receptor})'
-            h2 = await eupmc_hitcount(q2)
-            hc = h2
-        else:
-            hc = h1
-        mean_conf = sum(conf.values())/max(len(conf),1)
-        score = (math.log10(hc+1.0)) * (mean_conf if conf else 0.2)
-        results.append({"region": region, "synonyms_used": terms[1:], "hits": hc, "weighted_score": round(score, 4)})
-    results.sort(key=lambda x: x["weighted_score"], reverse=True)
-    return {
-        "focus": receptor,
-        "neighbors_considered": neighbors[:25],
-        "regions_ranked": results,
-        "notes": "Heuristic: STRING neighbors + EuropePMC co-occurrence, with synonyms, broad match, and fallbacks."
-    }
-# ------------------- MANIFEST / SECTION / DOWNLOAD ---------------------
-@app.get("/mechanism_graph_manifest")
-async def mechanism_graph_manifest(
-    receptor: str = Query(...),
-    symptom: str = Query("apathy"),
     species: int = 9606,
-    max_neighbors: int = 50,
-    max_hits: int = 25
 ):
-    """Prepare big graph in sections; return a manifest + job_id."""
-    gpcr_entry = f"{receptor.lower()}_human" if not receptor.lower().endswith("_human") else receptor.lower()
-    # prefetch pieces (cached)
-    gpcr = await get_json_cached(f"https://gpcrdb.org/services/protein/{gpcr_entry}", None, ttl=86400)
-    string_r = await get_json_cached("https://string-db.org/api/json/network",
-                                     {"identifiers": receptor, "species": species, "caller_identity": CALLER_ID, "limit": max_neighbors},
-                                     ttl=3600)
-    lit_r = await get_json_cached("https://www.ebi.ac.uk/europepmc/webservices/rest/search",
-                                  {"query": f"{receptor} AND {symptom}", "format": "json", "pageSize": max_hits},
-                                  ttl=900)
-    regions_r = await regions_from_string.__wrapped__(receptor=receptor, species=species, limit=40, regions=None, expand=1)
-    sections = {
-        "nodes": {"receptor": receptor, "gpcrdb": gpcr},
-        "edges": {"string": string_r},
-        "literature": {"eupmc": lit_r},
-        "regions": regions_r,
-        "provenance": {
-            "built_at": datetime.datetime.utcnow().isoformat() + "Z",
-            "params": {"receptor": receptor, "symptom": symptom, "species": species,
-                       "max_neighbors": max_neighbors, "max_hits": max_hits}
-        }
-    }
-    jid = _save_job(sections)
-    counts = {
-        "edges": len(sections["edges"].get("string", [])),
-        "literature_hits": int(sections["literature"].get("eupmc", {}).get("hitCount", 0)),
-        "regions": len(sections["regions"].get("regions_ranked", []))
-    }
-    return {"job_id": jid, "sections": list(sections.keys()), "counts": counts}
-@app.get("/mechanism_graph/{section}")
-async def mechanism_graph_section(section: str, job_id: str = Query(...)):
-    """Return one section to keep payloads small."""
-    job = _get_job(job_id)
-    if not job or section not in job:
-        return JSONResponse({"error": "missing job or section"}, status_code=404)
-    return job[section]
 @app.get("/download/{job_id}/{section}")
 async def download_section(job_id: str, section: str):
-    """Download a section as gzipped JSON (useful for huge payloads)."""
-    job = _get_job(job_id)
-    if not job or section not in job:
-        return JSONResponse({"error": "missing job or section"}, status_code=404)
-    data = _gzipped_json_bytes(job[section])
-    filename = f"{APP_NAME}-{job_id}-{section}.json.gz"
-    return StreamingResponse(io.BytesIO(data),
                              media_type="application/gzip",
-                             headers={"Content-Disposition": f'attachment; filename="{filename}"'})
-# ===================== END ADD-ON BLOCK =====================

 from fastapi import FastAPI, Query, Path, HTTPException
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse, JSONResponse, StreamingResponse, FileResponse
+import httpx, asyncio, time, os, hashlib, json, io, gzip, math
+from typing import Dict, Any, Tuple, Optional, List
 APP_NAME = "neuro-mechanism-backend"
+CALLER_ID = "neuro-mech-backend-demo"   # shows in STRING logs / rate fairness
+UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
 app = FastAPI(title=APP_NAME)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"], allow_credentials=True,
+    allow_methods=["*"], allow_headers=["*"]
 )
 @app.get("/", include_in_schema=False)
 def endpoints():
     return JSONResponse({
         "GET": [
+            "/mechanism_graph_manifest?receptor=HTR2A&symptom=apathy",
+            "/mechanism_graph/regions?receptor=HTR2A&symptom=apathy",
+            "/download/{job_id}/{section}",
+            "/heuristics/regions_from_string?receptor=HTR2A",
+            "/util/synonyms?term=ACC&kind=region",
             "/lit/eupmc?query=HTR2A%20AND%20apathy&pageSize=5",
             "/string/network?identifiers=HTR2A&species=9606",
             "/gpcrdb/protein?entry=htr2a_human",
         ]
     })
 # ----------------- tiny in-memory TTL cache -----------------
 class TTLCache:
     def __init__(self, max_items=512):
         async with httpx.AsyncClient(headers=UA, timeout=30) as client:
             r = await client.get(url, params=params)
             r.raise_for_status()
+            data = r.json()
         async with self._lock:
             if len(self.store) > self.max_items:
                 self.store.pop(next(iter(self.store)))
 CACHE = TTLCache()
+# --------------- polite throttling for STRING ----------------
 _last_string_call = 0.0
 async def throttle_string():
+    """Be nice to STRING; ~1 req/sec is a good courtesy."""
     global _last_string_call
     now = time.time()
     wait = 1.05 - (now - _last_string_call)
         await asyncio.sleep(wait)
     _last_string_call = time.time()
+# ----------------- Helpers -----------------
 async def get_json_cached(url: str, params: Optional[dict], ttl: int):
     try:
+        return await CACHE.get(url, params, ttl)
+    except Exception as e:
+        return {"error": str(e), "url": url, "params": params}
+def job_key(receptor: str, symptom: str) -> str:
+    raw = f"{receptor}|{symptom}|{int(time.time())}"
+    return hashlib.sha1(raw.encode()).hexdigest()[:16]
+def gz_json_bytes(obj: Any) -> bytes:
+    b = json.dumps(obj, ensure_ascii=False).encode("utf-8")
+    bio = io.BytesIO()
+    with gzip.GzipFile(fileobj=bio, mode="wb") as gz:
+        gz.write(b)
+    return bio.getvalue()
+# ----------------- External API wrappers -----------------
 @app.get("/lit/eupmc")
+async def europe_pmc_search(query: str, pageSize: int = 5):
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
+    params = {"query": query, "format": "json", "pageSize": pageSize}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/lit/pubmed_esearch")
 @app.get("/string/network")
 async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
     await throttle_string()
     url = "https://string-db.org/api/json/network"
     params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
     return await get_json_cached(url, params, ttl=3600)
+# ----------------- Synonyms (regions/genes/phenotypes) --------------
+# Simple built-in expansions + OLS/MyGene lookups.
# Built-in abbreviations/alternate names per canonical (lowercase) region term.
REGION_SEED_SYNONYMS = {
    "prefrontal cortex": [
        "PFC",
        "mPFC",
        "vmPFC",
        "dlPFC",
        "dorsolateral prefrontal cortex",
        "ventromedial prefrontal cortex",
    ],
    "anterior cingulate cortex": [
        "ACC",
        "dACC",
        "pgACC",
        "sgACC",
        "subgenual cingulate",
    ],
    "nucleus accumbens": ["NAc", "ventral striatum", "accumbens"],
    "ventral tegmental area": ["VTA"],
    "substantia nigra": ["SN", "SNc", "pars compacta"],
    "hippocampus": ["HC"],
    "amygdala": [],
    "insula": ["insular cortex"],
    "thalamus": [],
    "hypothalamus": [],
    "cerebellum": [],
}
async def ols4_synonyms(term: str, ontology: Optional[str] = None) -> List[str]:
    """Collect labels and synonyms for *term* from the OLS4 search endpoint.

    Best-effort parse: any response that lacks the ``response.docs`` list
    (including the error dict produced by get_json_cached) yields ``[]``.

    Args:
        term: Free-text search term.
        ontology: Optional ontology id sent as the ``ontology`` parameter.

    Returns:
        Up to 50 non-empty strings, stripped and de-duplicated
        case-insensitively, in first-seen order (each doc's synonyms
        before its label).
    """
    url = "https://www.ebi.ac.uk/ols4/api/search"
    params = {"q": term, "rows": 20}
    if ontology:
        params["ontology"] = ontology
    data = await get_json_cached(url, params, ttl=86400)

    response = data.get("response") if isinstance(data, dict) else None
    docs = response.get("docs") if isinstance(response, dict) else None

    syns: list = []
    for d in docs if isinstance(docs, list) else []:
        if not isinstance(d, dict):
            continue
        synonym = d.get("synonym")
        if isinstance(synonym, str):
            # A scalar synonym must be appended whole, not extended char by char.
            syns.append(synonym)
        elif isinstance(synonym, list):
            syns.extend(synonym)
        if "label" in d:
            syns.append(d["label"])

    # Dedup case-insensitively, dropping blanks and non-string entries.
    out: List[str] = []
    seen = set()
    for s in syns:
        if not isinstance(s, str):
            continue
        s2 = s.strip()
        key = s2.lower()
        if s2 and key not in seen:
            out.append(s2)
            seen.add(key)
    return out[:50]
async def mygene_synonyms(symbol: str) -> List[str]:
    """Collect gene symbols, names and aliases for *symbol* from MyGene.info.

    Best-effort parse: a response without a ``hits`` list (including the
    error dict produced by get_json_cached) yields ``[]``.

    Returns:
        Up to 50 non-empty strings, stripped and de-duplicated
        case-insensitively, in first-seen order.
    """
    url = "https://mygene.info/v3/query"
    params = {"q": symbol, "fields": "symbol,name,alias,other_names", "size": 5}
    data = await get_json_cached(url, params, ttl=86400)

    hits = data.get("hits") if isinstance(data, dict) else None

    syns: list = []
    for hit in hits if isinstance(hits, list) else []:
        if not isinstance(hit, dict):
            continue
        for k in ("symbol", "name"):
            if k in hit:
                syns.append(hit[k])
        for k in ("alias", "other_names"):
            value = hit.get(k)
            if isinstance(value, list):
                syns.extend(value)
            elif isinstance(value, str):
                # A single alias may arrive as a bare string instead of a list.
                syns.append(value)

    # unique (case-insensitive), skipping missing values and blanks
    out: List[str] = []
    seen = set()
    for s in syns:
        if s is None:
            continue
        s2 = str(s).strip()
        key = s2.lower()
        if s2 and key not in seen:
            out.append(s2)
            seen.add(key)
    return out[:50]
 @app.get("/util/synonyms")
+async def util_synonyms(term: str, kind: str = Query("region", enum=["region","gene","phenotype"])):
+    term_norm = term.strip()
+    if kind == "region":
+        seeds = REGION_SEED_SYNONYMS.get(term_norm.lower(), [])
+        ols = await ols4_synonyms(term_norm, ontology="uberon")
+        return {"term": term_norm, "kind": kind, "synonyms": sorted(set([term_norm] + seeds + ols))}
+    elif kind == "gene":
+        mg = await mygene_synonyms(term_norm)
+        return {"term": term_norm, "kind": kind, "synonyms": sorted(set([term_norm] + mg))}
+    else:
+        # phenotype via OLS (HPO)
+        ols = await ols4_synonyms(term_norm, ontology="hp")
+        return {"term": term_norm, "kind": kind, "synonyms": sorted(set([term_norm] + ols))}
+# ----------------- Regions heuristic (improved) -----------------
 REGION_TERMS_DEFAULT = [
+    "prefrontal cortex","anterior cingulate cortex","nucleus accumbens","ventral striatum",
     "dorsal striatum","caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
+    "insula","ventral tegmental area","substantia nigra","cerebellum"
 ]
async def eupmc_hitcount(q: str) -> int:
    """Return the Europe PMC ``hitCount`` for query *q*, or 0 if it cannot be read."""
    payload = await get_json_cached(
        "https://www.ebi.ac.uk/europepmc/webservices/rest/search",
        {"query": q, "format": "json", "pageSize": 0},
        ttl=1800,
    )
    try:
        hits = int(payload.get("hitCount", 0))
    except Exception:
        hits = 0
    return hits
 def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
     genes = set()
     f = focus.upper()
     for e in edges or []:
         for k in ("preferredName_A","preferredName_B"):
             g = e.get(k)
+            if g and g.upper() != f:
                 genes.add(g)
     return list(genes)
 @app.get("/heuristics/regions_from_string")
 async def regions_from_string(
     receptor: str = Query(..., description="e.g., HTR2A"),
     species: int = 9606,
     limit: int = 40,
+    regions: Optional[str] = Query(None, description="comma-separated region terms; default common regions"),
+    use_synonyms: bool = True,
+    symptom: Optional[str] = None
 ):
     """
+    Rank brain regions by co-mention with (receptor OR STRING neighbors OR synonyms), with fallbacks.
+    Tiered search:
+      T1: (region_syns) AND (receptor OR neighbors OR gene_syns)
+      T2: (region_syns) AND (receptor)
+      T3: (region) AND (receptor)
+    Unquoted broad matches are used to avoid exact-phrase misses.
     """
     # 1) STRING neighbors
     edges = await string_network(receptor, species=species, limit=limit)
     neighbors = collect_gene_symbols_from_string(edges, receptor)
     # 2) synonyms
     region_list = [r.strip() for r in (regions.split(",") if regions else REGION_TERMS_DEFAULT) if r.strip()]
+    region_syns_map: Dict[str, List[str]] = {}
+    if use_synonyms:
+        syn_tasks = [util_synonyms(r, "region") for r in region_list]
+        # run as local function calls (not HTTP)
+        syn_results = await asyncio.gather(*[t if asyncio.iscoroutine(t) else asyncio.create_task(t) for t in syn_tasks])
+        for r, syn in zip(region_list, syn_results):
+            region_syns_map[r] = syn.get("synonyms", [])[:10] or [r]
+        # gene synonyms for top neighbors (cap 20)
+        gene_syns: List[str] = []
+        for g in neighbors[:20]:
+            gs = await util_synonyms(g, "gene")
+            gene_syns.extend(gs.get("synonyms", [])[:5])
+        gene_syns = list({s for s in gene_syns if s})
+    else:
+        for r in region_list:
+            region_syns_map[r] = [r]
+        gene_syns = []
+    # 3) Europe PMC hits per region, tiered
     results = []
+    # build RHS (receptor OR neighbors OR gene_syns)
+    rhs_terms = [receptor] + neighbors[:25] + gene_syns[:25]
+    rhs = " OR ".join({t for t in rhs_terms if t})
     for region in region_list:
+        syns = region_syns_map.get(region, [region])
+        lhs = " OR ".join(syns)
+        symptom_clause = f" AND ({symptom})" if symptom else ""
         # T1
+        q1 = f"({lhs}) AND ({rhs}){symptom_clause}"
+        hc1 = await eupmc_hitcount(q1)
+        score = math.log10(hc1 + 1.0)
+        if hc1 == 0:
+            # T2
+            q2 = f"({lhs}) AND ({receptor}){symptom_clause}"
+            hc2 = await eupmc_hitcount(q2)
+            score = math.log10(hc2 + 1.0)
+            if hc2 == 0:
+                # T3
+                q3 = f"({region}) AND ({receptor}){symptom_clause}"
+                hc3 = await eupmc_hitcount(q3)
+                score = math.log10(hc3 + 1.0)
+                results.append({"region": region, "hits": hc3, "tier": "T3", "weighted_score": round(score, 4)})
+            else:
+                results.append({"region": region, "hits": hc2, "tier": "T2", "weighted_score": round(score, 4)})
         else:
+            results.append({"region": region, "hits": hc1, "tier": "T1", "weighted_score": round(score, 4)})
     results.sort(key=lambda x: x["weighted_score"], reverse=True)
     return {
         "focus": receptor,
         "neighbors_considered": neighbors[:25],
         "regions_ranked": results,
+        "notes": "Heuristic uses STRING neighbors + Europe PMC co-mentions with synonyms and fallbacks."
     }
+# ----------------- Manifest / Section / Download -----------------
+# ephemeral in-memory store of assembled sections (by job_id)
JOBS: Dict[str, Dict[str, Any]] = {}


@app.get("/mechanism_graph_manifest")
async def mechanism_graph_manifest(
    receptor: str = Query(..., description="e.g., HTR2A"),
    symptom: str = Query("apathy"),
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10
):
    """
    Create a job entry and describe the sections available for it.

    Computes three lightweight counts (STRING edges, Europe PMC hit count,
    ranked regions), stores them with the request context in JOBS under a
    fresh job id, and returns that id with a per-section size hint.
    """
    jid = job_key(receptor, symptom)

    # STRING edge count (anything other than a list counts as zero)
    network = await string_network(receptor, species=species, limit=string_limit)
    edge_total = len(network) if isinstance(network, list) else 0

    # Literature hit count only; no records are requested
    literature = await europe_pmc_search(f"{receptor} AND {symptom}", pageSize=0)
    try:
        hit_total = int(literature.get("hitCount", 0))
    except Exception:
        hit_total = 0

    # Regions heuristic preview with default regions and synonyms enabled
    ranked = await regions_from_string(receptor=receptor, species=species, limit=40, regions=None, use_synonyms=True, symptom=symptom)
    region_total = len(ranked.get("regions_ranked", [])) if isinstance(ranked, dict) else 0

    counts = {"string_edges": edge_total, "literature_hits": hit_total, "regions": region_total}
    # Only the context and the overview are stored here.
    JOBS[jid] = {
        "_meta": {"receptor": receptor, "symptom": symptom, "species": species},
        "overview": {"receptor": receptor, "symptom": symptom, "counts": counts},
    }

    return {
        "job_id": jid,
        "sections": [
            {"name": "overview",   "approx_size": "small"},
            {"name": "network",    "approx_size": f"{edge_total} edges (limit={string_limit})"},
            {"name": "literature", "approx_size": f"{hit_total} hits (pageSize={lit_page_size})"},
            {"name": "regions",    "approx_size": f"{region_total} entries"},
        ],
    }
@app.get("/mechanism_graph/{section}")
async def mechanism_graph_section(
    section: str = Path(..., description="one of: overview, network, literature, regions"),
    receptor: Optional[str] = None,
    symptom: Optional[str] = None,
    species: int = 9606,
    string_limit: int = 50,
    lit_page_size: int = 10,
    job_id: Optional[str] = Query(None, description="optional; use manifest if you want stable ids")
):
    """
    Returns one section. If job_id is missing or unknown, builds on the fly.

    Context (receptor, symptom) missing from the query is filled in from the
    stored job when job_id is known. The literature query is
    "<receptor> AND <symptom>", or just the receptor when no symptom is
    available.

    Raises:
        HTTPException 422: no receptor in the query or the stored job.
        HTTPException 404: unknown section name.
    """
    # pull context from job if available
    if job_id and job_id in JOBS:
        ctx = JOBS[job_id].get("_meta", {})
        receptor = receptor or ctx.get("receptor")
        symptom = symptom or ctx.get("symptom")
        # NOTE: species defaults to 9606 (truthy), so the stored value is only
        # used when the caller passes a falsy species.
        species = species or ctx.get("species")
    if not receptor:
        raise HTTPException(status_code=422, detail="receptor is required (query param)")

    # Without a symptom, search on the receptor alone rather than formatting
    # None into the query as the literal text "None".
    lit_query = f"{receptor} AND {symptom}" if symptom else receptor

    if section == "overview":
        if not job_id or job_id not in JOBS:
            jid = job_key(receptor, symptom or "")
            JOBS.setdefault(jid, {"_meta": {"receptor": receptor, "symptom": symptom or "", "species": species}})
            job_id = jid
        # ensure overview exists
        if "overview" not in JOBS[job_id]:
            sdata = await string_network(receptor, species=species, limit=string_limit)
            s_count = len(sdata) if isinstance(sdata, list) else 0
            ldata = await europe_pmc_search(lit_query, pageSize=0)
            lit_hits = int(ldata.get("hitCount", 0)) if isinstance(ldata, dict) else 0
            rdata = await regions_from_string(receptor=receptor, species=species, limit=40, regions=None, use_synonyms=True, symptom=symptom)
            r_count = len(rdata.get("regions_ranked", [])) if isinstance(rdata, dict) else 0
            JOBS[job_id]["overview"] = {
                "receptor": receptor, "symptom": symptom,
                "counts": {"string_edges": s_count, "literature_hits": lit_hits, "regions": r_count}
            }
        return {"job_id": job_id, "section": "overview", "data": JOBS[job_id]["overview"]}
    elif section == "network":
        net = await string_network(receptor, species=species, limit=string_limit)
        return {"job_id": job_id, "section": "network", "data": net}
    elif section == "literature":
        lit = await europe_pmc_search(lit_query, pageSize=lit_page_size)
        return {"job_id": job_id, "section": "literature", "data": lit}
    elif section == "regions":
        reg = await regions_from_string(receptor=receptor, species=species, limit=40, regions=None, use_synonyms=True, symptom=symptom)
        return {"job_id": job_id, "section": "regions", "data": reg}
    else:
        raise HTTPException(status_code=404, detail=f"unknown section: {section}")
 @app.get("/download/{job_id}/{section}")
 async def download_section(job_id: str, section: str):
+    """
+    Gzipped JSON download of a section; if section not built yet, tries to return what's there.
+    """
+    data = JOBS.get(job_id, {}).get(section) or JOBS.get(job_id, {}).get("_meta")
+    if not data:
+        raise HTTPException(status_code=404, detail="job/section not found")
+    gz = gz_json_bytes({"job_id": job_id, "section": section, "data": data})
+    return StreamingResponse(io.BytesIO(gz),
                              media_type="application/gzip",
+                             headers={"Content-Disposition": f'attachment; filename="{job_id}_{section}.json.gz"'})