Spaces:

darkfrostx
/

neuro-mechanism-backend

Running

App Files Files Community

darkfrostx committed on Sep 6

Commit

3ed83bb

verified ·

1 Parent(s): eb37def

Update app.py

Browse files

Files changed (1) hide show

app.py +107 -125

app.py CHANGED Viewed

@@ -1,17 +1,16 @@
 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import RedirectResponse, JSONResponse
-import httpx, asyncio, time, hashlib, json, math
-from typing import Dict, Any, Tuple, Optional, List, Iterable
 APP_NAME = "neuro-mechanism-backend"
-CALLER_ID = "neuro-mech-backend-demo"   # shows up in STRING logs
 app = FastAPI(title=APP_NAME)
 @app.get("/", include_in_schema=False)
 def root():
-    # Friendly landing: send to Swagger UI
     return RedirectResponse(url="/docs")
 @app.get("/health", include_in_schema=False)
@@ -43,7 +42,7 @@ app.add_middleware(
 UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
-# ----------------- Tiny in-memory TTL cache -----------------
 class TTLCache:
     def __init__(self, max_items=512):
         self.store: Dict[str, Tuple[float, Any]] = {}
@@ -60,16 +59,10 @@ class TTLCache:
             item = self.store.get(k)
             if item and (time.time() < item[0]):
                 return item[1]
-        # network
-        try:
-            async with httpx.AsyncClient(headers=UA, timeout=30) as client:
-                r = await client.get(url, params=params)
-                r.raise_for_status()
-                data = r.json()
-        except Exception as e:
-            # return structured error instead of raising => prevents 500s
-            data = {"error": str(e), "upstream": url, "params": params}
-        # cache
         async with self._lock:
             if len(self.store) > self.max_items:
                 self.store.pop(next(iter(self.store)))
@@ -78,14 +71,11 @@ class TTLCache:
 CACHE = TTLCache()
-async def get_json_cached(url: str, params: Optional[dict], ttl: int):
-    return await CACHE.get(url, params, ttl)
-# ----------------- STRING: polite throttling -----------------
 _last_string_call = 0.0
 async def throttle_string():
-    """Be nice to STRING; ~1 req/sec."""
-    # Official guidance recommends identifying the caller and being polite.
     global _last_string_call
     now = time.time()
     wait = 1.05 - (now - _last_string_call)
@@ -93,31 +83,30 @@ async def throttle_string():
         await asyncio.sleep(wait)
     _last_string_call = time.time()
-# ----------------- External endpoints -----------------
 @app.get("/lit/eupmc")
 async def europe_pmc_search(query: str, pageSize: int = 5):
-    # Europe PMC REST search returns JSON (hitCount in payload).
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
     params = {"query": query, "format": "json", "pageSize": pageSize}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/lit/pubmed_esearch")
 async def pubmed_esearch(term: str, retmax: int = 10):
-    # NCBI E-utilities ESearch, JSON retmode.
     url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
     params = {"db":"pubmed","term":term,"retmode":"json","retmax":retmax}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/trials/search")
 async def ctgov_v2_studies(q: str, pageSize: int = 5):
-    # ClinicalTrials.gov modernized API v2 /studies
     url = "https://clinicaltrials.gov/api/v2/studies"
     params = {"query.term": q, "pageSize": pageSize}
     return await get_json_cached(url, params, ttl=900)
 @app.get("/rxnav/rxcui")
 async def rxnav_rxcui(name: str):
-    # RxNav rxcui.json by name.
     url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"
     params = {"name": name}
     return await get_json_cached(url, params, ttl=86400)
@@ -130,145 +119,149 @@ async def openfda_adverse_events(drug: str, limit: int = 5):
 @app.get("/pubchem/compound_by_name")
 async def pubchem_by_name(name: str):
-    # PubChem PUG REST /compound/name/{name}/JSON. :contentReference[oaicite:3]{index=3}
     url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/JSON"
     return await get_json_cached(url, None, ttl=86400)
 @app.get("/uniprot/search")
 async def uniprot_search(query: str, size: int = 5):
-    # UniProt REST (uniprotkb/search) JSON
     url = "https://rest.uniprot.org/uniprotkb/search"
     params = {"query": query, "format": "json", "size": size}
     return await get_json_cached(url, params, ttl=86400)
 @app.get("/gpcrdb/protein")
 async def gpcrdb_protein(entry: str):
-    # GPCRdb web services, protein endpoint (JSON).
     url = f"https://gpcrdb.org/services/protein/{entry}"
-    return await get_json_cached(url, None, ttl=86400)
 @app.get("/string/network")
 async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
-    # STRING JSON network; include caller_identity and throttle.
     await throttle_string()
     url = "https://string-db.org/api/json/network"
     params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
-    return await get_json_cached(url, params, ttl=3600)
-# ----------------- STRING → region heuristic (improved) -----------------
-REGION_TERMS_DEFAULT = [
-    # Core regions
-    "prefrontal cortex","anterior cingulate cortex","mPFC","ACC",
-    "nucleus accumbens","NAc","ventral striatum","dorsal striatum",
-    "caudate","putamen","amygdala","hippocampus","thalamus","hypothalamus",
-    "insula","ventral tegmental area","VTA","substantia nigra","cerebellum",
-    # a few extras often relevant for motivation/drive
-    "orbitofrontal cortex","OFC","ventromedial prefrontal cortex","vmPFC",
-]
 REGION_SYNONYMS = {
-    "prefrontal cortex": ["PFC","frontal cortex","frontal lobe"],
-    "anterior cingulate cortex": ["ACC","cingulate gyrus","dorsal ACC","rostral ACC"],
-    "nucleus accumbens": ["NAc","accumbens","ventral striatum"],
     "ventral tegmental area": ["VTA"],
-    "substantia nigra": ["SN","SNc"],
-    "hippocampus": ["hippocampal formation"],
-    "orbitofrontal cortex": ["OFC"],
-    "ventromedial prefrontal cortex": ["vmPFC","medial orbitofrontal cortex"],
 }
-def expand_region_terms(base: Iterable[str]) -> List[str]:
-    out = []
-    seen = set()
-    for r in base:
-        for v in [r] + REGION_SYNONYMS.get(r, []):
-            v2 = v.strip()
-            if v2 and v2.lower() not in seen:
-                seen.add(v2.lower()); out.append(v2)
-    return out
 async def eupmc_hitcount(q: str) -> int:
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
     params = {"query": q, "format": "json", "pageSize": 0}
     data = await get_json_cached(url, params, ttl=1800)
-    try:
-        return int(data.get("hitCount", 0))
-    except Exception:
-        return 0
 def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
     genes = set()
     f = focus.upper()
-    for e in edges or []:
-        for k in ("preferredName_A","preferredName_B"):
             g = e.get(k)
             if g and g.upper() != f:
                 genes.add(g)
     return list(genes)
-async def compute_region_scores(receptor: str, species: int, limit: int, regions_csv: Optional[str]):
-    # 1) STRING neighbors (may include error container)
     edges = await string_network(receptor, species=species, limit=limit)
-    if isinstance(edges, dict) and "error" in edges:
-        edges = []
     neighbors = collect_gene_symbols_from_string(edges, receptor)
-    # STRING conf per neighbor (top score)
     conf: Dict[str, float] = {}
-    for e in edges or []:
-        a, b, score = e.get("preferredName_A"), e.get("preferredName_B"), float(e.get("score", 0) or 0)
         if a and a.upper() != receptor.upper():
             conf[a] = max(conf.get(a, 0.0), score)
         if b and b.upper() != receptor.upper():
             conf[b] = max(conf.get(b, 0.0), score)
-    region_list_in = [r.strip() for r in (regions_csv.split(",") if regions_csv else REGION_TERMS_DEFAULT) if r.strip()]
-    region_list = expand_region_terms(region_list_in)
-    # 2) Europe PMC co-mention counts
-    # Strategy:
-    #   (a) Strict:   "region" AND (receptor OR neighbors[:25])
-    #   (b) Fallback: region    AND  receptor           (unquoted region) if (a) == 0
     gene_clause = " OR ".join([receptor] + neighbors[:25]) if neighbors else receptor
     tasks = []
     queries = []
-    for region in region_list:
-        q_strict = f'("{region}") AND ({gene_clause})'
-        q_fallback = f'({region}) AND ({receptor})'
-        queries.append((region, q_strict, q_fallback))
-        tasks.append(eupmc_hitcount(q_strict))
-    strict_counts = await asyncio.gather(*tasks)
     results = []
-    mean_conf = sum(conf.values())/max(len(conf),1) if conf else 0.2
-    for (region, _q1, q2), hc in zip(queries, strict_counts):
-        if hc == 0:
-            hc2 = await eupmc_hitcount(q2)
-            hc = max(hc, hc2)
-        score = (math.log10(hc + 1.0)) * mean_conf
-        results.append({"region": region, "hits": hc, "weighted_score": round(score, 4)})
     results.sort(key=lambda x: x["weighted_score"], reverse=True)
     return {
         "focus": receptor,
         "neighbors_considered": neighbors[:25],
         "regions_ranked": results,
-        "notes": "Exploratory heuristic using STRING neighbors + Europe PMC co-occurrence (with synonyms + fallback)."
     }
-@app.get("/heuristics/regions_from_string")
-async def regions_from_string(
-    receptor: str = Query(..., description="e.g., HTR2A"),
-    species: int = 9606,
-    limit: int = 40,
-    regions: Optional[str] = Query(None, description="comma-separated region terms; defaults include synonyms")
-):
-    return await compute_region_scores(receptor, species, limit, regions)
-# ----------------- Aggregator (robust, no 500) -----------------
 @app.get("/mechanism_graph")
 async def mechanism_graph(
     receptor: str = Query(..., description="e.g., HTR2A"),
@@ -277,29 +270,18 @@ async def mechanism_graph(
 ):
     gpcr_entry = f"{receptor.lower()}_human" if not receptor.lower().endswith("_human") else receptor.lower()
-    # Fire in parallel; any upstream error returns a JSON {error: ...} (so no 500s)
-    gpcr_task = get_json_cached(f"https://gpcrdb.org/services/protein/{gpcr_entry}", None, ttl=86400)  #
-    string_task = get_json_cached("https://string-db.org/api/json/network",
-                                 {"identifiers": receptor, "species": species, "caller_identity": CALLER_ID, "limit": 50},
-                                 ttl=3600)  #
-    lit_task = get_json_cached("https://www.ebi.ac.uk/europepmc/webservices/rest/search",
-                              {"query": f"{receptor} AND {symptom}", "format": "json", "pageSize": 10},
-                              ttl=600)  #
-    region_task = compute_region_scores(receptor, species, 40, None)
-    gpcr_r, string_r, lit_r, regions_r = await asyncio.gather(
-        gpcr_task, string_task, lit_task, region_task, return_exceptions=True
-    )
-    # Sanitize exceptions from gather (never bubble to 500)
-    def clean(x):
-        return {} if isinstance(x, Exception) else (x or {})
     return {
         "receptor": receptor,
-        "gpcrdb": clean(gpcr_r),
-        "string": clean(string_r),
-        "literature": clean(lit_r),
-        "region_scores": clean(regions_r),
-        "notes": "Mechanism aggregator with cache + STRING→region heuristic (synonyms+fallback)."
     }

 from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import RedirectResponse, JSONResponse
+import httpx, asyncio, time, hashlib, json, os, math
+from typing import Dict, Any, Tuple, Optional, List
 APP_NAME = "neuro-mechanism-backend"
+CALLER_ID = "neuro-mech-backend-demo"
 app = FastAPI(title=APP_NAME)
 @app.get("/", include_in_schema=False)
 def root():
     return RedirectResponse(url="/docs")
 @app.get("/health", include_in_schema=False)
 UA = {"User-Agent": f"{APP_NAME}/1.2 (HF Space)"}
+# ----------------- tiny in-memory TTL cache -----------------
 class TTLCache:
     def __init__(self, max_items=512):
         self.store: Dict[str, Tuple[float, Any]] = {}
             item = self.store.get(k)
             if item and (time.time() < item[0]):
                 return item[1]
+        async with httpx.AsyncClient(headers=UA, timeout=30) as client:
+            r = await client.get(url, params=params)
+            r.raise_for_status()
+            data = r.json()
         async with self._lock:
             if len(self.store) > self.max_items:
                 self.store.pop(next(iter(self.store)))
 CACHE = TTLCache()
+# ----------------- polite throttling for STRING -----------------
 _last_string_call = 0.0
 async def throttle_string():
+    """Courtesy throttle ~1 call/sec for STRING API."""
+    # See STRING API etiquette.
     global _last_string_call
     now = time.time()
     wait = 1.05 - (now - _last_string_call)
         await asyncio.sleep(wait)
     _last_string_call = time.time()
async def get_json_cached(url: str, params: Optional[dict], ttl: int):
    """Fetch JSON for ``url``/``params`` through the module-level ``CACHE``.

    Thin convenience wrapper: the three arguments are forwarded unchanged to
    ``CACHE.get`` and whatever it returns (or raises) is passed straight on.
    """
    payload = await CACHE.get(url, params, ttl)
    return payload
+# ----------------- basic pass-throughs -----------------
 @app.get("/lit/eupmc")
 async def europe_pmc_search(query: str, pageSize: int = 5):
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
     params = {"query": query, "format": "json", "pageSize": pageSize}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/lit/pubmed_esearch")
 async def pubmed_esearch(term: str, retmax: int = 10):
     url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
     params = {"db":"pubmed","term":term,"retmode":"json","retmax":retmax}
     return await get_json_cached(url, params, ttl=600)
 @app.get("/trials/search")
 async def ctgov_v2_studies(q: str, pageSize: int = 5):
     url = "https://clinicaltrials.gov/api/v2/studies"
     params = {"query.term": q, "pageSize": pageSize}
     return await get_json_cached(url, params, ttl=900)
 @app.get("/rxnav/rxcui")
 async def rxnav_rxcui(name: str):
     url = "https://rxnav.nlm.nih.gov/REST/rxcui.json"
     params = {"name": name}
     return await get_json_cached(url, params, ttl=86400)
 @app.get("/pubchem/compound_by_name")
 async def pubchem_by_name(name: str):
     url = f"https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/{name}/JSON"
     return await get_json_cached(url, None, ttl=86400)
 @app.get("/uniprot/search")
 async def uniprot_search(query: str, size: int = 5):
     url = "https://rest.uniprot.org/uniprotkb/search"
     params = {"query": query, "format": "json", "size": size}
     return await get_json_cached(url, params, ttl=86400)
 @app.get("/gpcrdb/protein")
 async def gpcrdb_protein(entry: str):
     url = f"https://gpcrdb.org/services/protein/{entry}"
+    try:
+        return await get_json_cached(url, None, ttl=86400)
+    except Exception:
+        # never blow up the aggregator
+        return {}
 @app.get("/string/network")
 async def string_network(identifiers: str, species: int = 9606, limit: int = 50):
     await throttle_string()
     url = "https://string-db.org/api/json/network"
     params = {"identifiers": identifiers, "species": species, "caller_identity": CALLER_ID, "limit": limit}
+    try:
+        return await get_json_cached(url, params, ttl=3600)
+    except Exception:
+        return []
+# ----------------- REGION heuristic (improved) -----------------
+# synonyms to widen recall; add more as needed
 REGION_SYNONYMS = {
+    "prefrontal cortex": ["PFC", "vmPFC", "dlPFC", "ventromedial prefrontal cortex", "dorsolateral prefrontal cortex"],
+    "anterior cingulate cortex": ["ACC", "dACC", "rACC"],
+    "nucleus accumbens": ["NAc", "accumbens", "ventral striatum"],
     "ventral tegmental area": ["VTA"],
+    "substantia nigra": ["SN", "SNc"],
+    "hippocampus": ["HC"],
+    "amygdala": [],
+    "insula": ["insular cortex"],
+    "thalamus": [],
+    "hypothalamus": [],
+    "dorsal striatum": ["caudate", "putamen"],
+    "cerebellum": []
 }
+REGION_TERMS_DEFAULT = list(REGION_SYNONYMS.keys())
+def _quote_if_phrase(s: str) -> str:
+    s = s.strip()
+    # phrase? keep quotes; single token? no quotes to broaden match
+    return f'"{s}"' if (" " in s and not s.startswith('"')) else s
 async def eupmc_hitcount(q: str) -> int:
     url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
     params = {"query": q, "format": "json", "pageSize": 0}
     data = await get_json_cached(url, params, ttl=1800)
+    return int(data.get("hitCount", 0))
 def collect_gene_symbols_from_string(edges: List[dict], focus: str) -> List[str]:
     genes = set()
     f = focus.upper()
+    for e in edges:
+        for k in ("preferredName_A", "preferredName_B"):
             g = e.get(k)
             if g and g.upper() != f:
                 genes.add(g)
     return list(genes)
@app.get("/heuristics/regions_from_string")
async def regions_from_string(
    receptor: str = Query(..., description="e.g., HTR2A"),
    species: int = 9606,
    limit: int = 40,
    regions: Optional[str] = Query(None, description="comma-separated regions; default common set")
):
    """Rank brain regions for ``receptor`` by Europe PMC co-mention counts.

    Steps:
      1. Fetch STRING edges for ``receptor`` and keep up to 25 neighbours,
         highest STRING score first.
      2. For every region term (canonical name plus ``REGION_SYNONYMS``) count
         Europe PMC hits for ``(term) AND (receptor OR neighbours)``.
      3. For multi-word terms with zero hits, retry with the phrase unquoted
         and the receptor alone.
      4. Sum hits per canonical region and score each as
         ``log10(hits + 1) * mean STRING score`` (0.2 when no edges exist).

    Args:
        receptor: Focus gene symbol.
        species: Taxon id forwarded to STRING.
        limit: Edge limit forwarded to STRING.
        regions: Optional comma-separated region names; defaults to
            ``REGION_TERMS_DEFAULT``.

    Returns:
        Dict with ``focus``, ``neighbors_considered``, ``regions_ranked``
        (sorted by ``weighted_score`` descending) and ``notes``.
    """
    focus_upper = receptor.upper()

    async def hitcounts(search_queries: List[str]) -> List[int]:
        # One failed lookup must not abort the batch: treat it as zero hits.
        outcomes = await asyncio.gather(
            *(eupmc_hitcount(sq) for sq in search_queries), return_exceptions=True
        )
        return [0 if isinstance(o, BaseException) else int(o) for o in outcomes]

    # 1) pull neighbors; tolerate a non-list payload or non-dict items
    edges = await string_network(receptor, species=species, limit=limit)
    edges = [e for e in edges if isinstance(e, dict)] if isinstance(edges, list) else []

    # STRING confidence map: best score seen per neighbour
    conf: Dict[str, float] = {}
    for e in edges:
        try:
            score = float(e.get("score", 0) or 0)
        except (TypeError, ValueError):
            score = 0.0
        for name in (e.get("preferredName_A"), e.get("preferredName_B")):
            if isinstance(name, str) and name and name.upper() != focus_upper:
                conf[name] = max(conf.get(name, 0.0), score)

    # Rank neighbours by confidence (ties by name) so the 25 that are kept are
    # the strongest ones and the generated queries are reproducible.
    neighbors = collect_gene_symbols_from_string(edges, receptor)
    neighbors.sort(key=lambda g: (-conf.get(g, 0.0), g))

    # region list + synonyms; duplicates dropped so no region is counted twice
    raw_regions = regions.split(",") if regions else REGION_TERMS_DEFAULT
    base_regions = list(dict.fromkeys(r.strip() for r in raw_regions if r.strip()))
    expanded_regions: List[Tuple[str, str]] = []
    for base in base_regions:
        expanded_regions.append((base, base))
        # (canonical, synonym)
        expanded_regions.extend((base, syn) for syn in REGION_SYNONYMS.get(base, []))

    # 2) Europe PMC hitCount per (canonical, candidate term)
    gene_clause = " OR ".join([receptor] + neighbors[:25])
    queries = [
        (canon, term, f'({_quote_if_phrase(term)}) AND ({gene_clause})')
        for canon, term in expanded_regions
    ]
    counts = await hitcounts([q1 for _canon, _term, q1 in queries])

    # 3) fallback pass for zeros. Narrowing only the gene clause can never
    # recover hits (it is a subset of the first query), so the retry drops the
    # phrase quotes instead; that only differs for terms that were quoted.
    fallback_idx = [
        i
        for i, ((_canon, term, _q1), hc) in enumerate(zip(queries, counts))
        if hc == 0 and _quote_if_phrase(term) != term.strip()
    ]
    if fallback_idx:
        fallback_counts = await hitcounts(
            [f'({queries[i][1].strip()}) AND ({receptor})' for i in fallback_idx]
        )
        for idx, hc in zip(fallback_idx, fallback_counts):
            if hc > 0:
                counts[idx] = hc

    # 4) aggregate by canonical region; weight by mean STRING conf.
    # NOTE: hits are summed across synonyms, so a paper matching several
    # synonyms of one region contributes more than once.
    mean_conf = sum(conf.values()) / len(conf) if conf else 0.2
    hits_by_region: Dict[str, int] = {}
    for (canon, _term, _q1), hc in zip(queries, counts):
        hits_by_region[canon] = hits_by_region.get(canon, 0) + int(hc)

    results = [
        {
            "region": region,
            "hits": hits,
            "weighted_score": round(math.log10(hits + 1.0) * mean_conf, 4),
        }
        for region, hits in hits_by_region.items()
    ]
    results.sort(key=lambda x: x["weighted_score"], reverse=True)
    return {
        "focus": receptor,
        "neighbors_considered": neighbors[:25],
        "regions_ranked": results,
        "notes": "STRING neighbors + EuropePMC co-mentions; synonyms + fallback enabled."
    }
+# ----------------- aggregator -----------------
 @app.get("/mechanism_graph")
 async def mechanism_graph(
     receptor: str = Query(..., description="e.g., HTR2A"),
 ):
     gpcr_entry = f"{receptor.lower()}_human" if not receptor.lower().endswith("_human") else receptor.lower()
+    gpcr_task = gpcrdb_protein(entry=gpcr_entry)  # safe wrapper above
+    string_task = string_network(identifiers=receptor, species=species, limit=50)
+    lit_task = europe_pmc_search(query=f"{receptor} AND {symptom}", pageSize=10)
+    regions_task = regions_from_string(receptor=receptor, species=species, limit=40, regions=None)
+    gpcr_r, string_r, lit_r, regions_r = await asyncio.gather(gpcr_task, string_task, lit_task, regions_task)
     return {
         "receptor": receptor,
+        "gpcrdb": gpcr_r if isinstance(gpcr_r, dict) else {},
+        "string": string_r if isinstance(string_r, list) else [],
+        "literature": lit_r if isinstance(lit_r, dict) else {},
+        "region_scores": regions_r if isinstance(regions_r, dict) else {},
+        "notes": "Mechanism aggregator with cache + robust region heuristic"
     }