Update mcp/disgenet.py
Browse files- mcp/disgenet.py +49 -59
mcp/disgenet.py
CHANGED
|
@@ -1,68 +1,58 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
"""MedGenesis – DisGeNET async helper (disease → gene associations).
|
| 3 |
-
|
| 4 |
-
Features
|
| 5 |
-
~~~~~~~~
|
| 6 |
-
* Accepts optional Bearer token via env **`DISGENET_KEY`** (rate-limit free).
|
| 7 |
-
* Endpoint: `https://www.disgenet.org/api/gda/disease/<disease_name>`
|
| 8 |
-
* Back-off retry (2×, 4×) for 429/5xx.
|
| 9 |
-
* LRU cache (24 h, 512 queries) to minimise API calls.
|
| 10 |
-
* Returns top *N* rows (default = 10) as `list[dict]`.
|
| 11 |
"""
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
|
|
|
| 14 |
import os, asyncio, httpx
|
| 15 |
from functools import lru_cache
|
| 16 |
-
from typing import List, Dict
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# ---------------------------------------------------------------------
|
| 26 |
-
async def _fetch(url: str, *, retries: int = 3) -> List[Dict[str, Any]]:
|
| 27 |
-
delay = 2
|
| 28 |
-
for _ in range(retries):
|
| 29 |
-
async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
|
| 30 |
-
resp = await cli.get(url, params={"source": "ALL", "format": "json"})
|
| 31 |
-
if resp.status_code == 200:
|
| 32 |
-
return resp.json()
|
| 33 |
-
if resp.status_code in {429, 500, 503}:
|
| 34 |
-
await asyncio.sleep(delay)
|
| 35 |
-
delay *= 2
|
| 36 |
-
continue
|
| 37 |
-
resp.raise_for_status()
|
| 38 |
-
return [] # final fallback
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
# Public API – cached 24 h
|
| 42 |
-
# ---------------------------------------------------------------------
|
| 43 |
@lru_cache(maxsize=512)
|
| 44 |
-
async def disease_to_genes(disease_name: str,
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
Parameters
|
| 48 |
-
----------
|
| 49 |
-
disease_name : str
|
| 50 |
-
Free-text disease label (e.g. "glioblastoma"). Internally converted
|
| 51 |
-
to lowercase and URL-encoded.
|
| 52 |
-
limit : int, optional
|
| 53 |
-
Maximum number of rows to return (default = 10).
|
| 54 |
"""
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
+
disgenet.py · Disease-Gene associations helper
|
| 3 |
+
Docs: https://www.disgenet.com/downloads (REST v1)
|
| 4 |
+
|
| 5 |
+
Change-log
|
| 6 |
+
──────────
|
| 7 |
+
• 2025-06-25 – .org → .COM redirect (301) broke calls.
|
| 8 |
+
We now default to https://www.disgenet.com/api
|
| 9 |
+
and still follow redirects if they add a CDN later.
|
| 10 |
+
• Graceful retry + 24 h LRU-cache.
|
| 11 |
+
• Empty list on any error so orchestrator never crashes.
|
| 12 |
+
"""
|
| 13 |
|
| 14 |
+
from __future__ import annotations
|
| 15 |
import os, asyncio, httpx
|
| 16 |
from functools import lru_cache
|
| 17 |
+
from typing import List, Dict
|
| 18 |
|
| 19 |
+
_TOKEN = os.getenv("DISGENET_KEY") # optional Bearer token
|
| 20 |
+
_BASE = "https://www.disgenet.com/api" # ← new canonical host
|
| 21 |
+
_HDRS = {"Accept": "application/json"}
|
| 22 |
+
if _TOKEN:
|
| 23 |
+
_HDRS["Authorization"] = f"Bearer {_TOKEN}"
|
| 24 |
|
| 25 |
+
_TIMEOUT = 12
|
| 26 |
+
_RETRIES = 2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# ────────────────────────────────────────────────────────────────────
|
|
|
|
|
|
|
| 29 |
# Result cache: (lower-cased disease name, limit) -> (expiry, rows).
# ``functools.lru_cache`` must not wrap an ``async def``: it would cache the
# coroutine object itself, and a second call with the same arguments would
# fail with "cannot reuse already awaited coroutine".  This dict caches the
# awaited *result* instead, and adds the expiry that lru_cache cannot provide.
_GENE_CACHE: Dict[tuple, tuple] = {}
_GENE_CACHE_MAX = 512               # same capacity as the former lru_cache
_GENE_CACHE_TTL = 24 * 60 * 60      # seconds (24 h)
_GENE_RETRY_PAUSE = 0.7             # seconds to wait between attempts


async def disease_to_genes(disease_name: str,
                           limit: int = 10) -> List[Dict]:
    """Return the top-N gene associations for *disease_name*.

    Parameters
    ----------
    disease_name : str
        Free-text disease label (e.g. "glioblastoma").  It is lower-cased
        and percent-encoded before being placed in the URL path.
    limit : int, optional
        Maximum number of rows to return (default = 10).

    Returns
    -------
    list[dict]
        At most *limit* rows of the decoded JSON response.  An empty list is
        returned when the API answers 404, when the payload is not a JSON
        list, or when every attempt fails; HTTP and transport errors never
        propagate to the caller.

    Notes
    -----
    Successful answers (including the 404 "nothing found" case) are cached
    for 24 h, up to 512 distinct ``(name, limit)`` pairs, evicting the least
    recently used entry first.  Failures are not cached, so a transient
    outage does not poison later calls.  ``disease_to_genes.cache_clear()``
    empties the cache.
    """
    # Function-scope imports keep this block self-contained.
    import time
    from urllib.parse import quote

    name = disease_name.lower()
    key = (name, limit)

    cached = _GENE_CACHE.pop(key, None)
    if cached is not None and cached[0] > time.monotonic():
        _GENE_CACHE[key] = cached       # re-insert => most recently used
        return list(cached[1])          # copy: callers may mutate the list
    # An expired entry stays popped and is re-fetched below.

    # safe="" also encodes "/", "?" and "#", which would otherwise change
    # the meaning of the path.
    url = f"{_BASE}/gda/disease/{quote(name, safe='')}"
    params = {"source": "ALL", "format": "json"}

    rows = None
    for attempt in range(_RETRIES):
        try:
            async with httpx.AsyncClient(timeout=_TIMEOUT,
                                         headers=_HDRS,
                                         follow_redirects=True) as cli:
                r = await cli.get(url, params=params)
            if r.status_code == 404:    # unknown disease: a valid empty answer
                rows = []
                break
            r.raise_for_status()
            payload = r.json()
            # A non-list payload (e.g. an error object) cannot be sliced.
            rows = payload[:limit] if isinstance(payload, list) else []
            break
        except httpx.HTTPStatusError as exc:
            status = exc.response.status_code
            if status != 429 and status < 500:
                return []               # auth/client errors will not heal on retry
        except httpx.TimeoutException:
            pass                        # transient: fall through to the retry
        except Exception:
            return []                   # anything else: graceful fallback
        if attempt + 1 < _RETRIES:      # no pointless sleep after the last try
            await asyncio.sleep(_GENE_RETRY_PAUSE)

    if rows is None:
        return []                       # every attempt failed

    _GENE_CACHE[key] = (time.monotonic() + _GENE_CACHE_TTL, rows)
    while len(_GENE_CACHE) > _GENE_CACHE_MAX:
        # dicts preserve insertion order, so the first key is the LRU entry
        del _GENE_CACHE[next(iter(_GENE_CACHE))]
    return list(rows)


# Backward compatibility with the attribute lru_cache used to expose.
disease_to_genes.cache_clear = _GENE_CACHE.clear
|