| |
| """MedGenesis – NCBI E‑utilities helper (async, cached). |
| |
| Supports: |
| • `search_gene(term)` → quick gene symbol/name hits via ESearch + ESummary |
| • `get_mesh_definition(term)`→ first MeSH definition string via ESummary |
| |
| New features |
| ~~~~~~~~~~~~ |
| * Central `_request()` with exponential‑backoff retry (2×/4×). |
| * 12‑hour LRU caches for both public helpers (API quota‑friendly). |
| * Respects optional `BIO_KEY` env to boost rate limits. |
| * Handles single‑item edge cases (ESummary returns dict not list). |
| """ |
| from __future__ import annotations |
|
|
import asyncio
import os
import time
from functools import lru_cache
from typing import Any, Dict, List, Tuple

import httpx
import xmltodict
|
|
# Root URL; each request appends an endpoint name such as "esearch.fcgi".
_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/"
# Timeout handed to every httpx.AsyncClient created by this module.
_TIMEOUT = 15
# Optional NCBI API key, read once at import time from the BIO_KEY env var
# (None when the variable is unset).
_API_KEY = os.environ.get("BIO_KEY")
|
|
| |
| |
| |
async def _request(endpoint: str, params: Dict[str, Any], *, retries: int = 3) -> httpx.Response:
    """GET an E-utilities endpoint, retrying with exponential back-off.

    Args:
        endpoint: Script name appended to ``_BASE`` (e.g. ``"esearch.fcgi"``).
        params: Query parameters. The mapping is copied, never mutated; when
            ``_API_KEY`` is set it is sent as ``api_key``.
        retries: Total number of attempts (values below 1 are treated as 1).
            Waits of 2 s, 4 s, ... are inserted *between* attempts only.

    Returns:
        The first response with status 200, or the final response if it is
        not an error status.

    Raises:
        httpx.HTTPStatusError: The final attempt returned an error status.
        httpx.RequestError: The final attempt failed at the transport level
            (timeout, connection error, ...).
    """
    # Work on a copy so adding the API key never leaks into the caller's dict.
    query = dict(params)
    if _API_KEY:
        query["api_key"] = _API_KEY

    url = f"{_BASE}{endpoint}"
    attempts = max(1, retries)
    delay = 2
    # One client for all attempts so retries can reuse the connection pool.
    async with httpx.AsyncClient(timeout=_TIMEOUT) as cli:
        for attempt in range(attempts):
            final = attempt == attempts - 1
            try:
                resp = await cli.get(url, params=query)
            except httpx.RequestError:
                # Transport failures are transient too; only the last one propagates.
                if final:
                    raise
            else:
                if resp.status_code == 200 or final:
                    break
            await asyncio.sleep(delay)
            delay *= 2

    # No-op for a 200; raises for an error status left over from the last attempt.
    resp.raise_for_status()
    return resp
|
|
|
|
| |
| |
| |
# Cache policy for search_gene: at most 512 entries, each valid for 12 hours.
_GENE_CACHE_MAX = 512
_GENE_CACHE_TTL = 12 * 60 * 60  # seconds
# (term, retmax) -> (monotonic expiry time, gene summaries).
# Dict insertion order doubles as recency order (oldest first).
_GENE_CACHE: Dict[Tuple[str, int], Tuple[float, List[Dict]]] = {}


async def search_gene(term: str, *, retmax: int = 5) -> List[Dict]:
    """Return list of gene summary dicts for *term* (Entrez Gene db).

    Runs ESearch to collect up to *retmax* gene UIDs, then ESummary to fetch
    their summaries. Results (including empty ones) are cached per
    ``(term, retmax)``; failed requests are not cached because the exception
    propagates before anything is stored.

    ``functools.lru_cache`` is deliberately not used: on an ``async def`` it
    caches the coroutine object, which cannot be awaited a second time.

    Args:
        term: Entrez query string (must be hashable, as it is a cache key).
        retmax: Maximum number of gene records to return.

    Returns:
        A fresh list of summary dicts; ``[]`` when ESearch finds nothing.

    Raises:
        httpx.HTTPStatusError: Propagated from ``_request`` on HTTP failure.
    """
    key = (term, retmax)
    cached = _GENE_CACHE.pop(key, None)
    if cached is not None and cached[0] > time.monotonic():
        _GENE_CACHE[key] = cached  # re-insert at the end: most recently used
        # Hand out a copy so callers cannot alter the cached list itself.
        return list(cached[1])

    es_params = {
        "db": "gene",
        "term": term,
        "retmode": "json",
        "retmax": retmax,
    }
    es_resp = await _request("esearch.fcgi", es_params)
    ids = es_resp.json().get("esearchresult", {}).get("idlist", [])

    hits: List[Dict] = []
    if ids:
        sum_params = {"db": "gene", "id": ",".join(ids), "retmode": "json"}
        sum_resp = await _request("esummary.fcgi", sum_params)
        data = sum_resp.json().get("result", {})
        # "result" maps each UID to its summary plus a "uids" index entry.
        hits = [v for k, v in data.items() if k != "uids"]

    _GENE_CACHE[key] = (time.monotonic() + _GENE_CACHE_TTL, hits)
    while len(_GENE_CACHE) > _GENE_CACHE_MAX:
        # Evict the least recently used entry (first key in insertion order).
        del _GENE_CACHE[next(iter(_GENE_CACHE))]
    return list(hits)


# Expose cache_clear() the way functools.lru_cache wrappers do, so callers
# and tests can reset the cache.
search_gene.cache_clear = _GENE_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
| |
| |
| |
# Cache policy for get_mesh_definition: at most 512 entries, 12 hours each.
_MESH_CACHE_MAX = 512
_MESH_CACHE_TTL = 12 * 60 * 60  # seconds
# term -> (monotonic expiry time, definition).
# Dict insertion order doubles as recency order (oldest first).
_MESH_CACHE: Dict[str, Tuple[float, str]] = {}


async def get_mesh_definition(term: str) -> str:
    """Return first MeSH definition string for *term* or ''.

    ESummary only accepts UIDs, so the term is first resolved with ESearch
    (``db=mesh``, ``retmax=1``) and the top UID is then summarised. The scope
    note (``ds_scopenote``) is returned when present; otherwise the first
    entry of ``ds_meshterms`` is used as a fallback.

    Results (including ``''``) are cached per *term*. ``functools.lru_cache``
    is deliberately not used: on an ``async def`` it caches the coroutine
    object, which cannot be awaited a second time.

    Args:
        term: MeSH query string (must be hashable, as it is the cache key).

    Returns:
        The definition text, or ``''`` when nothing matches.

    Raises:
        httpx.HTTPStatusError: Propagated from ``_request`` on HTTP failure.
    """
    cached = _MESH_CACHE.pop(term, None)
    if cached is not None and cached[0] > time.monotonic():
        _MESH_CACHE[term] = cached  # re-insert at the end: most recently used
        return cached[1]

    es_params = {
        "db": "mesh",
        "term": term,
        "retmode": "json",
        "retmax": 1,
    }
    es_resp = await _request("esearch.fcgi", es_params)
    ids = es_resp.json().get("esearchresult", {}).get("idlist", [])

    definition = ""
    if ids:
        sum_params = {"db": "mesh", "id": ids[0], "retmode": "json"}
        sum_resp = await _request("esummary.fcgi", sum_params)
        data = sum_resp.json().get("result", {})
        # "result" maps each UID to its record plus a "uids" index entry.
        recs = [v for k, v in data.items() if k != "uids"]
        if recs:
            record = recs[0]
            definition = record.get("ds_scopenote") or ""
            if not definition:
                # No scope note: fall back to the first listed MeSH term,
                # guarding against a missing or empty list.
                terms = record.get("ds_meshterms") or [""]
                definition = terms[0]

    _MESH_CACHE[term] = (time.monotonic() + _MESH_CACHE_TTL, definition)
    while len(_MESH_CACHE) > _MESH_CACHE_MAX:
        # Evict the least recently used entry (first key in insertion order).
        del _MESH_CACHE[next(iter(_MESH_CACHE))]
    return definition


# Expose cache_clear() the way functools.lru_cache wrappers do, so callers
# and tests can reset the cache.
get_mesh_definition.cache_clear = _MESH_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
| |
| |
| |
if __name__ == "__main__":
    # Manual smoke test: one gene search and one MeSH lookup, printed to stdout.
    async def _demo():
        """Look up TP53 and glioblastoma and print a one-line summary of each."""
        hits = await search_gene("TP53", retmax=3)
        top_name = hits[0]['name'] if hits else 'None'
        print(f"Gene hits: {len(hits)} – {top_name}")

        definition = await get_mesh_definition("glioblastoma")
        # Only the first 80 characters are shown to keep the output short.
        print("MeSH def:", definition[:80], "…")

    asyncio.run(_demo())
|
|