| |
| """MedGenesis – minimal **Wikidata** lookup helper (async). |
| |
| Features |
| ~~~~~~~~ |
| * `simple_search(term)` – return first matching entity dict `{id, label, description}`. |
| * `fetch_entity(qid)` – return full entity data (`claims`, `labels`, etc.). |
| * Uses the public Wikidata MediaWiki Action API (`w/api.php`, no key). 15‑second timeout with `httpx`. |
| * Least‑recently‑used cache (128) to avoid repeated hits when the same |
| concept appears across multiple papers. |
| """ |
| from __future__ import annotations |
|
|
| import httpx, asyncio |
| from functools import lru_cache |
| from typing import Dict, Optional |
|
|
| _API = "https://www.wikidata.org/w/api.php" |
| _TIMEOUT = 15 |
| _HEADERS = {"User-Agent": "MedGenesis/1.0 (https://huggingface.co/spaces)"} |
|
|
| |
| |
| |
|
|
# Result cache for ``simple_search``: search term -> top hit (or ``None``).
# ``functools.lru_cache`` cannot be used on a coroutine function: it would
# store the coroutine object itself, and awaiting that object a second time
# raises ``RuntimeError``.  A plain dict keeps insertion order, so its first
# key is always the least recently used entry.
_SEARCH_CACHE: Dict[str, Optional[Dict]] = {}
_SEARCH_CACHE_MAX = 128


async def simple_search(term: str) -> Optional[Dict]:
    """Return top search hit for *term* or None.

    Queries the ``wbsearchentities`` action (English, limit 1) and returns
    the first element of the response's ``"search"`` list unchanged.  Results,
    including "no hit", are memoised per *term* in a 128-entry LRU cache, so
    repeated lookups do not trigger another HTTP request.

    Args:
        term: Free-text search string.

    Returns:
        The first search hit as returned by the API, or ``None`` when the
        ``"search"`` list is empty or absent.  The same dict object is handed
        out on cache hits, so callers should treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    if term in _SEARCH_CACHE:
        # Pop and re-insert to mark the entry as most recently used.
        cached = _SEARCH_CACHE.pop(term)
        _SEARCH_CACHE[term] = cached
        return cached

    params = {
        "action": "wbsearchentities",
        "search": term,
        "language": "en",
        "format": "json",
        "limit": 1,
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        hits = resp.json().get("search", [])
    hit = hits[0] if hits else None

    # A concurrent call may have stored this term while we were awaiting;
    # drop any such entry so the fresh one lands at the "newest" end.
    _SEARCH_CACHE.pop(term, None)
    _SEARCH_CACHE[term] = hit
    while len(_SEARCH_CACHE) > _SEARCH_CACHE_MAX:
        # Evict the least recently used entry (the oldest key).
        del _SEARCH_CACHE[next(iter(_SEARCH_CACHE))]
    return hit


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
simple_search.cache_clear = _SEARCH_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
# Result cache for ``fetch_entity``: Q-ID -> entity dict.
# ``functools.lru_cache`` cannot be used on a coroutine function: it would
# store the coroutine object itself, and awaiting that object a second time
# raises ``RuntimeError``.  A plain dict keeps insertion order, so its first
# key is always the least recently used entry.
_ENTITY_CACHE: Dict[str, Dict] = {}
_ENTITY_CACHE_MAX = 128


async def fetch_entity(qid: str) -> Dict:
    """Fetch full entity JSON for a Wikidata Q‑ID (e.g. `Q12136`).

    Calls the ``wbgetentities`` action restricted to English and returns the
    value stored under *qid* in the response's ``"entities"`` mapping.
    Results are memoised per *qid* in a 128-entry LRU cache, so repeated
    lookups do not trigger another HTTP request.

    Args:
        qid: Entity identifier to look up.

    Returns:
        The entity dict, or an empty dict when the response has no entry
        keyed by *qid*.  The same dict object is handed out on cache hits, so
        callers should treat it as read-only.

    Raises:
        httpx.HTTPStatusError: If the API answers with an error status.
    """
    if qid in _ENTITY_CACHE:
        # Pop and re-insert to mark the entry as most recently used.
        cached = _ENTITY_CACHE.pop(qid)
        _ENTITY_CACHE[qid] = cached
        return cached

    params = {
        "action": "wbgetentities",
        "ids": qid,
        "format": "json",
        "languages": "en",
    }
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as client:
        resp = await client.get(_API, params=params)
        resp.raise_for_status()
        entity = resp.json().get("entities", {}).get(qid, {})

    # A concurrent call may have stored this id while we were awaiting;
    # drop any such entry so the fresh one lands at the "newest" end.
    _ENTITY_CACHE.pop(qid, None)
    _ENTITY_CACHE[qid] = entity
    while len(_ENTITY_CACHE) > _ENTITY_CACHE_MAX:
        # Evict the least recently used entry (the oldest key).
        del _ENTITY_CACHE[next(iter(_ENTITY_CACHE))]
    return entity


# Keep the ``cache_clear()`` hook that the former ``lru_cache`` wrapper exposed.
fetch_entity.cache_clear = _ENTITY_CACHE.clear  # type: ignore[attr-defined]
|
|
|
|
| |
| |
| |
if __name__ == "__main__":

    async def _demo() -> None:
        """Smoke test: search for one term, then print the top hit's English label."""
        top = await simple_search("glioblastoma")
        print("Top hit:", top)
        if not top:
            # No search result, so there is no entity to fetch.
            return
        entity = await fetch_entity(top["id"])
        labels = entity.get("labels", {})
        print("Labels:", labels.get("en", {}))

    asyncio.run(_demo())
|
|