| """Europe PMC search tool - replaces BioRxiv.""" |
|
|
| from typing import Any |
|
|
| import httpx |
| from tenacity import retry, stop_after_attempt, wait_exponential |
|
|
| from src.utils.exceptions import SearchError |
| from src.utils.models import Citation, Evidence |
|
|
|
|
| class EuropePMCTool: |
| """ |
| Search Europe PMC for papers and preprints. |
| |
| Europe PMC indexes: |
| - PubMed/MEDLINE articles |
| - PMC full-text articles |
| - Preprints from bioRxiv, medRxiv, ChemRxiv, etc. |
| - Patents and clinical guidelines |
| |
| API Docs: https://europepmc.org/RestfulWebService |
| """ |
|
|
| BASE_URL = "https://www.ebi.ac.uk/europepmc/webservices/rest/search" |
|
|
| @property |
| def name(self) -> str: |
| return "europepmc" |
|
|
| @retry( |
| stop=stop_after_attempt(3), |
| wait=wait_exponential(multiplier=1, min=1, max=10), |
| reraise=True, |
| ) |
| async def search(self, query: str, max_results: int = 10) -> list[Evidence]: |
| """ |
| Search Europe PMC for papers matching query. |
| |
| Args: |
| query: Search keywords |
| max_results: Maximum results to return |
| |
| Returns: |
| List of Evidence objects |
| """ |
| params: dict[str, str | int] = { |
| "query": query, |
| "resultType": "core", |
| "pageSize": min(max_results, 100), |
| "format": "json", |
| } |
|
|
| async with httpx.AsyncClient(timeout=30.0) as client: |
| try: |
| response = await client.get(self.BASE_URL, params=params) |
| response.raise_for_status() |
|
|
| data = response.json() |
| results = data.get("resultList", {}).get("result", []) |
|
|
| return [self._to_evidence(r) for r in results[:max_results]] |
|
|
| except httpx.HTTPStatusError as e: |
| raise SearchError(f"Europe PMC API error: {e}") from e |
| except httpx.RequestError as e: |
| raise SearchError(f"Europe PMC connection failed: {e}") from e |
|
|
| def _to_evidence(self, result: dict[str, Any]) -> Evidence: |
| """Convert Europe PMC result to Evidence.""" |
| title = result.get("title", "Untitled") |
| abstract = result.get("abstractText", "No abstract available.") |
| doi = result.get("doi", "") |
| pub_year = result.get("pubYear", "Unknown") |
|
|
| |
| author_list = result.get("authorList", {}).get("author", []) |
| authors = [a.get("fullName", "") for a in author_list[:5] if a.get("fullName")] |
|
|
| |
| pub_types = result.get("pubTypeList", {}).get("pubType", []) |
| is_preprint = "Preprint" in pub_types |
| source_db = result.get("source", "europepmc") |
|
|
| |
| preprint_marker = "[PREPRINT - Not peer-reviewed] " if is_preprint else "" |
| content = f"{preprint_marker}{abstract[:1800]}" |
|
|
| |
| if doi: |
| url = f"https://doi.org/{doi}" |
| elif result.get("pmid"): |
| url = f"https://pubmed.ncbi.nlm.nih.gov/{result['pmid']}/" |
| else: |
| url = f"https://europepmc.org/article/{source_db}/{result.get('id', '')}" |
|
|
| return Evidence( |
| content=content[:2000], |
| citation=Citation( |
| source="preprint" if is_preprint else "europepmc", |
| title=title[:500], |
| url=url, |
| date=str(pub_year), |
| authors=authors, |
| ), |
| relevance=0.75 if is_preprint else 0.9, |
| ) |
|
|