Spaces:
Sleeping
Sleeping
File size: 2,754 Bytes
"""PubMed search tool wrapping NCBI Entrez API via Biopython."""
import os
from xml.etree import ElementTree
from agents import function_tool
from Bio import Entrez
# Contact address assigned to Bio.Entrez: taken from the NCBI_EMAIL environment
# variable, falling back to a placeholder address when the variable is unset.
Entrez.email = os.getenv("NCBI_EMAIL", "user@example.com")
def _element_text(element: "ElementTree.Element | None") -> str:
    """Return all text inside *element*, including text nested in child tags."""
    if element is None:
        return ""
    # ``Element.text`` stops at the first child tag, so a title such as
    # ``Role of <i>BRCA1</i> in repair`` would be cut to ``Role of ``.
    return "".join(element.itertext()).strip()


def _abstract_text(art: "ElementTree.Element") -> str:
    """Join every AbstractText section under *art*, prefixing section labels."""
    sections = []
    for section in art.findall(".//AbstractText"):
        text = _element_text(section)
        if not text:
            continue
        label = section.get("Label")
        sections.append(f"{label}: {text}" if label else text)
    return " ".join(sections)


def _article_doi(article: "ElementTree.Element", art: "ElementTree.Element") -> str:
    """Return the DOI recorded for the article itself, or "" when there is none."""
    # Look only at the article's own ID list: a recursive ``.//ArticleId``
    # search also matches the IDs of cited references nested deeper in the tree.
    for article_id in article.findall("PubmedData/ArticleIdList/ArticleId"):
        if article_id.get("IdType") == "doi" and article_id.text:
            return article_id.text.strip()
    # Fall back to a DOI given as an electronic location on the Article element.
    for location in art.findall("ELocationID"):
        if location.get("EIdType") == "doi" and location.text:
            return location.text.strip()
    return ""


def _fetch_details(id_list: list[str], max_results: int = 3) -> list[dict]:
    """Fetch paper metadata for a list of PubMed IDs.

    Args:
        id_list: PubMed IDs to look up; only the first ``max_results`` are used.
        max_results: Maximum number of IDs to fetch. Zero or a negative value
            fetches nothing.

    Returns:
        One dict per ``PubmedArticle`` that has an ``Article`` element, with
        keys ``title``, ``abstract`` (truncated to 500 characters), ``pmid``,
        ``doi`` and ``url``. Returns an empty list when there is nothing to fetch.
    """
    ids = id_list[: max(max_results, 0)] if id_list else []
    if not ids:
        # Nothing to request; avoid sending an empty ID string to efetch.
        return []

    handle = Entrez.efetch(db="pubmed", id=",".join(ids), rettype="xml", retmode="xml")
    try:
        raw = handle.read()
    finally:
        # Close the handle even when reading the response fails.
        handle.close()

    root = ElementTree.fromstring(raw)
    papers = []
    for article in root.findall(".//PubmedArticle"):
        medline = article.find(".//MedlineCitation")
        art = medline.find(".//Article") if medline is not None else None
        if art is None:
            continue

        title = _element_text(art.find("ArticleTitle")) or "No title"
        abstract = _abstract_text(art) or "No abstract available"
        pmid = _element_text(medline.find("PMID"))

        papers.append(
            {
                "title": title,
                "abstract": abstract[:500],
                "pmid": pmid,
                "doi": _article_doi(article, art),
                "url": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            }
        )
    return papers
@function_tool
def pubmed_search(query: str, max_results: int = 3) -> str:
    """Search PubMed for papers matching the query. Returns titles, abstracts, PMIDs, and DOIs.

    Args:
        query: The search query for PubMed.
        max_results: Maximum number of papers to return (default 3).
    """
    # NOTE: the docstring text above is left unchanged on purpose, because the
    # function_tool decorator may read it at runtime; extra notes live in comments.
    no_results = f"No PubMed results found for: {query}"

    # A blank query or a non-positive limit leaves nothing to search for, so
    # answer directly instead of sending the request to NCBI.
    if not query or not query.strip() or max_results < 1:
        return no_results

    handle = Entrez.esearch(db="pubmed", term=query, retmax=max_results, sort="relevance")
    try:
        record = Entrez.read(handle)
    finally:
        # Close the handle even when parsing the search response fails.
        handle.close()

    id_list = record.get("IdList", [])
    if not id_list:
        return no_results

    papers = _fetch_details(id_list, max_results)
    if not papers:
        return no_results

    # One text block per paper; each block ends with a newline, so joining
    # with "\n" leaves a blank line between consecutive entries.
    entries = [
        f"[PubMed {index}]\n"
        f"Title: {paper['title']}\n"
        f"Abstract: {paper['abstract']}\n"
        f"PMID: {paper['pmid']}\n"
        f"DOI: {paper['doi']}\n"
        f"URL: {paper['url']}\n"
        for index, paper in enumerate(papers, 1)
    ]
    return "\n".join(entries)
|