import time

import requests
from transformers import pipeline
from nltk.tokenize import sent_tokenize
import nltk

from config import MY_PUBMED_EMAIL, MAX_PUBMED_RESULTS, SUMMARIZATION_CHUNK_SIZE

# Download the sentence tokenizer model once; quiet=True suppresses the
# progress output on repeat runs.
nltk.download("punkt", quiet=True)

# Load the summarization model once at import time. facebook/bart-large-cnn
# accepts roughly 1,024 tokens of input, which is why long texts are chunked
# in summarize_text() below.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def search_pubmed(query, max_results=MAX_PUBMED_RESULTS):
    """
    Search PubMed via the ESearch E-utility and return a list of matching
    PMIDs (at most max_results).
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
    params = {
        "db": "pubmed",
        "term": query,
        "retmax": max_results,
        "retmode": "json",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()
    return data.get("esearchresult", {}).get("idlist", [])

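# For reference, an abridged ESearch JSON response has the shape below;
# the values shown are illustrative, and fields other than "idlist" are
# omitted:
#
#   {
#     "esearchresult": {
#       "count": "128",
#       "retmax": "5",
#       "idlist": ["38012345", "37998765", ...]
#     }
#   }
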
def fetch_abstract(pmid):
    """
    Fetch the plain-text abstract for a single PubMed ID via the EFetch
    E-utility.
    """
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": pmid,
        "retmode": "text",
        "rettype": "abstract",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.text.strip()

def fetch_pubmed_abstracts(pmids):
    """
    Fetch abstracts for a list of PMIDs, one request per ID.
    """
    results = {}
    for pmid in pmids:
        try:
            abstract = fetch_abstract(pmid)
            results[pmid] = abstract
        except requests.RequestException as e:
            results[pmid] = f"Error fetching PMID {pmid}: {e}"
        # NCBI asks clients without an API key to stay under ~3 requests
        # per second; a short pause keeps sequential fetches compliant.
        time.sleep(0.34)
    return results

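# EFetch also accepts a comma-separated list of UIDs, so the loop above can
# be collapsed into a single request when per-PMID error handling is not
# needed. A minimal sketch -- fetch_abstracts_batch is not part of the
# original module, and it returns one combined text blob rather than a
# per-PMID mapping:
def fetch_abstracts_batch(pmids):
    url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
    params = {
        "db": "pubmed",
        "id": ",".join(str(p) for p in pmids),
        "retmode": "text",
        "rettype": "abstract",
        "tool": "AdvancedMedicalAI",
        "email": MY_PUBMED_EMAIL,
    }
    response = requests.get(url, params=params, timeout=10)
    response.raise_for_status()
    return response.text.strip()
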
def summarize_text(text, chunk_size=SUMMARIZATION_CHUNK_SIZE):
    """
    Summarize long text by splitting it into sentence-aligned chunks of at
    most chunk_size words, summarizing each chunk, and joining the results.
    """
    sentences = sent_tokenize(text)
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        tokens = len(sentence.split())
        # Flush the current chunk before it would exceed the word budget.
        # The current_chunk guard avoids appending an empty chunk when a
        # single sentence is longer than chunk_size.
        if current_chunk and current_length + tokens > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += tokens

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    summaries = []
    for chunk in chunks:
        summary = summarizer(chunk, max_length=100, min_length=30, do_sample=False)[0]["summary_text"]
        summaries.append(summary)
    return " ".join(summaries)