import requests
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Run on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load BioGPT-Large (a causal biomedical language model) once at import time.
# NOTE: this downloads the model weights on first run and moves the model to
# DEVICE; both names are used by summarize_text() below.
bio_tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")
bio_model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large").to(DEVICE)
|
def search_pubmed(query, max_results=5):
    """
    Search PubMed for articles matching *query* (e.g. a disease name).

    Parameters
    ----------
    query : str
        Free-text search term; sent as the ``term`` query parameter.
    max_results : int, optional
        Maximum number of articles to return (default 5).

    Returns
    -------
    list[dict]
        Up to ``max_results`` dicts with ``"title"``, ``"link"`` and
        ``"abstract"`` keys; an empty list on any network or parse failure.
    """
    # NOTE(review): the ctxp endpoint is NCBI's citation exporter, normally
    # queried by PMID (``id=``), not by ``term=`` — confirm this URL really
    # supports free-text search, or switch to the E-utilities esearch API.
    url = "https://api.ncbi.nlm.nih.gov/lit/ctxp/v1/pubmed/"
    try:
        # params= URL-encodes the query (the old f-string interpolation did
        # not, so terms with spaces/special characters built malformed URLs);
        # timeout= prevents hanging forever on an unresponsive server.
        response = requests.get(
            url,
            params={"term": query, "format": "json"},
            timeout=10,
        )
    except requests.RequestException:
        # Connection errors / timeouts: same contract as an HTTP failure.
        return []
    if response.status_code != 200:
        return []
    try:
        data = response.json()
    except ValueError:
        # 200 response whose body is not valid JSON.
        return []
    return [
        {
            "title": item.get("title", ""),
            "link": item.get("url", ""),
            "abstract": item.get("abstract", ""),
        }
        for item in data.get("records", [])[:max_results]
    ]
| |
|
def summarize_text(text):
    """
    Generate a continuation-style "summary" of *text* with BioGPT.

    Note: BioGPT is a causal (decoder-only) model, not a seq2seq model, so
    the result is free-text generation rather than a true abstractive
    summary.

    Parameters
    ----------
    text : str
        Source text; truncated to the first 1024 tokens.

    Returns
    -------
    str
        Only the newly generated text (the echoed input prompt is
        stripped before decoding).
    """
    inputs = bio_tokenizer(text, return_tensors="pt", truncation=True, max_length=1024).to(DEVICE)

    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        # max_new_tokens / min_new_tokens bound the *generated* length.
        # The previous max_length=150 capped the TOTAL length (prompt
        # included), so any prompt longer than 150 tokens made generation
        # fail or stop immediately.
        summary_ids = bio_model.generate(
            **inputs,
            max_new_tokens=150,
            min_new_tokens=40,
            length_penalty=2.0,
            num_beams=4,
        )

    # A causal LM's output begins with the prompt itself; decode only the
    # tokens generated after it so callers get just the new text.
    prompt_len = inputs["input_ids"].shape[1]
    return bio_tokenizer.decode(summary_ids[0][prompt_len:], skip_special_tokens=True)