Rename pubmed_module.py to diagnosis_module.py
Browse files- diagnosis_module.py +33 -0
- pubmed_module.py +0 -44
diagnosis_module.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# diagnosis_module.py
|
| 2 |
+
import json
|
| 3 |
+
from difflib import SequenceMatcher
|
| 4 |
+
|
| 5 |
+
def load_diseases(path="diseases.json"):
    """Load the disease/symptom knowledge base from a JSON file.

    Args:
        path: Path to the JSON file mapping each disease name to its
            info record (symptoms, source, brief description).

    Returns:
        The parsed JSON object (dict of disease name -> info dict).
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)
|
| 11 |
+
|
| 12 |
+
def diagnose_symptoms(user_input, diseases=None):
    """Match free-text symptoms against the disease database.

    Args:
        user_input: Comma-separated symptom names, e.g. "fever, cough".
        diseases: Optional pre-loaded disease mapping (disease name ->
            {"symptoms": [...], "source": ..., "brief": ...}). When None,
            the database is loaded from the default JSON file.

    Returns:
        Up to 5 candidate diseases, each a dict with keys "disease",
        "score", "source" and "brief", sorted by descending fuzzy-match
        score. Only candidates scoring above 0.2 are kept.
    """
    if diseases is None:
        diseases = load_diseases()

    # Drop empty fragments produced by stray commas ("fever,,cough") so
    # they don't add zero-similarity terms that dilute every score.
    user_symptoms = [s.strip() for s in user_input.split(",") if s.strip()]
    if not user_symptoms:
        return []

    results = []
    for disease, info in diseases.items():
        symptoms = info.get("symptoms", [])
        if not symptoms:
            # Skip malformed entries instead of dividing by zero below.
            continue
        # Mean pairwise fuzzy similarity (0.0 .. 1.0) between the user's
        # symptoms and the disease's known symptoms.
        match_score = sum(
            SequenceMatcher(None, sym.lower(), us.lower()).ratio()
            for sym in symptoms
            for us in user_symptoms
        ) / (len(symptoms) * len(user_symptoms))
        if match_score > 0.2:
            results.append({
                "disease": disease,
                "score": match_score,
                "source": info.get("source", ""),
                "brief": info.get("brief", ""),
            })
    results.sort(key=lambda x: x["score"], reverse=True)
    return results[:5]
|
pubmed_module.py
DELETED
|
@@ -1,44 +0,0 @@
|
|
| 1 |
-
# pubmed_module.py
from xml.etree import ElementTree

import requests
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoModelForSeq2SeqLM
|
| 5 |
-
|
| 6 |
-
# Device selection: use the GPU when one is available.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load BioGPT for summarization.
# NOTE: BioGPT is a decoder-only (causal) language model. Loading it with
# AutoModelForSeq2SeqLM raises "Unrecognized configuration class" because
# BioGptConfig is only registered with the causal-LM auto class.
bio_tokenizer = AutoTokenizer.from_pretrained("microsoft/BioGPT-Large")
bio_model = AutoModelForCausalLM.from_pretrained("microsoft/BioGPT-Large").to(DEVICE)
|
| 12 |
-
|
| 13 |
-
def search_pubmed(query, max_results=5):
    """Search PubMed for articles about *query* via the NCBI E-utilities.

    The previous endpoint (the Literature Citation Exporter at
    api.ncbi.nlm.nih.gov/lit/ctxp) only accepts PMIDs via ``id=`` and has
    no ``term`` search, so it could never return results for a query.

    Args:
        query: Search term (e.g. a disease name).
        max_results: Maximum number of articles to return.

    Returns:
        A list of dicts with keys "title", "link" and "abstract".
        Returns an empty list on any HTTP failure or when nothing matches.
    """
    base = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
    # Step 1: esearch returns the PMIDs matching the query. Passing the
    # user-supplied term through `params` lets requests URL-encode it.
    search = requests.get(
        f"{base}/esearch.fcgi",
        params={"db": "pubmed", "term": query,
                "retmax": max_results, "retmode": "json"},
        timeout=10,
    )
    if search.status_code != 200:
        return []
    ids = search.json().get("esearchresult", {}).get("idlist", [])
    if not ids:
        return []

    # Step 2: efetch returns the full records (title + abstract) as XML.
    fetch = requests.get(
        f"{base}/efetch.fcgi",
        params={"db": "pubmed", "id": ",".join(ids), "retmode": "xml"},
        timeout=10,
    )
    if fetch.status_code != 200:
        return []

    articles = []
    root = ElementTree.fromstring(fetch.content)
    for article in root.iter("PubmedArticle"):
        pmid = article.findtext(".//PMID", default="")
        # Abstracts may be split into several labelled AbstractText nodes.
        abstract = " ".join(
            (node.text or "") for node in article.findall(".//AbstractText")
        ).strip()
        articles.append({
            "title": article.findtext(".//ArticleTitle", default=""),
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            "abstract": abstract,
        })
    return articles[:max_results]
|
| 30 |
-
|
| 31 |
-
def summarize_text(text):
    """Summarize a research text with the BioGPT model.

    Args:
        text: Source text; its tokenization is truncated to 1024 tokens.

    Returns:
        The generated summary string (40-150 tokens, 4-beam search).
    """
    encoded = bio_tokenizer(
        text, return_tensors="pt", truncation=True, max_length=1024
    ).to(DEVICE)
    generation_kwargs = {
        "max_length": 150,
        "min_length": 40,
        "length_penalty": 2.0,
        "num_beams": 4,
    }
    output_ids = bio_model.generate(**encoded, **generation_kwargs)
    return bio_tokenizer.decode(output_ids[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|