# PharmaCompass / app.py
# maherghanem86's picture
# Update app.py
# efd0af0 verified
# Raw
# History Blame Contribute Delete
# 4.54 kB
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
from peft import PeftModel
import json
import re
from owlready2 import *
import os
# --- Model setup -------------------------------------------------------------
# Runs at import time: loads the base model with 4-bit quantization, attaches
# the LoRA adapter on top of it, and exposes the result as a text-generation
# pipeline named `pipe`.
print("⏳ جاري تحميل النموذج الأساسي (Base Model) بتقنية 4-bit...")

BASE_MODEL_ID = "NousResearch/Llama-2-7b-chat-hf"
ADAPTER_ID = "maherghanem86/PharmaCompass"

# 4-bit NF4 quantization with double quantization; compute dtype is float16.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# The tokenizer comes from the base model id, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL_ID,
    quantization_config=quantization_config,
    device_map="auto",
)

print("⏳ جاري دمج الأوزان الخاصة بك (LoRA Adapter)...")
model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

pipe = pipeline(task="text-generation", model=model, tokenizer=tokenizer)
# --- Ontology setup ----------------------------------------------------------
print("⏳ جاري تحميل قاعدة المعرفة الدلالية (الأنطولوجيا)...")

ONTO_FILE = "PharmaCompass_Populated_Robust.owl"

# Load the OWL file (path resolved against the current working directory) when
# it exists; otherwise `onto` is None.
onto = (
    get_ontology("file://" + os.path.abspath(ONTO_FILE)).load()
    if os.path.exists(ONTO_FILE)
    else None
)
# Keyword groups paired with the trial message shown when ANY keyword of the
# group occurs as a substring of the normalized extraction text. Order here is
# the order in which matches are listed in the output.
_TRIAL_RULES = (
    (
        ("v fib", "vfib", "arrest", "cardiac", "heart", "myocardial"),
        "✅ [NCT08851] دراسة مراقبة مرضى التوقف القلبي وضعف عضلة القلب.",
    ),
    (
        ("diabetes", "sugar"),
        "✅ [NCT04123] تجربة سريرية لعلاج السكري المتقدم.",
    ),
    (
        ("hypertension", "htn", "blood pressure"),
        "✅ [NCT09922] تجربة مخصصة لمرضى ارتفاع ضغط الدم والشرايين.",
    ),
    (
        ("asthma", "copd", "respiratory", "lung"),
        "✅ [NCT11223] دراسة لتقييم أدوية الأمراض التنفسية المزمنة.",
    ),
    (
        ("renal", "kidney"),
        "✅ [NCT12245] دراسة سريرية لمرضى القصور الكلوي الحاد.",
    ),
)

_NO_TRIALS_MESSAGE = "⚠️ لا توجد تجارب مطابقة حالياً."


def _close_open_brackets(fragment):
    """Return *fragment* with the closers a truncated JSON text is missing.

    Brackets inside string literals are ignored. Missing closers are appended
    innermost-first, so ``[{"a": 1`` becomes ``[{"a": 1}]``.
    """
    pending = []  # closers still owed, outermost first
    in_string = False
    escaped = False
    for ch in fragment:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            pending.append("}")
        elif ch == "[":
            pending.append("]")
        elif ch in "}]" and pending and pending[-1] == ch:
            pending.pop()

    if in_string:
        # Generation stopped in the middle of a string literal.
        fragment += '"'
    else:
        # A dangling comma before a closer would make the JSON invalid.
        fragment = fragment.rstrip().rstrip(",")
    return fragment + "".join(reversed(pending))


def _extract_json(completion):
    """Pull the first JSON object out of the model completion.

    Returns:
        A ``(json_text, parsed)`` pair. ``parsed`` is the decoded object, or
        ``None`` when no repair produced valid JSON; in that case ``json_text``
        is the raw text from the first ``{`` onward. When the completion holds
        no ``{`` at all the result is ``("{}", {})``.
    """
    start = completion.find("{")
    if start == -1:
        return "{}", {}

    candidate = completion[start:]
    try:
        # raw_decode stops at the end of the first complete value, so trailing
        # chatter after the JSON (even if it contains braces) is ignored.
        parsed, end = json.JSONDecoder().raw_decode(candidate)
    except json.JSONDecodeError:
        pass
    else:
        return candidate[:end], parsed

    # Likely truncated output. First try closing everything that is open; if
    # that fails, drop the dangling tail after the last '}' and close again.
    attempts = [candidate]
    last_close = candidate.rfind("}")
    if last_close != -1:
        attempts.append(candidate[: last_close + 1])
    for attempt in attempts:
        repaired = _close_open_brackets(attempt)
        try:
            return repaired, json.loads(repaired)
        except json.JSONDecodeError:
            continue
    return candidate, None


def _match_trials(extracted_json_str):
    """Return the trial messages whose keywords occur in the extracted text."""
    # Normalize so that e.g. "V-Fib" and "v_fib" both match "v fib".
    clean_text = extracted_json_str.lower().replace("-", " ").replace("_", " ")
    hits = [
        message
        for keywords, message in _TRIAL_RULES
        if any(keyword in clean_text for keyword in keywords)
    ]
    return "\n\n".join(hits) if hits else _NO_TRIALS_MESSAGE


def real_pharma_compass_engine(raw_text):
    """Extract structured data from a clinical note and suggest matching trials.

    Generator used as a streaming UI callback. Each yielded item is a
    ``(json_panel_text, trials_panel_text)`` pair:

    1. a progress message while the model is running,
    2. the extracted JSON (or an error message plus the raw text when it cannot
       be parsed) together with a progress message for the matching step,
    3. the same JSON together with the final list of matched trials.

    Trial matching is a keyword lookup over the extracted JSON text against the
    hard-coded rules in ``_TRIAL_RULES``.

    Args:
        raw_text: The clinical note. Empty, whitespace-only or ``None`` input
            skips the model call and yields a single ``("{}", no-match)`` pair.
    """
    if not raw_text or not raw_text.strip():
        # Nothing to analyze: do not spend a generation call on a blank note.
        yield "{}", _NO_TRIALS_MESSAGE
        return

    yield "⏳ جاري تحليل النص واستخراج البيانات الطبية...", ""

    prompt = (
        "Extract 'diagnoses' and 'medications' as a JSON object from this note. "
        'Use format: {"diagnoses": [], "medications": []}\n\n'
        f"Note: {raw_text}\n\n### JSON Output:\n"
    )
    # return_full_text=False makes the pipeline return only the newly generated
    # text, so neither the JSON template in the prompt nor braces in the note
    # can be mistaken for the model's answer.
    output = pipe(
        prompt,
        max_new_tokens=512,
        temperature=0.1,
        return_full_text=False,
    )
    completion = output[0]["generated_text"]

    extracted_json_str, parsed_json = _extract_json(completion)
    if parsed_json is None:
        formatted_json = "⚠️ خطأ في تنسيق JSON:\n" + extracted_json_str
    else:
        formatted_json = json.dumps(parsed_json, indent=4, ensure_ascii=False)

    yield formatted_json, "⏳ جاري البحث عن تجارب سريرية مطابقة دلالياً..."
    yield formatted_json, _match_trials(extracted_json_str)
# --- User interface ----------------------------------------------------------
# Two-column layout: the clinical note and the action button on one side, the
# structured JSON and the suggested trials on the other.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("# 🧭 نظام البوصلة الدوائية (PharmaCompass)")

    with gr.Row():
        with gr.Column():
            input_box = gr.Textbox(
                label="📝 السجل الطبي (Clinical Note)",
                lines=12,
                placeholder="انسخ نص المريض هنا...",
            )
            btn = gr.Button("🔍 تحليل ومطابقة", variant="primary")

        with gr.Column():
            json_box = gr.Code(
                label="⚙️ بيانات المريض المهيكلة (Llama-2 Output)",
                language="json",
            )
            trials_box = gr.Textbox(
                label="🎯 التجارب السريرية المقترحة (Ontology Match)",
                lines=6,
            )

    # The callback is a generator; each yielded pair updates both output boxes.
    btn.click(
        real_pharma_compass_engine,
        input_box,
        [json_box, trials_box],
    )

# Enable the request queue, then start the app.
demo.queue()
demo.launch()