Spaces:

halilolcay
/

nlp

Build error

App Files Community

halilolcay committed on Jan 7

Commit

b2dfdbc

verified ·

1 Parent(s): 7437adc

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -57

app.py CHANGED Viewed

@@ -4,7 +4,9 @@ import torch
 import random
 import os
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer, util
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
@@ -12,39 +14,38 @@ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_sc
 warnings.filterwarnings("ignore")
 # ============================================================================
-# 1. INITIALIZATION & MODELS
 # ============================================================================
-device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
 print("[INFO] Loading Expert Models (NLI, Similarity, Uncertainty)...")
-nli_model = pipeline("text-classification", model="pritamdeka/PubMedBERT-MNLI-MedNLI", device=device, truncation=True, max_length=512)
-sim_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
-clf_model = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2", device=device, truncation=True, max_length=512)
-# Nous-Hermes-2-Mistral-7B-DPO Yükleme (4-bit Sıkıştırma ile)
-print("[INFO] Loading Nous-Hermes-2-Mistral-7B-DPO (4-bit optimized)...")
-model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
-# Ücretsiz HF Space (16GB VRAM) için kritik ayarlar
-quant_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_use_double_quant=True
 )
-tokenizer = AutoTokenizer.from_pretrained(model_id)
-correction_model = AutoModelForCausalLM.from_pretrained(
-    model_id,
-    quantization_config=quant_config,
-    device_map="auto"
 )
 # ============================================================================
-# 2. CORE FUNCTIONS
 # ============================================================================
 def detect_nli(evidence, answer):
-    res = nli_model(f"{evidence} [SEP] {answer}")[0]
     return res["label"], res["score"]
 def detect_similarity(evidence, answer):
@@ -53,42 +54,27 @@ def detect_similarity(evidence, answer):
     return util.pytorch_cos_sim(e1, e2).item()
 def detect_uncertainty(evidence, answer):
-    return clf_model(f"{evidence} [SEP] {answer}")[0]["score"]
 def generate_correction(query, wrong, truth):
-    # Nous-Hermes-2 ChatML Formatı
-    prompt = f"""<|im_start|>system
-You are a board-certified medical doctor. Analyze the clinical error and provide a fix based ONLY on verified evidence.<|im_end|>
-<|im_start|>user
-QUESTION: {query}
-INCORRECT ANSWER: {wrong}
-VERIFIED EVIDENCE: {truth}
-TASK:
-1. Explain why the answer is incorrect.
-2. Provide the clinically accurate correction.<|im_end|>
-<|im_start|>assistant
-"""
-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
-    with torch.no_grad():
-        outputs = correction_model.generate(
-            **inputs,
-            max_new_tokens=300,
-            temperature=0.1, # Tıbbi doğruluk için düşük sıcaklık
-            eos_token_id=tokenizer.eos_token_id
-        )
-    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
-    # Sadece asistanın cevabını ayıklıyoruz
-    return decoded.split("assistant")[-1].strip()
 # ============================================================================
-# 3. THE AUDIT ENGINE (N=20)
 # ============================================================================
 def run_clinical_audit():
     dataset = load_dataset("UTAustin-AIHealth/MedHallu", "pqa_labeled", split="train", streaming=True)
-    data_pool = list(dataset.take(150))
     samples = random.sample(data_pool, 20)
     results = []
@@ -109,7 +95,8 @@ def run_clinical_audit():
         detected = 0
         reason = "Factual"
-        if nli_label == "contradiction" or sim_score < 0.30 or unc_score < 0.25:
             detected = 1
             reason = "Clinical Hallucination Detected"
@@ -120,7 +107,7 @@ def run_clinical_audit():
         if detected:
             corrected_text = generate_correction(query, llm_answer, factual)
             correction = {
-                "physician_prompt": "Nous-Hermes-2 ChatML Structure",
                 "llm_corrected_answer": corrected_text
             }
@@ -152,11 +139,11 @@ def run_clinical_audit():
     return f"✅ Audit Complete!\nAccuracy: {metrics['accuracy']:.2f}\nRecall: {metrics['recall']:.2f}", file_name
 # ============================================================================
-# 4. GRADIO INTERFACE
 # ============================================================================
 with gr.Blocks() as demo:
-    gr.Markdown("# 🩺 Healthcare LLM Auditor (Nous-Hermes-2 Engine)")
-    gr.Markdown("Bu sistem 20 vakayı 4-bit optimize edilmiş Nous-Hermes-2 ile denetler.")
     run_btn = gr.Button("🚀 Start Clinical Audit", variant="primary")
     output_text = gr.Textbox(label="Status Summary")

 import random
 import os
 import gradio as gr
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from transformers import pipeline
 from datasets import load_dataset
 from sentence_transformers import SentenceTransformer, util
 from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 warnings.filterwarnings("ignore")
 # ============================================================================
+# 1. INITIALIZATION & EXPERT MODELS (Lightweight)
 # ============================================================================
+device = "cpu" # Ücretsiz Space için zorunlu
 print("[INFO] Loading Expert Models (NLI, Similarity, Uncertainty)...")
+# Bu modeller küçük olduğu için CPU'da rahat çalışır
+nli_model = pipeline("text-classification", model="pritamdeka/PubMedBERT-MNLI-MedNLI", device=-1)
+sim_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
+clf_model = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2", device=-1)
+# ============================================================================
+# 2. LOADING GGUF MODEL (For CPU Correction)
+# ============================================================================
+print("[INFO] Downloading and Loading Nous-Hermes-2 GGUF (CPU Optimized)...")
+# Modelin CPU dostu Q4_K_M (4-bit) versiyonunu indiriyoruz
+model_path = hf_hub_download(
+    repo_id="QuantFactory/Nous-Hermes-2-Mistral-7B-DPO-GGUF",
+    filename="Nous-Hermes-2-Mistral-7B-DPO.Q4_K_M.gguf"
 )
+correction_model = Llama(
+    model_path=model_path,
+    n_ctx=1024,      # Bağlam penceresi
+    n_threads=4,     # İşlemci çekirdek kullanımı
+    n_gpu_layers=0   # GPU olmadığı için 0
 )
 # ============================================================================
+# 3. CORE FUNCTIONS
 # ============================================================================
 def detect_nli(evidence, answer):
+    res = nli_model(f"{evidence} [SEP] {answer}", truncation=True, max_length=512)[0]
     return res["label"], res["score"]
 def detect_similarity(evidence, answer):
     return util.pytorch_cos_sim(e1, e2).item()
 def detect_uncertainty(evidence, answer):
+    res = clf_model(f"{evidence} [SEP] {answer}", truncation=True, max_length=512)[0]
+    return res["score"]
 def generate_correction(query, wrong, truth):
+    # ChatML Formatı GGUF için uyarlandı
+    prompt = f"<|im_start|>system\nYou are a doctor. Explain error and fix based on evidence.<|im_end|>\n<|im_start|>user\nQ: {query}\nWrong: {wrong}\nTruth: {truth}\n<|im_end|>\n<|im_start|>assistant\n"
+    output = correction_model(
+        prompt,
+        max_tokens=250,
+        stop=["<|im_end|>"],
+        echo=False
+    )
+    return output["choices"][0]["text"].strip()
 # ============================================================================
+# 4. THE AUDIT ENGINE (N=20)
 # ============================================================================
 def run_clinical_audit():
     dataset = load_dataset("UTAustin-AIHealth/MedHallu", "pqa_labeled", split="train", streaming=True)
+    data_pool = list(dataset.take(100))
     samples = random.sample(data_pool, 20)
     results = []
         detected = 0
         reason = "Factual"
+        # Eşik değerlerin (thresholds)
+        if nli_label == "contradiction" or sim_score < 0.25 or unc_score < 0.20:
             detected = 1
             reason = "Clinical Hallucination Detected"
         if detected:
             corrected_text = generate_correction(query, llm_answer, factual)
             correction = {
+                "physician_prompt": "Nous-Hermes-2 GGUF Structure",
                 "llm_corrected_answer": corrected_text
             }
     return f"✅ Audit Complete!\nAccuracy: {metrics['accuracy']:.2f}\nRecall: {metrics['recall']:.2f}", file_name
 # ============================================================================
+# 5. GRADIO INTERFACE
 # ============================================================================
 with gr.Blocks() as demo:
+    gr.Markdown("# 🩺 Healthcare LLM Auditor (GGUF CPU Edition)")
+    gr.Markdown("Ücretsiz CPU katmanı için optimize edilmiştir. 20 vakayı analiz eder.")
     run_btn = gr.Button("🚀 Start Clinical Audit", variant="primary")
     output_text = gr.Textbox(label="Status Summary")