halilolcay committed on
Commit
b2dfdbc
·
verified ·
1 Parent(s): 7437adc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -57
app.py CHANGED
@@ -4,7 +4,9 @@ import torch
4
  import random
5
  import os
6
  import gradio as gr
7
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 
 
8
  from datasets import load_dataset
9
  from sentence_transformers import SentenceTransformer, util
10
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
@@ -12,39 +14,38 @@ from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_sc
12
  warnings.filterwarnings("ignore")
13
 
14
  # ============================================================================
15
- # 1. INITIALIZATION & MODELS
16
  # ============================================================================
17
- device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
18
 
19
  print("[INFO] Loading Expert Models (NLI, Similarity, Uncertainty)...")
20
- nli_model = pipeline("text-classification", model="pritamdeka/PubMedBERT-MNLI-MedNLI", device=device, truncation=True, max_length=512)
21
- sim_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
22
- clf_model = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2", device=device, truncation=True, max_length=512)
23
-
24
- # Nous-Hermes-2-Mistral-7B-DPO Yükleme (4-bit Sıkıştırma ile)
25
- print("[INFO] Loading Nous-Hermes-2-Mistral-7B-DPO (4-bit optimized)...")
26
- model_id = "NousResearch/Nous-Hermes-2-Mistral-7B-DPO"
27
-
28
- # Ücretsiz HF Space (16GB VRAM) için kritik ayarlar
29
- quant_config = BitsAndBytesConfig(
30
- load_in_4bit=True,
31
- bnb_4bit_compute_dtype=torch.float16,
32
- bnb_4bit_quant_type="nf4",
33
- bnb_4bit_use_double_quant=True
34
  )
35
 
36
- tokenizer = AutoTokenizer.from_pretrained(model_id)
37
- correction_model = AutoModelForCausalLM.from_pretrained(
38
- model_id,
39
- quantization_config=quant_config,
40
- device_map="auto"
41
  )
42
 
43
  # ============================================================================
44
- # 2. CORE FUNCTIONS
45
  # ============================================================================
46
  def detect_nli(evidence, answer):
47
- res = nli_model(f"{evidence} [SEP] {answer}")[0]
48
  return res["label"], res["score"]
49
 
50
  def detect_similarity(evidence, answer):
@@ -53,42 +54,27 @@ def detect_similarity(evidence, answer):
53
  return util.pytorch_cos_sim(e1, e2).item()
54
 
55
  def detect_uncertainty(evidence, answer):
56
- return clf_model(f"{evidence} [SEP] {answer}")[0]["score"]
 
57
 
58
  def generate_correction(query, wrong, truth):
59
- # Nous-Hermes-2 ChatML Formatı
60
- prompt = f"""<|im_start|>system
61
- You are a board-certified medical doctor. Analyze the clinical error and provide a fix based ONLY on verified evidence.<|im_end|>
62
- <|im_start|>user
63
- QUESTION: {query}
64
- INCORRECT ANSWER: {wrong}
65
- VERIFIED EVIDENCE: {truth}
66
-
67
- TASK:
68
- 1. Explain why the answer is incorrect.
69
- 2. Provide the clinically accurate correction.<|im_end|>
70
- <|im_start|>assistant
71
- """
72
- inputs = tokenizer(prompt, return_tensors="pt").to(device)
73
-
74
- with torch.no_grad():
75
- outputs = correction_model.generate(
76
- **inputs,
77
- max_new_tokens=300,
78
- temperature=0.1, # Tıbbi doğruluk için düşük sıcaklık
79
- eos_token_id=tokenizer.eos_token_id
80
- )
81
 
82
- decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
83
- # Sadece asistanın cevabını ayıklıyoruz
84
- return decoded.split("assistant")[-1].strip()
 
 
 
 
85
 
86
  # ============================================================================
87
- # 3. THE AUDIT ENGINE (N=20)
88
  # ============================================================================
89
  def run_clinical_audit():
90
  dataset = load_dataset("UTAustin-AIHealth/MedHallu", "pqa_labeled", split="train", streaming=True)
91
- data_pool = list(dataset.take(150))
92
  samples = random.sample(data_pool, 20)
93
 
94
  results = []
@@ -109,7 +95,8 @@ def run_clinical_audit():
109
 
110
  detected = 0
111
  reason = "Factual"
112
- if nli_label == "contradiction" or sim_score < 0.30 or unc_score < 0.25:
 
113
  detected = 1
114
  reason = "Clinical Hallucination Detected"
115
 
@@ -120,7 +107,7 @@ def run_clinical_audit():
120
  if detected:
121
  corrected_text = generate_correction(query, llm_answer, factual)
122
  correction = {
123
- "physician_prompt": "Nous-Hermes-2 ChatML Structure",
124
  "llm_corrected_answer": corrected_text
125
  }
126
 
@@ -152,11 +139,11 @@ def run_clinical_audit():
152
  return f"✅ Audit Complete!\nAccuracy: {metrics['accuracy']:.2f}\nRecall: {metrics['recall']:.2f}", file_name
153
 
154
  # ============================================================================
155
- # 4. GRADIO INTERFACE
156
  # ============================================================================
157
  with gr.Blocks() as demo:
158
- gr.Markdown("# 🩺 Healthcare LLM Auditor (Nous-Hermes-2 Engine)")
159
- gr.Markdown("Bu sistem 20 vakayı 4-bit optimize edilmiş Nous-Hermes-2 ile denetler.")
160
 
161
  run_btn = gr.Button("🚀 Start Clinical Audit", variant="primary")
162
  output_text = gr.Textbox(label="Status Summary")
 
4
  import random
5
  import os
6
  import gradio as gr
7
+ from huggingface_hub import hf_hub_download
8
+ from llama_cpp import Llama
9
+ from transformers import pipeline
10
  from datasets import load_dataset
11
  from sentence_transformers import SentenceTransformer, util
12
  from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
 
14
# ============================================================================
# 1. INITIALIZATION & EXPERT MODELS (Lightweight)
# ============================================================================
warnings.filterwarnings("ignore")

device = "cpu"  # Mandatory for the free (CPU-only) Space tier

print("[INFO] Loading Expert Models (NLI, Similarity, Uncertainty)...")
# These models are small, so they run comfortably on CPU (device=-1).
nli_model = pipeline("text-classification", model="pritamdeka/PubMedBERT-MNLI-MedNLI", device=-1)
sim_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
clf_model = pipeline("text-classification", model="cross-encoder/ms-marco-MiniLM-L-6-v2", device=-1)

# ============================================================================
# 2. LOADING GGUF MODEL (For CPU Correction)
# ============================================================================
print("[INFO] Downloading and Loading Nous-Hermes-2 GGUF (CPU Optimized)...")
# Fetch the CPU-friendly Q4_K_M (4-bit) quantization of the model.
model_path = hf_hub_download(
    repo_id="QuantFactory/Nous-Hermes-2-Mistral-7B-DPO-GGUF",
    filename="Nous-Hermes-2-Mistral-7B-DPO.Q4_K_M.gguf",
)

correction_model = Llama(
    model_path=model_path,
    n_ctx=1024,      # context window
    n_threads=4,     # CPU threads to use
    n_gpu_layers=0,  # no GPU on the free tier
)
43
 
44
  # ============================================================================
45
+ # 3. CORE FUNCTIONS
46
  # ============================================================================
47
def detect_nli(evidence, answer):
    """Classify the (evidence, answer) pair with the medical NLI model.

    Returns a ``(label, score)`` tuple taken from the top prediction.
    """
    pair_text = f"{evidence} [SEP] {answer}"
    prediction = nli_model(pair_text, truncation=True, max_length=512)[0]
    return prediction["label"], prediction["score"]
50
 
51
  def detect_similarity(evidence, answer):
 
54
  return util.pytorch_cos_sim(e1, e2).item()
55
 
56
def detect_uncertainty(evidence, answer):
    """Score the (evidence, answer) pair with the cross-encoder relevance model.

    Returns the top prediction's confidence score as a float.
    """
    scored = clf_model(f"{evidence} [SEP] {answer}", truncation=True, max_length=512)
    return scored[0]["score"]
59
 
60
def generate_correction(query, wrong, truth):
    """Ask the GGUF Nous-Hermes-2 model to explain and correct a wrong answer.

    Builds a ChatML prompt from the question, the incorrect answer, and the
    verified evidence, then returns the assistant's stripped completion text.
    """
    # ChatML format adapted for the GGUF build of the model.
    prompt = "\n".join([
        "<|im_start|>system",
        "You are a doctor. Explain error and fix based on evidence.<|im_end|>",
        "<|im_start|>user",
        f"Q: {query}",
        f"Wrong: {wrong}",
        f"Truth: {truth}",
        "<|im_end|>",
        "<|im_start|>assistant",
        "",  # keeps the prompt's trailing newline
    ])

    completion = correction_model(
        prompt,
        max_tokens=250,
        stop=["<|im_end|>"],  # halt at the ChatML end-of-turn marker
        echo=False,           # do not repeat the prompt in the output
    )
    return completion["choices"][0]["text"].strip()
71
 
72
  # ============================================================================
73
+ # 4. THE AUDIT ENGINE (N=20)
74
  # ============================================================================
75
  def run_clinical_audit():
76
  dataset = load_dataset("UTAustin-AIHealth/MedHallu", "pqa_labeled", split="train", streaming=True)
77
+ data_pool = list(dataset.take(100))
78
  samples = random.sample(data_pool, 20)
79
 
80
  results = []
 
95
 
96
  detected = 0
97
  reason = "Factual"
98
+ # Eşik değerlerin (thresholds)
99
+ if nli_label == "contradiction" or sim_score < 0.25 or unc_score < 0.20:
100
  detected = 1
101
  reason = "Clinical Hallucination Detected"
102
 
 
107
  if detected:
108
  corrected_text = generate_correction(query, llm_answer, factual)
109
  correction = {
110
+ "physician_prompt": "Nous-Hermes-2 GGUF Structure",
111
  "llm_corrected_answer": corrected_text
112
  }
113
 
 
139
  return f"✅ Audit Complete!\nAccuracy: {metrics['accuracy']:.2f}\nRecall: {metrics['recall']:.2f}", file_name
140
 
141
  # ============================================================================
142
+ # 5. GRADIO INTERFACE
143
  # ============================================================================
144
  with gr.Blocks() as demo:
145
+ gr.Markdown("# 🩺 Healthcare LLM Auditor (GGUF CPU Edition)")
146
+ gr.Markdown("Ücretsiz CPU katmanı için optimize edilmiştir. 20 vakayı analiz eder.")
147
 
148
  run_btn = gr.Button("🚀 Start Clinical Audit", variant="primary")
149
  output_text = gr.Textbox(label="Status Summary")