euler03 committed on
Commit
367f101
·
verified ·
1 Parent(s): be3ab4e
Files changed (1) hide show
  1. app.py +42 -51
app.py CHANGED
@@ -1,4 +1,3 @@
1
- import os
2
  import gradio as gr
3
  import torch
4
  from llama_cpp import Llama
@@ -8,29 +7,31 @@ from transformers import AutoModelForMultipleChoice, AutoTokenizer
8
  # GPU setup
9
  # -------------------------------------------------------
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
 
11
 
12
  # -------------------------------------------------------
13
- # Load LLaMA Locally (for model-input generation)
14
  # -------------------------------------------------------
15
- LLAMA_MODEL_PATH = "/home/euler03/projects/bias/bias-detection/bias-detection/models/llama-2-7b-chat.Q4_K_M.gguf"
16
- if not os.path.exists(LLAMA_MODEL_PATH):
17
- raise FileNotFoundError(f" LLaMA model not found at: {LLAMA_MODEL_PATH}")
18
-
19
- llm = Llama(
20
- model_path=LLAMA_MODEL_PATH,
21
  n_ctx=512,
22
- n_gpu_layers=100 # adjust if needed
23
  )
24
 
25
  # -------------------------------------------------------
26
- # Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice as fine tuned int he bbq model)
27
  # -------------------------------------------------------
28
  BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
29
  bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
30
  bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 
31
 
32
  # -------------------------------------------------------
33
- # List of Topics
34
  # -------------------------------------------------------
35
  TOPICS = [
36
  "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
@@ -44,23 +45,13 @@ TOPICS = [
44
  "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
45
  "Police Use of Facial Recognition", "Space Exploration and Government Funding"
46
  ]
 
47
 
48
  # -------------------------------------------------------
49
- # 5 Generation: Context, Question & 3 Answers using LLaMA
50
  # -------------------------------------------------------
51
  def generate_context_question_answers(topic):
52
- """
53
- Use LLaMA (chat-style prompt) to generate:
54
- - A short, neutral context about the topic.
55
- - A question that tests bias on the topic.
56
- - Three possible answers (Answer0, Answer1, Answer2).
57
- The output is expected in the following format:
58
- Context: <...>
59
- Question: <...>
60
- Answer0: <...>
61
- Answer1: <...>
62
- Answer2: <...>
63
- """
64
  system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
65
  user_prompt = f"""
66
  Please write:
@@ -78,13 +69,14 @@ Use exactly these labels and no extra text.
78
 
79
  {user_prompt}
80
  [/INST]"""
81
-
82
  response = llm(
83
  chat_prompt,
84
- max_tokens=256,
85
  temperature=1.0,
86
  echo=False
87
  )
 
88
  print("Raw LLaMA Output:", response)
89
  if "choices" in response and len(response["choices"]) > 0:
90
  text_output = response["choices"][0]["text"].strip()
@@ -92,7 +84,7 @@ Use exactly these labels and no extra text.
92
  text_output = "[Error: LLaMA did not generate a response]"
93
  print("Processed LLaMA Output:", text_output)
94
 
95
- # Initialize with defaults comme ca on teste si generation works
96
  context_line = "[No context generated]"
97
  question_line = "[No question generated]"
98
  ans0_line = "[No answer0 generated]"
@@ -100,6 +92,7 @@ Use exactly these labels and no extra text.
100
  ans2_line = "[No answer2 generated]"
101
 
102
  lines = [line.strip() for line in text_output.split("\n") if line.strip()]
 
103
  for line in lines:
104
  lower_line = line.lower()
105
  if lower_line.startswith("context:"):
@@ -113,15 +106,16 @@ Use exactly these labels and no extra text.
113
  elif lower_line.startswith("answer2:"):
114
  ans2_line = line.split(":", 1)[1].strip()
115
 
 
116
  return context_line, question_line, ans0_line, ans1_line, ans2_line
117
 
118
  # -------------------------------------------------------
119
  # Classification: Run BBQ Model (Multiple-Choice)
120
  # -------------------------------------------------------
121
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
 
122
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
123
  contexts = [context, context, context]
124
-
125
  encodings = bbq_tokenizer(
126
  inputs,
127
  contexts,
@@ -130,23 +124,24 @@ def classify_multiple_choice(context, question, ans0, ans1, ans2):
130
  max_length=128,
131
  return_tensors="pt"
132
  ).to(device)
133
-
134
  bbq_model.eval()
135
  with torch.no_grad():
136
  outputs = bbq_model(**{k: v.unsqueeze(0) for k, v in encodings.items()})
137
- logits = outputs.logits[0]
138
- probs = torch.softmax(logits, dim=-1)
139
  pred_idx = torch.argmax(probs).item()
140
  all_answers = [ans0, ans1, ans2]
141
  prob_dict = {all_answers[i]: float(probs[i].item()) for i in range(3)}
142
  predicted_answer = all_answers[pred_idx]
 
143
  return predicted_answer, prob_dict
144
 
145
  # -------------------------------------------------------
146
- # Assess Objectivity: Compare User's Choice to Model's Prediction
147
  # -------------------------------------------------------
148
  def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
149
-
150
  predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
151
  if user_choice == predicted_answer:
152
  assessment = (
@@ -158,46 +153,39 @@ def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
158
  f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
159
  "This suggests a deviation from the objective standard."
160
  )
 
161
  return assessment, prob_dict
162
 
163
  # -------------------------------------------------------
164
  # Build the Gradio Interface
165
  # -------------------------------------------------------
166
  with gr.Blocks() as demo:
167
- gr.Markdown("# 🧠 Bias Detection: Assessing Objectivity")
168
  gr.Markdown("""
169
  **Steps:**
170
  1. **Select a topic** from the dropdown.
171
  2. Click **"Generate Context, Question & Answers"** to generate a scenario.
172
- 3. **Review** the generated context, question, and 3 candidate answers.
173
  4. **Select your answer** from the radio options.
174
  5. Click **"Assess Objectivity"** to see the model's evaluation.
175
  """)
176
- # Topic selection
177
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
178
-
179
- # Outputs from LLaMA generation
180
  context_box = gr.Textbox(label="Generated Context", interactive=False)
181
  question_box = gr.Textbox(label="Generated Question", interactive=False)
182
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
183
  ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
184
  ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
185
-
186
- # User selection: Choose one answer from the generated answers
187
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
188
-
189
- # Assessment outputs
190
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
191
  probabilities_box = gr.JSON(label="Confidence Probabilities")
192
 
193
- # Buttons
194
  generate_button = gr.Button("Generate Context, Question & Answers")
195
  assess_button = gr.Button("Assess Objectivity")
196
 
197
- # Callback 1: Generate with LLaMA
198
  def on_generate(topic):
 
199
  ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
200
- # Update the radio button choices with the generated answers
201
  return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
202
  generate_button.click(
203
  fn=on_generate,
@@ -205,11 +193,13 @@ with gr.Blocks() as demo:
205
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
206
  )
207
 
208
- # Callback 2: Assess objectivity
209
  def on_assess(ctx, q, a0, a1, a2, user_choice):
210
- if user_choice is None or user_choice == "":
 
 
211
  return "Please select one of the generated answers.", {}
212
  assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
 
213
  return assessment, probs
214
  assess_button.click(
215
  fn=on_assess,
@@ -219,10 +209,11 @@ with gr.Blocks() as demo:
219
 
220
  gr.Markdown("""
221
  ### How It Works:
222
- - **LLaMA** generates a scenario (context, question, and three candidate answers).
223
- - You **select** one answer that you think is most objective.
 
224
  - The **BBQ model** classifies the same scenario and outputs the answer it deems most objective along with confidence scores.
225
- - The app **compares** your choice with the model’s prediction and provides an objectivity assessment.
226
  """)
227
 
228
- demo.launch()
 
 
1
  import gradio as gr
2
  import torch
3
  from llama_cpp import Llama
 
7
  # GPU setup
8
  # -------------------------------------------------------
9
# --- Device selection: prefer the GPU whenever CUDA is present. ---
cuda_ok = torch.cuda.is_available()
device = "cuda" if cuda_ok else "cpu"
print("Using device:", device)
if cuda_ok:
    print("GPU Name:", torch.cuda.get_device_name(0))
13
 
14
  # -------------------------------------------------------
15
+ # Load LLaMA from Hugging Face Hub (for generation)
16
  # -------------------------------------------------------
17
+ # Instead of a local path, use from_pretrained to download the model automatically.
18
# --- LLaMA chat model, downloaded from the Hugging Face Hub on first use. ---
llm = Llama.from_pretrained(
    repo_id="TheBloke/llama-2-7b-chat-GGUF",  # Hub repo holding the GGUF weights
    filename="llama-2-7b-chat.Q4_K_M.gguf",   # 4-bit quantized chat variant
    n_ctx=512,                                # context window, in tokens
    n_gpu_layers=30,                          # layers offloaded to GPU; tune to available VRAM
)
24
 
25
  # -------------------------------------------------------
26
+ # Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
27
  # -------------------------------------------------------
28
# --- BBQ fine-tuned multiple-choice classifier and its tokenizer. ---
BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
# Model is moved to the selected device; tokenizer stays CPU-side.
bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
print("BBQ model loaded.")
32
 
33
  # -------------------------------------------------------
34
+ # List of Topics
35
  # -------------------------------------------------------
36
  TOPICS = [
37
  "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
 
45
  "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
46
  "Police Use of Facial Recognition", "Space Exploration and Government Funding"
47
  ]
48
+ print("Topics ready.")
49
 
50
  # -------------------------------------------------------
51
+ # Generation: Context, Question & 3 Answers using LLaMA
52
  # -------------------------------------------------------
53
  def generate_context_question_answers(topic):
54
+ print(f"[Checkpoint] Generating scenario for topic: {topic}")
 
 
 
 
 
 
 
 
 
 
 
55
  system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
56
  user_prompt = f"""
57
  Please write:
 
69
 
70
  {user_prompt}
71
  [/INST]"""
72
+ print("[Checkpoint] Prompt prepared, calling LLaMA...")
73
  response = llm(
74
  chat_prompt,
75
+ max_tokens=256, # Adjust as needed for faster generation
76
  temperature=1.0,
77
  echo=False
78
  )
79
+ print("[Checkpoint] LLaMA call complete.")
80
  print("Raw LLaMA Output:", response)
81
  if "choices" in response and len(response["choices"]) > 0:
82
  text_output = response["choices"][0]["text"].strip()
 
84
  text_output = "[Error: LLaMA did not generate a response]"
85
  print("Processed LLaMA Output:", text_output)
86
 
87
+ # Default values in case parsing fails
88
  context_line = "[No context generated]"
89
  question_line = "[No question generated]"
90
  ans0_line = "[No answer0 generated]"
 
92
  ans2_line = "[No answer2 generated]"
93
 
94
  lines = [line.strip() for line in text_output.split("\n") if line.strip()]
95
+ print(f"[Checkpoint] Parsed {len(lines)} lines.")
96
  for line in lines:
97
  lower_line = line.lower()
98
  if lower_line.startswith("context:"):
 
106
  elif lower_line.startswith("answer2:"):
107
  ans2_line = line.split(":", 1)[1].strip()
108
 
109
+ print("[Checkpoint] Generation parsing complete.")
110
  return context_line, question_line, ans0_line, ans1_line, ans2_line
111
 
112
  # -------------------------------------------------------
113
  # Classification: Run BBQ Model (Multiple-Choice)
114
  # -------------------------------------------------------
115
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
116
+ print("[Checkpoint] Starting classification...")
117
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
118
  contexts = [context, context, context]
 
119
  encodings = bbq_tokenizer(
120
  inputs,
121
  contexts,
 
124
  max_length=128,
125
  return_tensors="pt"
126
  ).to(device)
127
+ print("[Checkpoint] Tokenization complete. Running BBQ model...")
128
  bbq_model.eval()
129
  with torch.no_grad():
130
  outputs = bbq_model(**{k: v.unsqueeze(0) for k, v in encodings.items()})
131
+ logits = outputs.logits[0]
132
+ probs = torch.softmax(logits, dim=-1)
133
  pred_idx = torch.argmax(probs).item()
134
  all_answers = [ans0, ans1, ans2]
135
  prob_dict = {all_answers[i]: float(probs[i].item()) for i in range(3)}
136
  predicted_answer = all_answers[pred_idx]
137
+ print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
138
  return predicted_answer, prob_dict
139
 
140
  # -------------------------------------------------------
141
+ # Assess Objectivity: Compare User's Choice to Model's Prediction
142
  # -------------------------------------------------------
143
  def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
144
+ print("[Checkpoint] Assessing objectivity...")
145
  predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
146
  if user_choice == predicted_answer:
147
  assessment = (
 
153
  f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
154
  "This suggests a deviation from the objective standard."
155
  )
156
+ print("[Checkpoint] Assessment complete.")
157
  return assessment, prob_dict
158
 
159
  # -------------------------------------------------------
160
  # Build the Gradio Interface
161
  # -------------------------------------------------------
162
  with gr.Blocks() as demo:
163
+ gr.Markdown("# 🧠 Bias Detection: Assessing Objectivity (Cloud Version)")
164
  gr.Markdown("""
165
  **Steps:**
166
  1. **Select a topic** from the dropdown.
167
  2. Click **"Generate Context, Question & Answers"** to generate a scenario.
168
+ 3. **Review** the generated context, question, and candidate answers.
169
  4. **Select your answer** from the radio options.
170
  5. Click **"Assess Objectivity"** to see the model's evaluation.
171
  """)
 
172
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
 
 
173
  context_box = gr.Textbox(label="Generated Context", interactive=False)
174
  question_box = gr.Textbox(label="Generated Question", interactive=False)
175
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
176
  ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
177
  ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
 
 
178
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
 
 
179
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
180
  probabilities_box = gr.JSON(label="Confidence Probabilities")
181
 
 
182
  generate_button = gr.Button("Generate Context, Question & Answers")
183
  assess_button = gr.Button("Assess Objectivity")
184
 
 
185
def on_generate(topic):
    """Generate a scenario for *topic* and refresh the answer radio choices."""
    print("[Callback] on_generate triggered.")
    scenario = generate_context_question_answers(topic)
    print("[Callback] on_generate complete.")
    context, question, answer0, answer1, answer2 = scenario
    radio_refresh = gr.update(choices=[answer0, answer1, answer2], value=None)
    return context, question, answer0, answer1, answer2, radio_refresh
190
  generate_button.click(
191
  fn=on_generate,
 
193
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
194
  )
195
 
 
196
def on_assess(ctx, q, a0, a1, a2, user_choice):
    """Compare the user's selected answer against the BBQ model's prediction.

    Returns an (assessment message, probability dict) pair; when no answer
    has been selected yet, returns a prompt message and an empty dict.
    """
    print("[Callback] on_assess triggered.")
    if user_choice:
        result, prob_map = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
        print("[Callback] on_assess complete.")
        return result, prob_map
    print("[Callback] No user choice selected.")
    return "Please select one of the generated answers.", {}
204
  assess_button.click(
205
  fn=on_assess,
 
209
 
210
  gr.Markdown("""
211
  ### How It Works:
212
+ - **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
213
+ - It generates a scenario (context, question, and three candidate answers).
214
+ - You select the answer you think is most objective.
215
  - The **BBQ model** classifies the same scenario and outputs the answer it deems most objective along with confidence scores.
216
+ - The app compares your choice with the model’s prediction and provides an objectivity assessment.
217
  """)
218
 
219
+ demo.launch()