euler03 committed on
Commit
4e2f6d9
·
verified ·
1 Parent(s): 12b5a2d

two options

Browse files
Files changed (1) hide show
  1. app.py +126 -54
app.py CHANGED
@@ -1,63 +1,139 @@
1
  import gradio as gr
2
  import torch
3
  from llama_cpp import Llama
4
- # GPU setup
 
 
 
 
 
 
 
5
  # -------------------------------------------------------
6
  device = "cuda" if torch.cuda.is_available() else "cpu"
7
- print("Using device:", device)
8
  if device == "cuda":
9
  print("GPU Name:", torch.cuda.get_device_name(0))
10
 
11
  # -------------------------------------------------------
12
- # Load LLaMA from Hugging Face Hub (for generation)
13
  # -------------------------------------------------------
14
- # Instead of a local path, use from_pretrained to download the model automatically.
15
- llm = Llama.from_pretrained(
16
- repo_id="TheBloke/llama-2-7b-chat-GGUF", # Replace with the repo you want to use
17
- filename="llama-2-7b-chat.Q4_K_M.gguf", # Name of the GGUF file in the repo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
 
 
 
19
 
 
 
 
 
 
20
  n_ctx=512,
21
- n_gpu_layers=30, # Adjust if needed based on available VRAM
22
  )
23
 
24
- # -------------------------------------------------------
25
- # Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
26
- # -------------------------------------------------------
27
  BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
28
  bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
29
  bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
30
  print("BBQ model loaded.")
31
 
32
- # -------------------------------------------------------
33
- # List of Topics
34
- # -------------------------------------------------------
35
  TOPICS = [
36
  "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
 
 
 
 
 
 
 
37
  "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
38
  "Police Use of Facial Recognition", "Space Exploration and Government Funding"
39
  ]
40
  print("Topics ready.")
41
 
42
- # -------------------------------------------------------
43
- # Generation: Context, Question & 3 Answers using LLaMA
44
- # -------------------------------------------------------
45
  def generate_context_question_answers(topic):
 
 
 
 
 
 
 
 
 
 
 
 
46
  print(f"[Checkpoint] Generating scenario for topic: {topic}")
47
-
48
-
49
-
50
-
51
-
52
-
53
-
54
-
55
-
56
-
57
-
58
  system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
59
  user_prompt = f"""
60
  Please write:
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  {user_prompt}
63
  [/INST]"""
@@ -72,37 +148,39 @@ Please write:
72
  print("Raw LLaMA Output:", response)
73
  if "choices" in response and len(response["choices"]) > 0:
74
  text_output = response["choices"][0]["text"].strip()
 
75
  text_output = "[Error: LLaMA did not generate a response]"
76
  print("Processed LLaMA Output:", text_output)
77
-
78
- # Default values in case parsing fails
79
  context_line = "[No context generated]"
80
  question_line = "[No question generated]"
81
  ans0_line = "[No answer0 generated]"
 
82
  ans2_line = "[No answer2 generated]"
83
-
84
  lines = [line.strip() for line in text_output.split("\n") if line.strip()]
85
- print(f"[Checkpoint] Parsed {len(lines)} lines.")
86
  for line in lines:
87
  lower_line = line.lower()
88
  if lower_line.startswith("context:"):
 
 
 
 
 
 
 
89
  elif lower_line.startswith("answer2:"):
90
  ans2_line = line.split(":", 1)[1].strip()
91
-
92
  print("[Checkpoint] Generation parsing complete.")
93
  return context_line, question_line, ans0_line, ans1_line, ans2_line
94
 
95
- # -------------------------------------------------------
96
- # Classification: Run BBQ Model (Multiple-Choice)
97
- # -------------------------------------------------------
98
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
99
  print("[Checkpoint] Starting classification...")
100
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
101
  contexts = [context, context, context]
102
-
103
  encodings = bbq_tokenizer(
104
  inputs,
105
  contexts,
 
 
106
  max_length=128,
107
  return_tensors="pt"
108
  ).to(device)
@@ -119,13 +197,15 @@ def classify_multiple_choice(context, question, ans0, ans1, ans2):
119
  print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
120
  return predicted_answer, prob_dict
121
 
122
- # -------------------------------------------------------
123
- # Assess Objectivity: Compare User's Choice to Model's Prediction
124
- # -------------------------------------------------------
125
  def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
126
  print("[Checkpoint] Assessing objectivity...")
127
  predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
128
  if user_choice == predicted_answer:
 
 
 
 
 
129
  assessment = (
130
  f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
131
  "This suggests a deviation from the objective standard."
@@ -146,28 +226,18 @@ with gr.Blocks() as demo:
146
  4. **Select your answer** from the radio options.
147
  5. Click **"Assess Objectivity"** to see the model's evaluation.
148
  """)
149
-
150
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
151
-
152
-
153
  context_box = gr.Textbox(label="Generated Context", interactive=False)
154
  question_box = gr.Textbox(label="Generated Question", interactive=False)
155
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
156
  ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
157
  ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
158
-
159
-
160
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
161
-
162
-
163
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
164
  probabilities_box = gr.JSON(label="Confidence Probabilities")
165
-
166
-
167
  generate_button = gr.Button("Generate Context, Question & Answers")
168
  assess_button = gr.Button("Assess Objectivity")
169
 
170
-
171
  def on_generate(topic):
172
  print("[Callback] on_generate triggered.")
173
  ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
@@ -175,10 +245,10 @@ with gr.Blocks() as demo:
175
  return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
176
  generate_button.click(
177
  fn=on_generate,
 
178
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
179
  )
180
 
181
-
182
  def on_assess(ctx, q, a0, a1, a2, user_choice):
183
  print("[Callback] on_assess triggered.")
184
  if not user_choice:
@@ -189,15 +259,17 @@ with gr.Blocks() as demo:
189
  return assessment, probs
190
  assess_button.click(
191
  fn=on_assess,
 
 
 
192
 
193
  gr.Markdown("""
194
  ### How It Works:
195
- - **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
196
  - It generates a scenario (context, question, and three candidate answers).
197
  - You select the answer you think is most objective.
198
- - The **BBQ model** classifies the same scenario and outputs the answer it deems most objective along with confidence scores.
199
  - The app compares your choice with the model’s prediction and provides an objectivity assessment.
200
  """)
201
 
202
  demo.launch()
203
-
 
1
  import gradio as gr
2
  import torch
3
  from llama_cpp import Llama
4
+ from transformers import (
5
+ AutoModelForSequenceClassification,
6
+ AutoTokenizer,
7
+ AutoModelForMultipleChoice
8
+ )
9
+
10
+ # -------------------------------------------------------
11
+ # 1️⃣ Setup: Device
12
  # -------------------------------------------------------
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
14
+ print(f"Using device: {device}")
15
  if device == "cuda":
16
  print("GPU Name:", torch.cuda.get_device_name(0))
17
 
18
  # -------------------------------------------------------
19
+ # 2️⃣ Text Objectivity Analysis (Sequence Classification)
20
  # -------------------------------------------------------
21
+ MODELS = {
22
+ "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
23
+ # You can add more models here if needed.
24
+ }
25
+ id2label = {0: "BIASED", 1: "NEUTRAL"}
26
+ label2id = {"BIASED": 0, "NEUTRAL": 1}
27
+ loaded_models = {}
28
+
29
def load_model(model_name: str):
    """Return the (model, tokenizer) pair for *model_name*, loading and
    caching it on first use.

    On any failure while loading, an error string is returned instead of
    raising — callers detect this with ``isinstance(result, str)``.
    """
    cached = loaded_models.get(model_name)
    if cached is not None:
        return cached
    try:
        repo = MODELS[model_name]
        classifier = AutoModelForSequenceClassification.from_pretrained(
            repo,
            num_labels=2,
            id2label=id2label,
            label2id=label2id,
        ).to(device)
        tokenizer = AutoTokenizer.from_pretrained(repo)
    except Exception as exc:  # surface the failure as a message, not a crash
        return f"Error loading model: {str(exc)}"
    loaded_models[model_name] = (classifier, tokenizer)
    return classifier, tokenizer
45
+
46
def analyze_text(text: str, model_name: str):
    """Classify *text* as biased/neutral with the selected model.

    Returns a ``(confidence_map, message)`` pair, where ``confidence_map``
    maps label names to probabilities and ``message`` is a human-readable
    summary. Error conditions are reported through the same pair rather
    than by raising.
    """
    # Guard: nothing to classify.
    if not text.strip():
        return {"Empty text": 1.0}, "Please enter text to analyze."

    # load_model returns an error string on failure (see its contract).
    result = load_model(model_name)
    if isinstance(result, str):
        return {"Error": 1.0}, result
    model, tokenizer = result

    try:
        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        encoded = {name: tensor.to(device) for name, tensor in encoded.items()}
        model.eval()
        with torch.no_grad():
            logits = model(**encoded).logits[0]
        probs = torch.nn.functional.softmax(logits, dim=0)
        pred_idx = torch.argmax(logits).item()
        # Index 1 == NEUTRAL, index 0 == BIASED (see id2label above).
        status = "neutral" if pred_idx == 1 else "biased"
        confidence = probs[pred_idx].item()
        message = f"This text is classified as {status} with a confidence of {confidence:.2%}."
        confidence_map = {"Neutral": probs[1].item(), "Biased": probs[0].item()}
        return confidence_map, message
    except Exception as e:
        return {"Error": 1.0}, f"Analysis error: {str(e)}"
75
 
76
+ # -------------------------------------------------------
77
+ # 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
78
+ # -------------------------------------------------------
79
 
80
+ # Load LLaMA from Hugging Face Hub (for generation)
81
+ # Now we load the model from the HF Hub automatically.
82
+ llm = Llama.from_pretrained(
83
+ repo_id="TheBloke/llama-2-7b-chat-GGUF", # Repo on HF Hub
84
+ filename="llama-2-7b-chat.Q4_K_M.gguf",
85
  n_ctx=512,
86
+ n_gpu_layers=30, # first try
87
  )
88
 
89
+ # Load BBQ Fine-Tuned BERT Model & Tokenizer (for multiple-choice)
 
 
90
  BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
91
  bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
92
  bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
93
  print("BBQ model loaded.")
94
 
 
 
 
95
  TOPICS = [
96
  "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
97
+ "Immigration Policies in the USA", "Social Media's Role in Elections",
98
+ "The Ethics of Genetic Engineering", "Universal Basic Income Pros and Cons",
99
+ "Impact of AI on Jobs", "Gender Pay Gap in the Workplace",
100
+ "Government Surveillance and Privacy", "Cryptocurrency Regulation",
101
+ "Censorship in Journalism", "Nuclear Energy as a Climate Solution",
102
+ "Effects of Misinformation on Society", "Affirmative Action in Universities",
103
+ "Automation and Its Effect on the Workforce", "The Role of Religion in Politics",
104
  "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
105
  "Police Use of Facial Recognition", "Space Exploration and Government Funding"
106
  ]
107
  print("Topics ready.")
108
 
 
 
 
109
  def generate_context_question_answers(topic):
110
+ """
111
+ Use LLaMA (chat-style prompt) to generate:
112
+ - Context: 2-3 sentences about the topic.
113
+ - Question: A question testing bias on the topic.
114
+ - Answer0, Answer1, Answer2: Three candidate answers.
115
+ Expected format (exactly):
116
+ Context: <...>
117
+ Question: <...>
118
+ Answer0: <...>
119
+ Answer1: <...>
120
+ Answer2: <...>
121
+ """
122
  print(f"[Checkpoint] Generating scenario for topic: {topic}")
 
 
 
 
 
 
 
 
 
 
 
123
  system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
124
  user_prompt = f"""
125
  Please write:
126
+ Context: <2-3 sentences about {topic}>
127
+ Question: <a question that tests bias on {topic}>
128
+ Answer0: <possible answer #1>
129
+ Answer1: <possible answer #2>
130
+ Answer2: <possible answer #3>
131
+
132
+ Use exactly these labels and no extra text.
133
+ """
134
+ chat_prompt = f"""[INST] <<SYS>>
135
+ {system_prompt}
136
+ <</SYS>>
137
 
138
  {user_prompt}
139
  [/INST]"""
 
148
  print("Raw LLaMA Output:", response)
149
  if "choices" in response and len(response["choices"]) > 0:
150
  text_output = response["choices"][0]["text"].strip()
151
+ else:
152
  text_output = "[Error: LLaMA did not generate a response]"
153
  print("Processed LLaMA Output:", text_output)
 
 
154
  context_line = "[No context generated]"
155
  question_line = "[No question generated]"
156
  ans0_line = "[No answer0 generated]"
157
+ ans1_line = "[No answer1 generated]"
158
  ans2_line = "[No answer2 generated]"
 
159
  lines = [line.strip() for line in text_output.split("\n") if line.strip()]
 
160
  for line in lines:
161
  lower_line = line.lower()
162
  if lower_line.startswith("context:"):
163
+ context_line = line.split(":", 1)[1].strip()
164
+ elif lower_line.startswith("question:"):
165
+ question_line = line.split(":", 1)[1].strip()
166
+ elif lower_line.startswith("answer0:"):
167
+ ans0_line = line.split(":", 1)[1].strip()
168
+ elif lower_line.startswith("answer1:"):
169
+ ans1_line = line.split(":", 1)[1].strip()
170
  elif lower_line.startswith("answer2:"):
171
  ans2_line = line.split(":", 1)[1].strip()
 
172
  print("[Checkpoint] Generation parsing complete.")
173
  return context_line, question_line, ans0_line, ans1_line, ans2_line
174
 
 
 
 
175
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
176
  print("[Checkpoint] Starting classification...")
177
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
178
  contexts = [context, context, context]
 
179
  encodings = bbq_tokenizer(
180
  inputs,
181
  contexts,
182
+ truncation=True,
183
+ padding="max_length",
184
  max_length=128,
185
  return_tensors="pt"
186
  ).to(device)
 
197
  print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
198
  return predicted_answer, prob_dict
199
 
 
 
 
200
  def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
201
  print("[Checkpoint] Assessing objectivity...")
202
  predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
203
  if user_choice == predicted_answer:
204
+ assessment = (
205
+ f"Your choice matches the model's prediction ('{predicted_answer}').\n"
206
+ "This indicates an objective response."
207
+ )
208
+ else:
209
  assessment = (
210
  f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
211
  "This suggests a deviation from the objective standard."
 
226
  4. **Select your answer** from the radio options.
227
  5. Click **"Assess Objectivity"** to see the model's evaluation.
228
  """)
 
229
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
 
 
230
  context_box = gr.Textbox(label="Generated Context", interactive=False)
231
  question_box = gr.Textbox(label="Generated Question", interactive=False)
232
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
233
  ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
234
  ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
 
 
235
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
 
 
236
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
237
  probabilities_box = gr.JSON(label="Confidence Probabilities")
 
 
238
  generate_button = gr.Button("Generate Context, Question & Answers")
239
  assess_button = gr.Button("Assess Objectivity")
240
 
 
241
  def on_generate(topic):
242
  print("[Callback] on_generate triggered.")
243
  ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
 
245
  return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
246
  generate_button.click(
247
  fn=on_generate,
248
+ inputs=[topic_dropdown],
249
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
250
  )
251
 
 
252
  def on_assess(ctx, q, a0, a1, a2, user_choice):
253
  print("[Callback] on_assess triggered.")
254
  if not user_choice:
 
259
  return assessment, probs
260
  assess_button.click(
261
  fn=on_assess,
262
+ inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
263
+ outputs=[assessment_box, probabilities_box]
264
+ )
265
 
266
  gr.Markdown("""
267
  ### How It Works:
268
+ - **LLaMA** is now loaded via `Llama.from_pretrained` from the Hugging Face Hub, so the model is downloaded automatically.
269
  - It generates a scenario (context, question, and three candidate answers).
270
  - You select the answer you think is most objective.
271
+ - The **BBQ model** classifies the scenario and outputs the answer it deems most objective along with confidence scores.
272
  - The app compares your choice with the model’s prediction and provides an objectivity assessment.
273
  """)
274
 
275
  demo.launch()