euler03 committed on
Commit
d6eb293
·
verified ·
1 Parent(s): 0187815

with offline

Browse files
Files changed (1) hide show
  1. app.py +200 -46
app.py CHANGED
@@ -1,3 +1,6 @@
 
 
 
1
  import gradio as gr
2
  import torch
3
  from llama_cpp import Llama
@@ -20,13 +23,13 @@ if device == "cuda":
20
  # -------------------------------------------------------
21
  MODELS = {
22
  "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
23
- # You can add more models here if needed.
24
  }
25
  id2label = {0: "BIASED", 1: "NEUTRAL"}
26
  label2id = {"BIASED": 0, "NEUTRAL": 1}
27
  loaded_models = {}
28
 
29
  def load_model(model_name: str):
 
30
  if model_name not in loaded_models:
31
  try:
32
  model_path = MODELS[model_name]
@@ -44,6 +47,7 @@ def load_model(model_name: str):
44
  return loaded_models[model_name]
45
 
46
  def analyze_text(text: str, model_name: str):
 
47
  if not text.strip():
48
  return {"Empty text": 1.0}, "Please enter text to analyze."
49
  result = load_model(model_name)
@@ -77,41 +81,115 @@ def analyze_text(text: str, model_name: str):
77
  # 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
78
  # -------------------------------------------------------
79
  # (a) Load LLaMA from Hugging Face Hub (for generation)
80
- # Here we use from_pretrained so that the model is downloaded automatically
81
  llm = Llama.from_pretrained(
82
- repo_id="TheBloke/llama-2-7b-chat-GGUF", # Repo on Hugging Face Hub
83
- filename="llama-2-7b-chat.Q4_K_M.gguf", # GGUF file name in that repo
84
  n_ctx=512,
85
- n_gpu_layers=30 # try
86
  )
87
-
88
- # (b) Load BBQ Fine-Tuned BERT Model & Tokenizer (for multiple-choice)
89
  BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
90
  bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
91
  bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
92
  print("BBQ model loaded.")
93
 
 
 
 
94
  TOPICS = [
95
- "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
96
- "Immigration Policies in the USA", "Social Media's Role in Elections",
97
- "The Ethics of Genetic Engineering", "Universal Basic Income Pros and Cons",
98
- "Impact of AI on Jobs", "Gender Pay Gap in the Workplace",
99
- "Government Surveillance and Privacy", "Cryptocurrency Regulation",
100
- "Censorship in Journalism", "Nuclear Energy as a Climate Solution",
101
- "Effects of Misinformation on Society", "Affirmative Action in Universities",
102
- "Automation and Its Effect on the Workforce", "The Role of Religion in Politics",
103
- "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
104
- "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  ]
106
- print("Topics ready.")
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def generate_context_question_answers(topic):
109
  """
110
- Use LLaMA (chat-style prompt) to generate:
111
- - Context: 2-3 sentences about the topic.
112
- - Question: A question testing bias on the topic.
113
- - Answer0, Answer1, Answer2: Three candidate answers.
114
- Expected format (exactly):
115
  Context: <...>
116
  Question: <...>
117
  Answer0: <...>
@@ -127,13 +205,11 @@ Question: <a question that tests bias on {topic}>
127
  Answer0: <possible answer #1>
128
  Answer1: <possible answer #2>
129
  Answer2: <possible answer #3>
130
-
131
  Use exactly these labels and no extra text.
132
  """
133
  chat_prompt = f"""[INST] <<SYS>>
134
  {system_prompt}
135
  <</SYS>>
136
-
137
  {user_prompt}
138
  [/INST]"""
139
  print("[Checkpoint] Prompt prepared, calling LLaMA...")
@@ -145,11 +221,13 @@ Use exactly these labels and no extra text.
145
  )
146
  print("[Checkpoint] LLaMA call complete.")
147
  print("Raw LLaMA Output:", response)
 
148
  if "choices" in response and len(response["choices"]) > 0:
149
  text_output = response["choices"][0]["text"].strip()
150
  else:
151
  text_output = "[Error: LLaMA did not generate a response]"
152
  print("Processed LLaMA Output:", text_output)
 
153
  context_line = "[No context generated]"
154
  question_line = "[No question generated]"
155
  ans0_line = "[No answer0 generated]"
@@ -168,9 +246,13 @@ Use exactly these labels and no extra text.
168
  ans1_line = line.split(":", 1)[1].strip()
169
  elif lower_line.startswith("answer2:"):
170
  ans2_line = line.split(":", 1)[1].strip()
 
171
  print("[Checkpoint] Generation parsing complete.")
172
  return context_line, question_line, ans0_line, ans1_line, ans2_line
173
 
 
 
 
174
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
175
  print("[Checkpoint] Starting classification...")
176
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
@@ -218,6 +300,7 @@ def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
218
  with gr.Blocks() as app:
219
  gr.Markdown("# Objectivity Analysis Suite")
220
  gr.Markdown("Choose a functionality below:")
 
221
  with gr.Tabs():
222
  # --- Tab 1: Text Objectivity Analysis ---
223
  with gr.TabItem("Text Analysis"):
@@ -243,29 +326,40 @@ with gr.Blocks() as app:
243
  show_label=True
244
  )
245
  result_message = gr.Textbox(label="Detailed results")
 
246
  analyze_button.click(
247
  analyze_text,
248
  inputs=[text_input, model_dropdown],
249
  outputs=[confidence_output, result_message]
250
  )
 
251
  gr.Markdown("## How to use this application")
252
  gr.Markdown("""
253
  1. Select a model from the drop-down.
254
  2. Enter or paste the text to be analyzed.
255
  3. Click **'Analyze the text'** to see the results.
256
  """)
 
257
  # --- Tab 2: Scenario-based Objectivity Assessment ---
258
  with gr.TabItem("Scenario Assessment"):
259
  gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
260
  gr.Markdown("""
261
  **Steps:**
262
- 1. Select a topic from the dropdown.
263
- 2. Click **'Generate Context, Question & Answers'** to generate a scenario.
264
- 3. Review the generated context, question, and 3 candidate answers.
265
- 4. Select your answer from the radio options.
266
- 5. Click **'Assess Objectivity'** to see the evaluation.
 
267
  """)
 
268
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
 
 
 
 
 
 
269
  context_box = gr.Textbox(label="Generated Context", interactive=False)
270
  question_box = gr.Textbox(label="Generated Question", interactive=False)
271
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
@@ -274,43 +368,103 @@ with gr.Blocks() as app:
274
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
275
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
276
  probabilities_box = gr.JSON(label="Confidence Probabilities")
277
- generate_button = gr.Button("Generate Context, Question & Answers")
278
  assess_button = gr.Button("Assess Objectivity")
279
- def on_generate(topic):
280
- print("[Callback] on_generate triggered.")
281
- ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
282
- print("[Callback] on_generate complete.")
283
- return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  generate_button.click(
285
  fn=on_generate,
286
- inputs=[topic_dropdown],
287
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
288
  )
 
289
  def on_assess(ctx, q, a0, a1, a2, user_choice):
290
- print("[Callback] on_assess triggered.")
291
  if not user_choice:
292
- print("[Callback] No user choice selected.")
293
  return "Please select one of the generated answers.", {}
294
  assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
295
- print("[Callback] on_assess complete.")
296
  return assessment, probs
 
297
  assess_button.click(
298
  fn=on_assess,
299
  inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
300
  outputs=[assessment_box, probabilities_box]
301
  )
 
302
  gr.Markdown("### How It Works:")
303
  gr.Markdown("""
304
- - **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
305
- - It generates a scenario (context, question, and three candidate answers).
306
- - You select the answer that you think is most objective.
307
- - The **BBQ model** classifies the scenario and outputs the answer it deems most objective along with confidence scores.
308
- - The app compares your choice with the model’s prediction and provides an objectivity assessment.
309
  """)
 
310
  gr.Markdown("## Additional Instructions")
311
  gr.Markdown("""
312
  - In the **Text Analysis** tab, you can analyze any text for objectivity.
313
- - In the **Scenario Assessment** tab, LLaMA generates a scenario and you assess your objectivity by selecting one of the candidate answers.
314
  """)
315
 
316
  app.launch()
 
1
+ import os
2
+ import json
3
+ import random
4
  import gradio as gr
5
  import torch
6
  from llama_cpp import Llama
 
23
  # -------------------------------------------------------
24
  MODELS = {
25
  "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
 
26
  }
27
  id2label = {0: "BIASED", 1: "NEUTRAL"}
28
  label2id = {"BIASED": 0, "NEUTRAL": 1}
29
  loaded_models = {}
30
 
31
  def load_model(model_name: str):
32
+ """Load and cache a sequence classification model for text objectivity analysis."""
33
  if model_name not in loaded_models:
34
  try:
35
  model_path = MODELS[model_name]
 
47
  return loaded_models[model_name]
48
 
49
  def analyze_text(text: str, model_name: str):
50
+ """Analyze the text for bias or neutrality using a selected classification model."""
51
  if not text.strip():
52
  return {"Empty text": 1.0}, "Please enter text to analyze."
53
  result = load_model(model_name)
 
81
  # 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
82
  # -------------------------------------------------------
83
  # (a) Load LLaMA from Hugging Face Hub (for generation)
 
84
  llm = Llama.from_pretrained(
85
+ repo_id="TheBloke/llama-2-7b-chat-GGUF",
86
+ filename="llama-2-7b-chat.Q4_K_M.gguf",
87
  n_ctx=512,
88
+ n_gpu_layers=30,
89
  )
90
+ # (b) Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
 
91
  BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
92
  bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
93
  bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
94
  print("BBQ model loaded.")
95
 
96
+ # -------------------------------------------------------
97
+ # Replace original topics with your offline scenario topics
98
+ # -------------------------------------------------------
99
  TOPICS = [
100
+ "AI in Healthcare",
101
+ "Climate Change",
102
+ "Universal Basic Income",
103
+ "Social Media's Role in Elections",
104
+ "Government Surveillance and Privacy",
105
+ "Genetic Engineering",
106
+ "Gender Pay Gap",
107
+ "Police Use of Facial Recognition",
108
+ "Space Exploration and Government Funding",
109
+ "Affirmative Action in Universities",
110
+ "Renewable Energy Advances",
111
+ "Mental Health Awareness",
112
+ "Online Privacy and Data Security",
113
+ "Impact of Automation on Employment",
114
+ "Electric Vehicles Adoption",
115
+ "Work From Home Culture",
116
+ "Food Security and GMOs",
117
+ "Cryptocurrency Volatility",
118
+ "Artificial Intelligence in Education",
119
+ "Cultural Diversity in Media",
120
+ "Urbanization and Infrastructure",
121
+ "Healthcare Reform",
122
+ "Taxation Policies",
123
+ "Global Trade and Tariffs",
124
+ "Environmental Conservation",
125
+ "Social Justice Movements",
126
+ "Digital Transformation in Business",
127
+ "Public Transportation Funding",
128
+ "Immigration Reform",
129
+ "Aging Population Challenges",
130
+ "Mental Health in the Workplace",
131
+ "Internet Censorship",
132
+ "Political Polarization",
133
+ "Cybersecurity in the Digital Age",
134
+ "Privacy vs. Security",
135
+ "Sustainable Agriculture",
136
+ "Future of Work",
137
+ "Tech Monopolies",
138
+ "Education Reform",
139
+ "Climate Policy and Economics",
140
+ "Renewable Energy Storage",
141
+ "Water Scarcity",
142
+ "Urban Green Spaces",
143
+ "Automation in Manufacturing",
144
+ "Renewable Energy Subsidies",
145
+ "Universal Healthcare",
146
+ "Workplace Automation",
147
+ "Cultural Heritage Preservation",
148
+ "Biotechnology in Agriculture",
149
+ "Media Bias",
150
+ "Renewable Energy Policy",
151
+ "Artificial Intelligence Ethics",
152
+ "Space Colonization",
153
+ "Social Media Regulation",
154
+ "Virtual Reality in Education",
155
+ "Blockchain in Supply Chain",
156
+ "Data-Driven Policymaking",
157
+ "Gig Economy",
158
+ "Climate Adaptation Strategies",
159
+ "Economic Inequality",
160
+ "Sustainable Urban Development",
161
+ "Media Regulation"
162
  ]
163
+ print(f"Offline topics loaded. Total: {len(TOPICS)}")
164
 
165
+ # -------------------------------------------------------
166
+ # Offline scenarios
167
+ # -------------------------------------------------------
168
def load_offline_scenarios(path="scenarios.json"):
    """Load pre-generated scenarios from a JSON file in the working directory.

    Args:
        path: JSON file expected to hold a list of scenario dicts
            (defaults to "scenarios.json", preserving the original behavior).

    Returns:
        list: The parsed scenarios, or an empty list when the file is
        missing, unreadable, or not valid JSON. Returning [] instead of
        raising keeps the app bootable so it can fall back to online
        generation.
    """
    if not os.path.exists(path):
        print(f"No {path} found in working directory.")
        return []
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, json.JSONDecodeError) as err:
        # A corrupt scenarios file must not crash the whole app at startup.
        print(f"Failed to load {path}: {err}")
        return []
    print(f"Offline scenarios loaded: {len(data)} scenarios.")
    return data
177
+
178
+ offline_scenarios = load_offline_scenarios()
179
+
180
def get_offline_scenario(topic, scenarios=None):
    """Pick a random stored scenario whose topic matches (case-insensitive).

    Args:
        topic: Topic name selected in the UI dropdown.
        scenarios: Optional list of scenario dicts to search. Defaults to
            the module-level ``offline_scenarios`` loaded at startup, so
            existing callers are unaffected.

    Returns:
        dict | None: A randomly chosen matching scenario, or None when no
        scenario's "topic" field matches.
    """
    pool = offline_scenarios if scenarios is None else scenarios
    # Lowercase the query once instead of on every comparison.
    wanted = topic.lower()
    matches = [s for s in pool if s.get("topic", "").lower() == wanted]
    return random.choice(matches) if matches else None
186
+
187
+ # -------------------------------------------------------
188
+ # Generation: Combined scenario (Context + Question + 3 Answers)
189
+ # -------------------------------------------------------
190
  def generate_context_question_answers(topic):
191
  """
192
+ Use LLaMA to generate:
 
 
 
 
193
  Context: <...>
194
  Question: <...>
195
  Answer0: <...>
 
205
  Answer0: <possible answer #1>
206
  Answer1: <possible answer #2>
207
  Answer2: <possible answer #3>
 
208
  Use exactly these labels and no extra text.
209
  """
210
  chat_prompt = f"""[INST] <<SYS>>
211
  {system_prompt}
212
  <</SYS>>
 
213
  {user_prompt}
214
  [/INST]"""
215
  print("[Checkpoint] Prompt prepared, calling LLaMA...")
 
221
  )
222
  print("[Checkpoint] LLaMA call complete.")
223
  print("Raw LLaMA Output:", response)
224
+
225
  if "choices" in response and len(response["choices"]) > 0:
226
  text_output = response["choices"][0]["text"].strip()
227
  else:
228
  text_output = "[Error: LLaMA did not generate a response]"
229
  print("Processed LLaMA Output:", text_output)
230
+
231
  context_line = "[No context generated]"
232
  question_line = "[No question generated]"
233
  ans0_line = "[No answer0 generated]"
 
246
  ans1_line = line.split(":", 1)[1].strip()
247
  elif lower_line.startswith("answer2:"):
248
  ans2_line = line.split(":", 1)[1].strip()
249
+
250
  print("[Checkpoint] Generation parsing complete.")
251
  return context_line, question_line, ans0_line, ans1_line, ans2_line
252
 
253
+ # -------------------------------------------------------
254
+ # Classification: Run BBQ Model (Multiple-Choice)
255
+ # -------------------------------------------------------
256
  def classify_multiple_choice(context, question, ans0, ans1, ans2):
257
  print("[Checkpoint] Starting classification...")
258
  inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
 
300
  with gr.Blocks() as app:
301
  gr.Markdown("# Objectivity Analysis Suite")
302
  gr.Markdown("Choose a functionality below:")
303
+
304
  with gr.Tabs():
305
  # --- Tab 1: Text Objectivity Analysis ---
306
  with gr.TabItem("Text Analysis"):
 
326
  show_label=True
327
  )
328
  result_message = gr.Textbox(label="Detailed results")
329
+
330
  analyze_button.click(
331
  analyze_text,
332
  inputs=[text_input, model_dropdown],
333
  outputs=[confidence_output, result_message]
334
  )
335
+
336
  gr.Markdown("## How to use this application")
337
  gr.Markdown("""
338
  1. Select a model from the drop-down.
339
  2. Enter or paste the text to be analyzed.
340
  3. Click **'Analyze the text'** to see the results.
341
  """)
342
+
343
  # --- Tab 2: Scenario-based Objectivity Assessment ---
344
  with gr.TabItem("Scenario Assessment"):
345
  gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
346
  gr.Markdown("""
347
  **Steps:**
348
+ 1. Select a topic from the dropdown below (topics match your offline JSON).
349
+ 2. Check "Use Offline Data" if you want to load a pre-generated scenario.
350
+ Otherwise, generate a new scenario using the LLaMA-based generation buttons.
351
+ 3. Review the context, question, and 3 candidate answers.
352
+ 4. Select your answer.
353
+ 5. Click "Assess Objectivity" to see the model's evaluation.
354
  """)
355
+
356
  topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
357
+ use_offline_checkbox = gr.Checkbox(label="Use Offline Data", value=False)
358
+ load_offline_button = gr.Button("Load Offline Scenario")
359
+
360
+ with gr.Row():
361
+ generate_button = gr.Button("Generate Context, Question & Answers")
362
+
363
  context_box = gr.Textbox(label="Generated Context", interactive=False)
364
  question_box = gr.Textbox(label="Generated Question", interactive=False)
365
  ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
 
368
  user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
369
  assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
370
  probabilities_box = gr.JSON(label="Confidence Probabilities")
 
371
  assess_button = gr.Button("Assess Objectivity")
372
+
373
+ # Offline scenario loader
374
+ def on_load_offline_scenario(topic, use_offline):
375
+ """Load offline scenario if use_offline is True and a matching scenario is found."""
376
+ if not use_offline:
377
+ return ("[No offline scenario used]", "[No offline scenario used]",
378
+ "[No offline scenario used]", "[No offline scenario used]",
379
+ "[No offline scenario used]",
380
+ gr.update(choices=[], value=None))
381
+ scenario = get_offline_scenario(topic)
382
+ if scenario:
383
+ return (
384
+ scenario.get("context", "[No context]"),
385
+ scenario.get("question", "[No question]"),
386
+ scenario.get("answer0", "[No answer0]"),
387
+ scenario.get("answer1", "[No answer1]"),
388
+ scenario.get("answer2", "[No answer2]"),
389
+ gr.update(
390
+ choices=[
391
+ scenario.get("answer0", ""),
392
+ scenario.get("answer1", ""),
393
+ scenario.get("answer2", "")
394
+ ],
395
+ value=None
396
+ )
397
+ )
398
+ else:
399
+ return ("[No offline scenario found]", "[No offline scenario found]",
400
+ "[No offline scenario found]", "[No offline scenario found]",
401
+ "[No offline scenario found]", gr.update(choices=[], value=None))
402
+
403
+ load_offline_button.click(
404
+ fn=on_load_offline_scenario,
405
+ inputs=[topic_dropdown, use_offline_checkbox],
406
+ outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
407
+ )
408
+
409
+ # Online scenario generation (all in one function)
410
+ def on_generate(topic, use_offline):
411
+ """If user doesn't want offline or no offline scenario, generate new scenario with LLaMA."""
412
+ if use_offline:
413
+ # Attempt offline scenario first
414
+ scenario = get_offline_scenario(topic)
415
+ if scenario:
416
+ return (
417
+ scenario.get("context", "[No context]"),
418
+ scenario.get("question", "[No question]"),
419
+ scenario.get("answer0", "[No answer0]"),
420
+ scenario.get("answer1", "[No answer1]"),
421
+ scenario.get("answer2", "[No answer2]"),
422
+ gr.update(
423
+ choices=[
424
+ scenario.get("answer0", ""),
425
+ scenario.get("answer1", ""),
426
+ scenario.get("answer2", "")
427
+ ],
428
+ value=None
429
+ )
430
+ )
431
+ # If no offline scenario found, fallback to generation
432
+ ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
433
+ return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
434
+ else:
435
+ # Purely online generation
436
+ ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
437
+ return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
438
+
439
  generate_button.click(
440
  fn=on_generate,
441
+ inputs=[topic_dropdown, use_offline_checkbox],
442
  outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
443
  )
444
+
445
def on_assess(ctx, q, a0, a1, a2, user_choice):
    """Run the objectivity assessment once the user has picked an answer."""
    # Guard: nothing selected yet — prompt instead of assessing.
    if not user_choice:
        return "Please select one of the generated answers.", {}
    verdict, confidences = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
    return verdict, confidences
450
+
451
  assess_button.click(
452
  fn=on_assess,
453
  inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
454
  outputs=[assessment_box, probabilities_box]
455
  )
456
+
457
  gr.Markdown("### How It Works:")
458
  gr.Markdown("""
459
+ - **Offline Mode**: Check "Use Offline Data" and click "Load Offline Scenario" or "Generate" to see if a matching scenario is found in scenarios.json.
460
+ - **Online Generation**: Uncheck "Use Offline Data" (or no scenario found), then click "Generate" to create a new scenario with LLaMA.
461
+ - Finally, select your answer and click "Assess Objectivity."
 
 
462
  """)
463
+
464
  gr.Markdown("## Additional Instructions")
465
  gr.Markdown("""
466
  - In the **Text Analysis** tab, you can analyze any text for objectivity.
467
+ - In the **Scenario Assessment** tab, you can load a scenario offline or generate one with LLaMA.
468
  """)
469
 
470
  app.launch()