not local
app.py CHANGED
@@ -1,4 +1,3 @@
-import os
 import gradio as gr
 import torch
 from llama_cpp import Llama
@@ -8,29 +7,31 @@ from transformers import AutoModelForMultipleChoice, AutoTokenizer
 # GPU setup
 # -------------------------------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
+print("Using device:", device)
+if device == "cuda":
+    print("GPU Name:", torch.cuda.get_device_name(0))
 
 # -------------------------------------------------------
-# Load LLaMA
+# Load LLaMA from Hugging Face Hub (for generation)
 # -------------------------------------------------------
-
-
-
-
-llm = Llama(
-    model_path=LLAMA_MODEL_PATH,
+# Instead of a local path, use from_pretrained to download the model automatically.
+llm = Llama.from_pretrained(
+    repo_id="TheBloke/llama-2-7b-chat-GGUF",  # Replace with the repo you want to use
+    filename="llama-2-7b-chat.Q4_K_M.gguf",  # Name of the GGUF file in the repo
     n_ctx=512,
-    n_gpu_layers=
+    n_gpu_layers=30,  # Adjust if needed based on available VRAM
 )
 
 # -------------------------------------------------------
-# Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice
+# Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
 # -------------------------------------------------------
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
+print("BBQ model loaded.")
 
 # -------------------------------------------------------
-#
+# List of Topics
 # -------------------------------------------------------
 TOPICS = [
     "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
@@ -44,23 +45,13 @@ TOPICS = [
     "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
     "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 ]
+print("Topics ready.")
 
 # -------------------------------------------------------
-#
+# Generation: Context, Question & 3 Answers using LLaMA
 # -------------------------------------------------------
 def generate_context_question_answers(topic):
-    ""
-    Use LLaMA (chat-style prompt) to generate:
-      - A short, neutral context about the topic.
-      - A question that tests bias on the topic.
-      - Three possible answers (Answer0, Answer1, Answer2).
-    The output is expected in the following format:
-        Context: <...>
-        Question: <...>
-        Answer0: <...>
-        Answer1: <...>
-        Answer2: <...>
-    """
+    print(f"[Checkpoint] Generating scenario for topic: {topic}")
    system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
     user_prompt = f"""
 Please write:
@@ -78,13 +69,14 @@ Use exactly these labels and no extra text.
 
 {user_prompt}
 [/INST]"""
-
+    print("[Checkpoint] Prompt prepared, calling LLaMA...")
     response = llm(
         chat_prompt,
-        max_tokens=256,
+        max_tokens=256,  # Adjust as needed for faster generation
         temperature=1.0,
         echo=False
     )
+    print("[Checkpoint] LLaMA call complete.")
     print("Raw LLaMA Output:", response)
     if "choices" in response and len(response["choices"]) > 0:
         text_output = response["choices"][0]["text"].strip()
@@ -92,7 +84,7 @@ Use exactly these labels and no extra text.
         text_output = "[Error: LLaMA did not generate a response]"
     print("Processed LLaMA Output:", text_output)
 
-    #
+    # Default values in case parsing fails
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
@@ -100,6 +92,7 @@ Use exactly these labels and no extra text.
     ans2_line = "[No answer2 generated]"
 
     lines = [line.strip() for line in text_output.split("\n") if line.strip()]
+    print(f"[Checkpoint] Parsed {len(lines)} lines.")
     for line in lines:
         lower_line = line.lower()
         if lower_line.startswith("context:"):
@@ -113,15 +106,16 @@ Use exactly these labels and no extra text.
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
 
+    print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line
 
 # -------------------------------------------------------
 # Classification: Run BBQ Model (Multiple-Choice)
 # -------------------------------------------------------
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
+    print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
     contexts = [context, context, context]
-
     encodings = bbq_tokenizer(
         inputs,
         contexts,
@@ -130,23 +124,24 @@ def classify_multiple_choice(context, question, ans0, ans1, ans2):
         max_length=128,
         return_tensors="pt"
     ).to(device)
-
+    print("[Checkpoint] Tokenization complete. Running BBQ model...")
     bbq_model.eval()
     with torch.no_grad():
         outputs = bbq_model(**{k: v.unsqueeze(0) for k, v in encodings.items()})
-        logits = outputs.logits[0]
-        probs = torch.softmax(logits, dim=-1)
+    logits = outputs.logits[0]
+    probs = torch.softmax(logits, dim=-1)
     pred_idx = torch.argmax(probs).item()
     all_answers = [ans0, ans1, ans2]
     prob_dict = {all_answers[i]: float(probs[i].item()) for i in range(3)}
     predicted_answer = all_answers[pred_idx]
+    print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
     return predicted_answer, prob_dict
 
 # -------------------------------------------------------
-#
+# Assess Objectivity: Compare User's Choice to Model's Prediction
 # -------------------------------------------------------
 def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
-
+    print("[Checkpoint] Assessing objectivity...")
     predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
     if user_choice == predicted_answer:
         assessment = (
@@ -158,46 +153,39 @@ def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
             f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
             "This suggests a deviation from the objective standard."
         )
+    print("[Checkpoint] Assessment complete.")
     return assessment, prob_dict
 
 # -------------------------------------------------------
 # Build the Gradio Interface
 # -------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 Bias Detection: Assessing Objectivity")
+    gr.Markdown("# 🧠 Bias Detection: Assessing Objectivity (Cloud Version)")
     gr.Markdown("""
 **Steps:**
 1. **Select a topic** from the dropdown.
 2. Click **"Generate Context, Question & Answers"** to generate a scenario.
-3. **Review** the generated context, question, and
+3. **Review** the generated context, question, and candidate answers.
 4. **Select your answer** from the radio options.
 5. Click **"Assess Objectivity"** to see the model's evaluation.
 """)
-    # Topic selection
     topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
-
-    # Outputs from LLaMA generation
     context_box = gr.Textbox(label="Generated Context", interactive=False)
     question_box = gr.Textbox(label="Generated Question", interactive=False)
     ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
     ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
     ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
-
-    # User selection: Choose one answer from the generated answers
     user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
-
-    # Assessment outputs
     assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
     probabilities_box = gr.JSON(label="Confidence Probabilities")
 
-    # Buttons
     generate_button = gr.Button("Generate Context, Question & Answers")
     assess_button = gr.Button("Assess Objectivity")
 
-    # Callback 1: Generate with LLaMA
     def on_generate(topic):
+        print("[Callback] on_generate triggered.")
         ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
-
+        print("[Callback] on_generate complete.")
         return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
     generate_button.click(
         fn=on_generate,
@@ -205,11 +193,13 @@ with gr.Blocks() as demo:
         outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
     )
 
-    # Callback 2: Assess objectivity
     def on_assess(ctx, q, a0, a1, a2, user_choice):
-        if not user_choice:
+        print("[Callback] on_assess triggered.")
+        if not user_choice:
+            print("[Callback] No user choice selected.")
             return "Please select one of the generated answers.", {}
         assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
+        print("[Callback] on_assess complete.")
         return assessment, probs
     assess_button.click(
         fn=on_assess,
@@ -219,10 +209,11 @@ with gr.Blocks() as demo:
 
     gr.Markdown("""
### How It Works:
-- **LLaMA**
--
+- **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
+- It generates a scenario (context, question, and three candidate answers).
+- You select the answer you think is most objective.
 - The **BBQ model** classifies the same scenario and outputs the answer it deems most objective along with confidence scores.
-- The app
+- The app compares your choice with the model’s prediction and provides an objectivity assessment.
 """)
 
-    demo.launch()
+demo.launch()
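The heart of this commit is replacing a local `model_path` with `Llama.from_pretrained`, which downloads the GGUF file from the Hugging Face Hub on first use and caches it locally. Below is a minimal, self-contained sketch of that loading path, assuming `llama-cpp-python` and `huggingface-hub` are installed; the `repo_id` and `filename` come from the diff, while the smoke-test prompt is made up:

```python
# Minimal sketch of the Hub-based loading introduced in this commit.
# Assumes: pip install llama-cpp-python huggingface-hub
from llama_cpp import Llama

llm = Llama.from_pretrained(
    repo_id="TheBloke/llama-2-7b-chat-GGUF",   # Hub repo holding the GGUF files
    filename="llama-2-7b-chat.Q4_K_M.gguf",    # which quantization to fetch
    n_ctx=512,        # context window shared by prompt and completion
    n_gpu_layers=30,  # layers offloaded to the GPU; 0 runs fully on CPU
)

# Hypothetical smoke test using the same completion-style call as app.py.
out = llm("[INST] Say hello in one short sentence. [/INST]",
          max_tokens=32, temperature=1.0, echo=False)
print(out["choices"][0]["text"].strip())
```

One thing worth double-checking: with `n_ctx=512` and `max_tokens=256`, the `[INST]` prompt has at most roughly 256 tokens of headroom, which is tight for the fairly long instruction block the app builds.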
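On the classification side, the `unsqueeze(0)` in `classify_multiple_choice` is there because `AutoModelForMultipleChoice` expects inputs of shape `(batch_size, num_choices, seq_len)`, while the tokenizer call returns `(num_choices, seq_len)`. Here is a standalone sketch of that pattern with the same public checkpoint; the scenario strings are invented, and the `padding`/`truncation` flags are assumptions since the diff elides those lines:

```python
# Sketch of the multiple-choice call pattern used by classify_multiple_choice.
import torch
from transformers import AutoModelForMultipleChoice, AutoTokenizer

MODEL = "euler03/bbq-distil_bumble_bert"
tok = AutoTokenizer.from_pretrained(MODEL)
model = AutoModelForMultipleChoice.from_pretrained(MODEL).eval()

context = "Two coworkers disagreed during a meeting."  # invented example
question = "Who was being unreasonable?"
answers = ["The older coworker", "The younger coworker", "Cannot be determined"]

# One (question + answer, context) pair per choice -> tensors of shape (3, seq_len).
enc = tok([f"{question} {a}" for a in answers], [context] * 3,
          padding="max_length", truncation=True, max_length=128,
          return_tensors="pt")

# The model wants (batch, num_choices, seq_len), hence the unsqueeze(0).
with torch.no_grad():
    logits = model(**{k: v.unsqueeze(0) for k, v in enc.items()}).logits  # (1, 3)

probs = torch.softmax(logits[0], dim=-1)
print(dict(zip(answers, probs.tolist())))
```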
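Finally, the generate callback refills the initially empty `gr.Radio` by returning `gr.update(choices=..., value=None)` into the radio's output slot. A tiny sketch of just that mechanism, assuming the Gradio 3.x API that `gr.update` belongs to (Gradio 4 would return `gr.Radio(choices=...)` instead):

```python
# Sketch: repopulating a Radio from a button callback (Gradio 3.x-style).
import gradio as gr

with gr.Blocks() as demo:
    btn = gr.Button("Fill choices")
    radio = gr.Radio(choices=[], label="Pick one")
    # Returning gr.update(...) mutates the component bound to `outputs`.
    btn.click(fn=lambda: gr.update(choices=["A", "B", "C"], value=None),
              outputs=[radio])

demo.launch()
```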