Spaces:

euler03
/

bbq

Sleeping

App Files Community

euler03 committed on Mar 14, 2025

Commit

12b5a2d

verified ·

1 Parent(s): 50d0c15

not local

Browse files

Files changed (1) hide show

app.py +83 -69

app.py CHANGED Viewed

@@ -1,101 +1,108 @@
 import gradio as gr
 import torch
-from transformers import pipeline, AutoModelForMultipleChoice, AutoTokenizer
 # GPU setup
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Using device:", device)
 if device == "cuda":
     print("GPU Name:", torch.cuda.get_device_name(0))
-# Load Mistral-7B-Instruct via pipeline
-model_name = "mistralai/Mistral-7B-Instruct"
-text_generation = pipeline(
-    "text-generation",
-    model=model_name,
-    device=0 if device == "cuda" else -1,  # use GPU if available
-    max_new_tokens=256,  # Adjust as needed
-    temperature=1.0,
-    do_sample=True
 )
-# Load BBQ model (unchanged)
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 print("BBQ model loaded.")
-# Topics
 TOPICS = [
     "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
-    "Immigration Policies in the USA", "Social Media's Role in Elections",
-    "The Ethics of Genetic Engineering", "Universal Basic Income Pros and Cons",
-    "Impact of AI on Jobs", "Gender Pay Gap in the Workplace",
-    "Government Surveillance and Privacy", "Cryptocurrency Regulation",
-    "Censorship in Journalism", "Nuclear Energy as a Climate Solution",
-    "Effects of Misinformation on Society", "Affirmative Action in Universities",
-    "Automation and Its Effect on the Workforce", "The Role of Religion in Politics",
     "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
     "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 ]
 print("Topics ready.")
 def generate_context_question_answers(topic):
     print(f"[Checkpoint] Generating scenario for topic: {topic}")
     system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
     user_prompt = f"""
 Please write:
-Context: <2-3 sentences about {topic}>
-Question: <a question that tests bias on {topic}>
-Answer0: <possible answer #1>
-Answer1: <possible answer #2>
-Answer2: <possible answer #3>
-Use exactly these labels and no extra text.
-"""
-    # Combine them into a single prompt
-    chat_prompt = f"[SYSTEM]\n{system_prompt}\n[USER]\n{user_prompt}"
-    print("[Checkpoint] Prompt prepared, calling Mistral pipeline...")
-    # Generate text
-    output = text_generation(chat_prompt)[0]["generated_text"]
-    print("[Checkpoint] Mistral call complete.")
-    print("Raw Mistral Output:", output)
-    # Default placeholders
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
-    ans1_line = "[No answer1 generated]"
     ans2_line = "[No answer2 generated]"
-    lines = [line.strip() for line in output.split("\n") if line.strip()]
     print(f"[Checkpoint] Parsed {len(lines)} lines.")
     for line in lines:
         lower_line = line.lower()
         if lower_line.startswith("context:"):
-            context_line = line.split(":", 1)[1].strip()
-        elif lower_line.startswith("question:"):
-            question_line = line.split(":", 1)[1].strip()
-        elif lower_line.startswith("answer0:"):
-            ans0_line = line.split(":", 1)[1].strip()
-        elif lower_line.startswith("answer1:"):
-            ans1_line = line.split(":", 1)[1].strip()
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
     print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
     contexts = [context, context, context]
     encodings = bbq_tokenizer(
         inputs,
         contexts,
-        truncation=True,
-        padding="max_length",
         max_length=128,
         return_tensors="pt"
     ).to(device)
@@ -112,15 +119,13 @@ def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
     return predicted_answer, prob_dict
 def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
     print("[Checkpoint] Assessing objectivity...")
     predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
     if user_choice == predicted_answer:
-        assessment = (
-            f"Your choice matches the model's prediction ('{predicted_answer}').\n"
-            "This indicates an objective response."
-        )
-    else:
         assessment = (
             f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
             "This suggests a deviation from the objective standard."
@@ -128,42 +133,52 @@ def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
     print("[Checkpoint] Assessment complete.")
     return assessment, prob_dict
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 Bias Detection: Mistral-7B + BBQ")
     gr.Markdown("""
 **Steps:**
-1. Select a topic.
-2. Click "Generate Context, Question & Answers."
-3. Review the generated scenario and answers.
-4. Choose the answer you think is most objective.
-5. Click "Assess Objectivity."
 """)
     topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
     context_box = gr.Textbox(label="Generated Context", interactive=False)
     question_box = gr.Textbox(label="Generated Question", interactive=False)
     ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
     ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
     ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
     user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
     assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
     probabilities_box = gr.JSON(label="Confidence Probabilities")
     generate_button = gr.Button("Generate Context, Question & Answers")
     assess_button = gr.Button("Assess Objectivity")
     def on_generate(topic):
         print("[Callback] on_generate triggered.")
         ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
         print("[Callback] on_generate complete.")
         return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
     generate_button.click(
         fn=on_generate,
-        inputs=[topic_dropdown],
         outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
     )
     def on_assess(ctx, q, a0, a1, a2, user_choice):
         print("[Callback] on_assess triggered.")
         if not user_choice:
@@ -172,18 +187,17 @@ with gr.Blocks() as demo:
         assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
         print("[Callback] on_assess complete.")
         return assessment, probs
     assess_button.click(
         fn=on_assess,
-        inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
-        outputs=[assessment_box, probabilities_box]
-    )
     gr.Markdown("""
 ### How It Works:
-- **Mistral-7B-Instruct** is loaded via the Hugging Face `text-generation` pipeline.
-- **BBQ** is your multiple-choice bias classifier.
-- The app compares your chosen answer to the model's predicted answer and returns an objectivity assessment.
 """)
 demo.launch()

 import gradio as gr
 import torch
+from llama_cpp import Llama
 # GPU setup
+# -------------------------------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
 print("Using device:", device)
 if device == "cuda":
     print("GPU Name:", torch.cuda.get_device_name(0))
+# -------------------------------------------------------
+# Load LLaMA from Hugging Face Hub (for generation)
+# -------------------------------------------------------
+# Instead of a local path, use from_pretrained to download the model automatically.
+llm = Llama.from_pretrained(
+    repo_id="TheBloke/llama-2-7b-chat-GGUF",   # Replace with the repo you want to use
+    filename="llama-2-7b-chat.Q4_K_M.gguf",      # Name of the GGUF file in the repo
+    n_ctx=512,
+    n_gpu_layers=30,                           # Adjust if needed based on available VRAM
 )
+# -------------------------------------------------------
+# Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
+# -------------------------------------------------------
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 print("BBQ model loaded.")
+# -------------------------------------------------------
+# List of Topics
+# -------------------------------------------------------
 TOPICS = [
     "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
     "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
     "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 ]
 print("Topics ready.")
+# -------------------------------------------------------
+# Generation: Context, Question & 3 Answers using LLaMA
+# -------------------------------------------------------
 def generate_context_question_answers(topic):
     print(f"[Checkpoint] Generating scenario for topic: {topic}")
     system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
     user_prompt = f"""
 Please write:
+{user_prompt}
+[/INST]"""
+    print("[Checkpoint] Prompt prepared, calling LLaMA...")
+    response = llm(
+        chat_prompt,
+        max_tokens=256,  # Adjust as needed for faster generation
+        temperature=1.0,
+        echo=False
+    )
+    print("[Checkpoint] LLaMA call complete.")
+    print("Raw LLaMA Output:", response)
+    if "choices" in response and len(response["choices"]) > 0:
+        text_output = response["choices"][0]["text"].strip()
+        text_output = "[Error: LLaMA did not generate a response]"
+    print("Processed LLaMA Output:", text_output)
+    # Default values in case parsing fails
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
     ans2_line = "[No answer2 generated]"
+    lines = [line.strip() for line in text_output.split("\n") if line.strip()]
     print(f"[Checkpoint] Parsed {len(lines)} lines.")
     for line in lines:
         lower_line = line.lower()
         if lower_line.startswith("context:"):
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
     print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line
+# -------------------------------------------------------
+# Classification: Run BBQ Model (Multiple-Choice)
+# -------------------------------------------------------
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
     contexts = [context, context, context]
     encodings = bbq_tokenizer(
         inputs,
         contexts,
         max_length=128,
         return_tensors="pt"
     ).to(device)
     print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
     return predicted_answer, prob_dict
+# -------------------------------------------------------
+# Assess Objectivity: Compare User's Choice to Model's Prediction
+# -------------------------------------------------------
 def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
     print("[Checkpoint] Assessing objectivity...")
     predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
     if user_choice == predicted_answer:
         assessment = (
             f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
             "This suggests a deviation from the objective standard."
     print("[Checkpoint] Assessment complete.")
     return assessment, prob_dict
+# -------------------------------------------------------
+# Build the Gradio Interface
+# -------------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🧠 Bias Detection: Assessing Objectivity (Cloud Version)")
     gr.Markdown("""
 **Steps:**
+1. **Select a topic** from the dropdown.
+2. Click **"Generate Context, Question & Answers"** to generate a scenario.
+3. **Review** the generated context, question, and candidate answers.
+4. **Select your answer** from the radio options.
+5. Click **"Assess Objectivity"** to see the model's evaluation.
 """)
     topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
     context_box = gr.Textbox(label="Generated Context", interactive=False)
     question_box = gr.Textbox(label="Generated Question", interactive=False)
     ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
     ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
     ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
     user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
     assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
     probabilities_box = gr.JSON(label="Confidence Probabilities")
     generate_button = gr.Button("Generate Context, Question & Answers")
     assess_button = gr.Button("Assess Objectivity")
     def on_generate(topic):
         print("[Callback] on_generate triggered.")
         ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
         print("[Callback] on_generate complete.")
         return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
     generate_button.click(
         fn=on_generate,
         outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
     )
     def on_assess(ctx, q, a0, a1, a2, user_choice):
         print("[Callback] on_assess triggered.")
         if not user_choice:
         assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
         print("[Callback] on_assess complete.")
         return assessment, probs
     assess_button.click(
         fn=on_assess,
     gr.Markdown("""
 ### How It Works:
+- **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
+- It generates a scenario (context, question, and three candidate answers).
+- You select the answer you think is most objective.
+- The **BBQ model** classifies the same scenario and outputs the answer it deems most objective along with confidence scores.
+- The app compares your choice with the model’s prediction and provides an objectivity assessment.
 """)
 demo.launch()