two options
app.py CHANGED

@@ -1,63 +1,139 @@
 import gradio as gr
 import torch
 from llama_cpp import Llama
-from transformers import AutoTokenizer, AutoModelForMultipleChoice
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    AutoModelForMultipleChoice
+)
+
+# -------------------------------------------------------
+# 1️⃣ Setup: Device
 # -------------------------------------------------------
 device = "cuda" if torch.cuda.is_available() else "cpu"
-print("Using device:", device)
+print(f"Using device: {device}")
 if device == "cuda":
     print("GPU Name:", torch.cuda.get_device_name(0))

 # -------------------------------------------------------
-#
+# 2️⃣ Text Objectivity Analysis (Sequence Classification)
 # -------------------------------------------------------
-
-
-
-
+MODELS = {
+    "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
+    # You can add more models here if needed.
+}
+id2label = {0: "BIASED", 1: "NEUTRAL"}
+label2id = {"BIASED": 0, "NEUTRAL": 1}
+loaded_models = {}
+
+def load_model(model_name: str):
+    if model_name not in loaded_models:
+        try:
+            model_path = MODELS[model_name]
+            model = AutoModelForSequenceClassification.from_pretrained(
+                model_path,
+                num_labels=2,
+                id2label=id2label,
+                label2id=label2id
+            ).to(device)
+            tokenizer = AutoTokenizer.from_pretrained(model_path)
+            loaded_models[model_name] = (model, tokenizer)
+            return model, tokenizer
+        except Exception as e:
+            return f"Error loading model: {str(e)}"
+    return loaded_models[model_name]
+
+def analyze_text(text: str, model_name: str):
+    if not text.strip():
+        return {"Empty text": 1.0}, "Please enter text to analyze."
+    result = load_model(model_name)
+    if isinstance(result, str):
+        return {"Error": 1.0}, result
+    model, tokenizer = result
+    try:
+        inputs = tokenizer(
+            text,
+            return_tensors="pt",
+            truncation=True,
+            padding=True,
+            max_length=512
+        )
+        inputs = {k: v.to(device) for k, v in inputs.items()}
+        model.eval()
+        with torch.no_grad():
+            outputs = model(**inputs)
+        logits = outputs.logits[0]
+        probabilities = torch.nn.functional.softmax(logits, dim=0)
+        predicted_class = torch.argmax(logits).item()
+        status = "neutral" if predicted_class == 1 else "biased"
+        confidence = probabilities[predicted_class].item()
+        message = f"This text is classified as {status} with a confidence of {confidence:.2%}."
+        confidence_map = {"Neutral": probabilities[1].item(), "Biased": probabilities[0].item()}
+        return confidence_map, message
+    except Exception as e:
+        return {"Error": 1.0}, f"Analysis error: {str(e)}"

+# -------------------------------------------------------
+# 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
+# -------------------------------------------------------

+# Load LLaMA from Hugging Face Hub (for generation)
+# Now we load the model from the HF Hub automatically.
+llm = Llama.from_pretrained(
+    repo_id="TheBloke/llama-2-7b-chat-GGUF",  # Repo on HF Hub
+    filename="llama-2-7b-chat.Q4_K_M.gguf",
     n_ctx=512,
-    n_gpu_layers=30, #
+    n_gpu_layers=30,  # first try
 )

-# -------------------------------------------------------
-# Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
-# -------------------------------------------------------
+# Load BBQ Fine-Tuned BERT Model & Tokenizer (for multiple-choice)
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 print("BBQ model loaded.")

-# -------------------------------------------------------
-# List of Topics
-# -------------------------------------------------------
 TOPICS = [
     "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
+    "Immigration Policies in the USA", "Social Media's Role in Elections",
+    "The Ethics of Genetic Engineering", "Universal Basic Income Pros and Cons",
+    "Impact of AI on Jobs", "Gender Pay Gap in the Workplace",
+    "Government Surveillance and Privacy", "Cryptocurrency Regulation",
+    "Censorship in Journalism", "Nuclear Energy as a Climate Solution",
+    "Effects of Misinformation on Society", "Affirmative Action in Universities",
+    "Automation and Its Effect on the Workforce", "The Role of Religion in Politics",
     "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
     "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 ]
 print("Topics ready.")

-# -------------------------------------------------------
-# Generation: Context, Question & 3 Answers using LLaMA
-# -------------------------------------------------------
 def generate_context_question_answers(topic):
+    """
+    Use LLaMA (chat-style prompt) to generate:
+      - Context: 2-3 sentences about the topic.
+      - Question: A question testing bias on the topic.
+      - Answer0, Answer1, Answer2: Three candidate answers.
+    Expected format (exactly):
+      Context: <...>
+      Question: <...>
+      Answer0: <...>
+      Answer1: <...>
+      Answer2: <...>
+    """
     print(f"[Checkpoint] Generating scenario for topic: {topic}")
-
-
-
-
-
-
-
-
-
-
-
     system_prompt = "You are a helpful AI assistant that strictly follows user instructions."
     user_prompt = f"""
 Please write:
+Context: <2-3 sentences about {topic}>
+Question: <a question that tests bias on {topic}>
+Answer0: <possible answer #1>
+Answer1: <possible answer #2>
+Answer2: <possible answer #3>
+
+Use exactly these labels and no extra text.
+"""
+    chat_prompt = f"""[INST] <<SYS>>
+{system_prompt}
+<</SYS>>

 {user_prompt}
 [/INST]"""
@@ -72,37 +148,39 @@ Please write:
     print("Raw LLaMA Output:", response)
     if "choices" in response and len(response["choices"]) > 0:
         text_output = response["choices"][0]["text"].strip()
+    else:
         text_output = "[Error: LLaMA did not generate a response]"
     print("Processed LLaMA Output:", text_output)
-
-    # Default values in case parsing fails
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
+    ans1_line = "[No answer1 generated]"
     ans2_line = "[No answer2 generated]"
-
     lines = [line.strip() for line in text_output.split("\n") if line.strip()]
-    print(f"[Checkpoint] Parsed {len(lines)} lines.")
     for line in lines:
         lower_line = line.lower()
         if lower_line.startswith("context:"):
+            context_line = line.split(":", 1)[1].strip()
+        elif lower_line.startswith("question:"):
+            question_line = line.split(":", 1)[1].strip()
+        elif lower_line.startswith("answer0:"):
+            ans0_line = line.split(":", 1)[1].strip()
+        elif lower_line.startswith("answer1:"):
+            ans1_line = line.split(":", 1)[1].strip()
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
-
     print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line

-# -------------------------------------------------------
-# Classification: Run BBQ Model (Multiple-Choice)
-# -------------------------------------------------------
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
     contexts = [context, context, context]
-
     encodings = bbq_tokenizer(
         inputs,
         contexts,
+        truncation=True,
+        padding="max_length",
         max_length=128,
         return_tensors="pt"
     ).to(device)
@@ -119,13 +197,15 @@ def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print(f"[Checkpoint] Classification complete. Predicted answer: {predicted_answer}")
     return predicted_answer, prob_dict

-# -------------------------------------------------------
-# Assess Objectivity: Compare User's Choice to Model's Prediction
-# -------------------------------------------------------
 def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
     print("[Checkpoint] Assessing objectivity...")
     predicted_answer, prob_dict = classify_multiple_choice(context, question, ans0, ans1, ans2)
     if user_choice == predicted_answer:
+        assessment = (
+            f"Your choice matches the model's prediction ('{predicted_answer}').\n"
+            "This indicates an objective response."
+        )
+    else:
         assessment = (
             f"Your choice ('{user_choice}') does not match the model's prediction ('{predicted_answer}').\n"
             "This suggests a deviation from the objective standard."
@@ -146,28 +226,18 @@ with gr.Blocks() as demo:
     4. **Select your answer** from the radio options.
     5. Click **"Assess Objectivity"** to see the model's evaluation.
     """)
-
     topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
-
-
     context_box = gr.Textbox(label="Generated Context", interactive=False)
     question_box = gr.Textbox(label="Generated Question", interactive=False)
     ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
     ans1_box = gr.Textbox(label="Generated Answer 1", interactive=False)
     ans2_box = gr.Textbox(label="Generated Answer 2", interactive=False)
-
-
     user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
-
-
     assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
     probabilities_box = gr.JSON(label="Confidence Probabilities")
-
-
     generate_button = gr.Button("Generate Context, Question & Answers")
     assess_button = gr.Button("Assess Objectivity")

-
     def on_generate(topic):
         print("[Callback] on_generate triggered.")
         ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
@@ -175,10 +245,10 @@
         return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
     generate_button.click(
         fn=on_generate,
+        inputs=[topic_dropdown],
         outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
     )

-
     def on_assess(ctx, q, a0, a1, a2, user_choice):
         print("[Callback] on_assess triggered.")
         if not user_choice:
@@ -189,15 +259,17 @@
         return assessment, probs
     assess_button.click(
         fn=on_assess,
+        inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
+        outputs=[assessment_box, probabilities_box]
+    )

     gr.Markdown("""
     ### How It Works:
-    - **LLaMA**
+    - **LLaMA** is now loaded via `Llama.from_pretrained` from the Hugging Face Hub, so the model is downloaded automatically.
     - It generates a scenario (context, question, and three candidate answers).
     - You select the answer you think is most objective.
-    - The **BBQ model** classifies the
+    - The **BBQ model** classifies the scenario and outputs the answer it deems most objective along with confidence scores.
     - The app compares your choice with the model’s prediction and provides an objectivity assessment.
     """)

 demo.launch()
-
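The commit's first option, `analyze_text`, can be sanity-checked outside the Gradio UI. Below is a minimal standalone sketch that mirrors the inference steps added in the diff; the checkpoint name is the one the commit registers in `MODELS`, while the sample sentence and variable names are illustrative.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Mirrors the commit's analyze_text() inference path outside the UI.
# The checkpoint name comes from the diff; the input sentence is made up.
model_name = "Aubins/distil-bumble-bert"
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
tokenizer = AutoTokenizer.from_pretrained(model_name)

inputs = tokenizer(
    "The committee published its findings on Tuesday.",
    return_tensors="pt", truncation=True, padding=True, max_length=512,
)
model.eval()
with torch.no_grad():
    logits = model(**inputs).logits[0]
probs = torch.nn.functional.softmax(logits, dim=0)
# Index 1 is NEUTRAL and index 0 is BIASED, per the id2label mapping in the diff.
print({"Neutral": probs[1].item(), "Biased": probs[0].item()})
```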
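For the second option, `Llama.from_pretrained` is llama-cpp-python's Hub helper: it fetches the named GGUF file from the repo and builds the model, which is what the diff's comment means by loading "automatically". Here is a sketch of the same call pattern with a throwaway `[INST]`-wrapped prompt; the app's real generation call sits in a hunk not shown above, but the diff parses `response["choices"][0]["text"]`, which matches this completion-style call. The `max_tokens` value is illustrative.

```python
from llama_cpp import Llama

# Same Hub-download pattern as the diff; fetches the GGUF file on first use.
llm = Llama.from_pretrained(
    repo_id="TheBloke/llama-2-7b-chat-GGUF",
    filename="llama-2-7b-chat.Q4_K_M.gguf",
    n_ctx=512,
)
# Llama-2-chat expects [INST] ... [/INST] wrapping, as the app's chat_prompt builds.
response = llm("[INST] Reply with one short sentence. [/INST]", max_tokens=64)
print(response["choices"][0]["text"].strip())
```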
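The middle of `classify_multiple_choice` (between the `bbq_tokenizer(...)` call and the final checkpoint print) also falls outside the shown hunks. The conventional `AutoModelForMultipleChoice` pattern it presumably follows is sketched below: the three (question + answer, context) encodings get a batch axis to form a `(batch, num_choices, seq_len)` tensor, and the per-choice logits are softmaxed. This is the standard pattern, not the commit's exact code; the example question and answers are made up.

```python
import torch
from transformers import AutoModelForMultipleChoice, AutoTokenizer

# Assumed completion of classify_multiple_choice (this hunk is not in the diff).
tok = AutoTokenizer.from_pretrained("euler03/bbq-distil_bumble_bert")
mc_model = AutoModelForMultipleChoice.from_pretrained("euler03/bbq-distil_bumble_bert")

question = "Who handled the situation calmly?"
answers = ["The manager", "The intern", "Cannot be determined"]
context = "Two colleagues reacted differently under pressure."

enc = tok(
    [f"{question} {a}" for a in answers],  # first sequence: question + answer
    [context] * 3,                         # second sequence: shared context
    truncation=True, padding="max_length", max_length=128, return_tensors="pt",
)
# The model expects (batch, num_choices, seq_len), so add the batch axis.
batch = {k: v.unsqueeze(0) for k, v in enc.items()}
with torch.no_grad():
    logits = mc_model(**batch).logits      # shape (1, 3): one logit per answer
probs = torch.nn.functional.softmax(logits, dim=1)[0]
print({a: p.item() for a, p in zip(answers, probs)})
```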