Spaces:

euler03
/

bbq

Sleeping

App Files Files Community

euler03 committed on Mar 14, 2025

Commit

d6eb293

verified ·

1 Parent(s): 0187815

with offline

Browse files

Files changed (1) hide show

app.py +200 -46

app.py CHANGED Viewed

@@ -1,3 +1,6 @@
 import gradio as gr
 import torch
 from llama_cpp import Llama
@@ -20,13 +23,13 @@ if device == "cuda":
 # -------------------------------------------------------
 MODELS = {
     "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
-    # You can add more models here if needed.
 }
 id2label = {0: "BIASED", 1: "NEUTRAL"}
 label2id = {"BIASED": 0, "NEUTRAL": 1}
 loaded_models = {}
 def load_model(model_name: str):
     if model_name not in loaded_models:
         try:
             model_path = MODELS[model_name]
@@ -44,6 +47,7 @@ def load_model(model_name: str):
     return loaded_models[model_name]
 def analyze_text(text: str, model_name: str):
     if not text.strip():
         return {"Empty text": 1.0}, "Please enter text to analyze."
     result = load_model(model_name)
@@ -77,41 +81,115 @@ def analyze_text(text: str, model_name: str):
 # 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
 # -------------------------------------------------------
 # (a) Load LLaMA from Hugging Face Hub (for generation)
-# Here we use from_pretrained so that the model is downloaded automatically
 llm = Llama.from_pretrained(
-    repo_id="TheBloke/llama-2-7b-chat-GGUF",   # Repo on Hugging Face Hub
-    filename="llama-2-7b-chat.Q4_K_M.gguf",      # GGUF file name in that repo
     n_ctx=512,
-    n_gpu_layers=30                            # try
 )
-# (b) Load BBQ Fine-Tuned BERT Model & Tokenizer (for multiple-choice)
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 print("BBQ model loaded.")
 TOPICS = [
-    "Artificial Intelligence in Healthcare", "Climate Change and Renewable Energy",
-    "Immigration Policies in the USA", "Social Media's Role in Elections",
-    "The Ethics of Genetic Engineering", "Universal Basic Income Pros and Cons",
-    "Impact of AI on Jobs", "Gender Pay Gap in the Workplace",
-    "Government Surveillance and Privacy", "Cryptocurrency Regulation",
-    "Censorship in Journalism", "Nuclear Energy as a Climate Solution",
-    "Effects of Misinformation on Society", "Affirmative Action in Universities",
-    "Automation and Its Effect on the Workforce", "The Role of Religion in Politics",
-    "Healthcare Access in Rural Areas", "The Rise of Nationalism in Politics",
-    "Police Use of Facial Recognition", "Space Exploration and Government Funding"
 ]
-print("Topics ready.")
 def generate_context_question_answers(topic):
     """
-    Use LLaMA (chat-style prompt) to generate:
-      - Context: 2-3 sentences about the topic.
-      - Question: A question testing bias on the topic.
-      - Answer0, Answer1, Answer2: Three candidate answers.
-    Expected format (exactly):
       Context: <...>
       Question: <...>
       Answer0: <...>
@@ -127,13 +205,11 @@ Question: <a question that tests bias on {topic}>
 Answer0: <possible answer #1>
 Answer1: <possible answer #2>
 Answer2: <possible answer #3>
 Use exactly these labels and no extra text.
 """
     chat_prompt = f"""[INST] <<SYS>>
 {system_prompt}
 <</SYS>>
 {user_prompt}
 [/INST]"""
     print("[Checkpoint] Prompt prepared, calling LLaMA...")
@@ -145,11 +221,13 @@ Use exactly these labels and no extra text.
     )
     print("[Checkpoint] LLaMA call complete.")
     print("Raw LLaMA Output:", response)
     if "choices" in response and len(response["choices"]) > 0:
         text_output = response["choices"][0]["text"].strip()
     else:
         text_output = "[Error: LLaMA did not generate a response]"
     print("Processed LLaMA Output:", text_output)
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
@@ -168,9 +246,13 @@ Use exactly these labels and no extra text.
             ans1_line = line.split(":", 1)[1].strip()
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
     print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
@@ -218,6 +300,7 @@ def assess_objectivity(context, question, ans0, ans1, ans2, user_choice):
 with gr.Blocks() as app:
     gr.Markdown("# Objectivity Analysis Suite")
     gr.Markdown("Choose a functionality below:")
     with gr.Tabs():
         # --- Tab 1: Text Objectivity Analysis ---
         with gr.TabItem("Text Analysis"):
@@ -243,29 +326,40 @@ with gr.Blocks() as app:
                         show_label=True
                     )
                     result_message = gr.Textbox(label="Detailed results")
             analyze_button.click(
                 analyze_text,
                 inputs=[text_input, model_dropdown],
                 outputs=[confidence_output, result_message]
             )
             gr.Markdown("## How to use this application")
             gr.Markdown("""
             1. Select a model from the drop-down.
             2. Enter or paste the text to be analyzed.
             3. Click **'Analyze the text'** to see the results.
             """)
         # --- Tab 2: Scenario-based Objectivity Assessment ---
         with gr.TabItem("Scenario Assessment"):
             gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
             gr.Markdown("""
             **Steps:**
-            1. Select a topic from the dropdown.
-            2. Click **'Generate Context, Question & Answers'** to generate a scenario.
-            3. Review the generated context, question, and 3 candidate answers.
-            4. Select your answer from the radio options.
-            5. Click **'Assess Objectivity'** to see the evaluation.
             """)
             topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
             context_box = gr.Textbox(label="Generated Context", interactive=False)
             question_box = gr.Textbox(label="Generated Question", interactive=False)
             ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
@@ -274,43 +368,103 @@ with gr.Blocks() as app:
             user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
             assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
             probabilities_box = gr.JSON(label="Confidence Probabilities")
-            generate_button = gr.Button("Generate Context, Question & Answers")
             assess_button = gr.Button("Assess Objectivity")
-            def on_generate(topic):
-                print("[Callback] on_generate triggered.")
-                ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
-                print("[Callback] on_generate complete.")
-                return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
             generate_button.click(
                 fn=on_generate,
-                inputs=[topic_dropdown],
                 outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
             )
             def on_assess(ctx, q, a0, a1, a2, user_choice):
-                print("[Callback] on_assess triggered.")
                 if not user_choice:
-                    print("[Callback] No user choice selected.")
                     return "Please select one of the generated answers.", {}
                 assessment, probs = assess_objectivity(ctx, q, a0, a1, a2, user_choice)
-                print("[Callback] on_assess complete.")
                 return assessment, probs
             assess_button.click(
                 fn=on_assess,
                 inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
                 outputs=[assessment_box, probabilities_box]
             )
             gr.Markdown("### How It Works:")
             gr.Markdown("""
-            - **LLaMA** (loaded via `Llama.from_pretrained`) automatically downloads the model from the Hugging Face Hub.
-            - It generates a scenario (context, question, and three candidate answers).
-            - You select the answer that you think is most objective.
-            - The **BBQ model** classifies the scenario and outputs the answer it deems most objective along with confidence scores.
-            - The app compares your choice with the model’s prediction and provides an objectivity assessment.
             """)
     gr.Markdown("## Additional Instructions")
     gr.Markdown("""
     - In the **Text Analysis** tab, you can analyze any text for objectivity.
-    - In the **Scenario Assessment** tab, LLaMA generates a scenario and you assess your objectivity by selecting one of the candidate answers.
     """)
 app.launch()

+import os
+import json
+import random
 import gradio as gr
 import torch
 from llama_cpp import Llama
 # -------------------------------------------------------
 MODELS = {
     "Aubins/distil-bumble-bert": "Aubins/distil-bumble-bert",
 }
 id2label = {0: "BIASED", 1: "NEUTRAL"}
 label2id = {"BIASED": 0, "NEUTRAL": 1}
 loaded_models = {}
 def load_model(model_name: str):
+    """Load and cache a sequence classification model for text objectivity analysis."""
     if model_name not in loaded_models:
         try:
             model_path = MODELS[model_name]
     return loaded_models[model_name]
 def analyze_text(text: str, model_name: str):
+    """Analyze the text for bias or neutrality using a selected classification model."""
     if not text.strip():
         return {"Empty text": 1.0}, "Please enter text to analyze."
     result = load_model(model_name)
 # 3️⃣ Scenario-based Objectivity Assessment (LLaMA + BBQ)
 # -------------------------------------------------------
 # (a) Load LLaMA from Hugging Face Hub (for generation)
 llm = Llama.from_pretrained(
+    repo_id="TheBloke/llama-2-7b-chat-GGUF",
+    filename="llama-2-7b-chat.Q4_K_M.gguf",
     n_ctx=512,
+    n_gpu_layers=30,
 )
+# (b) Load BBQ Fine-Tuned BERT Model & Tokenizer (multiple-choice)
 BBQ_MODEL = "euler03/bbq-distil_bumble_bert"
 bbq_tokenizer = AutoTokenizer.from_pretrained(BBQ_MODEL)
 bbq_model = AutoModelForMultipleChoice.from_pretrained(BBQ_MODEL).to(device)
 print("BBQ model loaded.")
+# -------------------------------------------------------
+# Replace original topics with your offline scenario topics
+# -------------------------------------------------------
 TOPICS = [
+    "AI in Healthcare",
+    "Climate Change",
+    "Universal Basic Income",
+    "Social Media's Role in Elections",
+    "Government Surveillance and Privacy",
+    "Genetic Engineering",
+    "Gender Pay Gap",
+    "Police Use of Facial Recognition",
+    "Space Exploration and Government Funding",
+    "Affirmative Action in Universities",
+    "Renewable Energy Advances",
+    "Mental Health Awareness",
+    "Online Privacy and Data Security",
+    "Impact of Automation on Employment",
+    "Electric Vehicles Adoption",
+    "Work From Home Culture",
+    "Food Security and GMOs",
+    "Cryptocurrency Volatility",
+    "Artificial Intelligence in Education",
+    "Cultural Diversity in Media",
+    "Urbanization and Infrastructure",
+    "Healthcare Reform",
+    "Taxation Policies",
+    "Global Trade and Tariffs",
+    "Environmental Conservation",
+    "Social Justice Movements",
+    "Digital Transformation in Business",
+    "Public Transportation Funding",
+    "Immigration Reform",
+    "Aging Population Challenges",
+    "Mental Health in the Workplace",
+    "Internet Censorship",
+    "Political Polarization",
+    "Cybersecurity in the Digital Age",
+    "Privacy vs. Security",
+    "Sustainable Agriculture",
+    "Future of Work",
+    "Tech Monopolies",
+    "Education Reform",
+    "Climate Policy and Economics",
+    "Renewable Energy Storage",
+    "Water Scarcity",
+    "Urban Green Spaces",
+    "Automation in Manufacturing",
+    "Renewable Energy Subsidies",
+    "Universal Healthcare",
+    "Workplace Automation",
+    "Cultural Heritage Preservation",
+    "Biotechnology in Agriculture",
+    "Media Bias",
+    "Renewable Energy Policy",
+    "Artificial Intelligence Ethics",
+    "Space Colonization",
+    "Social Media Regulation",
+    "Virtual Reality in Education",
+    "Blockchain in Supply Chain",
+    "Data-Driven Policymaking",
+    "Gig Economy",
+    "Climate Adaptation Strategies",
+    "Economic Inequality",
+    "Sustainable Urban Development",
+    "Media Regulation"
 ]
+print(f"Offline topics loaded. Total: {len(TOPICS)}")
+# -------------------------------------------------------
+# Offline scenarios
+# -------------------------------------------------------
def load_offline_scenarios(path="scenarios.json"):
    """Load pre-generated scenarios from a JSON file.

    Args:
        path: Location of the scenarios file. Defaults to ``scenarios.json``
            resolved against the current working directory.

    Returns:
        The list stored at the top level of the file, or an empty list when
        the file is missing, unreadable, not valid JSON, or does not hold a
        top-level list. The function never raises for a bad file, so a
        broken data file cannot prevent the app from starting.
    """
    try:
        # Open directly instead of checking existence first (avoids a race),
        # and pin UTF-8 so decoding does not depend on the platform locale.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"No {path} found in working directory.")
        return []
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError both derive from
        # ValueError; OSError covers permission problems and directories.
        print(f"Could not load offline scenarios from {path}: {exc}")
        return []
    if not isinstance(data, list):
        # Callers iterate over the result expecting one entry per scenario.
        print(f"Ignoring {path}: expected a JSON list of scenarios.")
        return []
    print(f"Offline scenarios loaded: {len(data)} scenarios.")
    return data
+offline_scenarios = load_offline_scenarios()
def get_offline_scenario(topic, scenarios=None):
    """Pick a random offline scenario whose topic matches *topic*.

    Matching is case-insensitive on the scenario's ``"topic"`` field.

    Args:
        topic: Topic name to look for. Anything that is not a string
            (for example ``None`` from a dropdown with no selection)
            matches nothing.
        scenarios: Iterable of scenario dicts to search. Defaults to the
            module-level ``offline_scenarios`` list.

    Returns:
        One matching scenario dict chosen at random, or ``None`` when there
        is no match.
    """
    if not isinstance(topic, str):
        return None
    if scenarios is None:
        scenarios = offline_scenarios
    wanted = topic.lower()
    matches = []
    for entry in scenarios:
        # Skip malformed entries instead of crashing on hand-edited JSON.
        if not isinstance(entry, dict):
            continue
        candidate = entry.get("topic", "")
        if isinstance(candidate, str) and candidate.lower() == wanted:
            matches.append(entry)
    return random.choice(matches) if matches else None
+# -------------------------------------------------------
+# Generation: Combined scenario (Context + Question + 3 Answers)
+# -------------------------------------------------------
 def generate_context_question_answers(topic):
     """
+    Use LLaMA to generate:
       Context: <...>
       Question: <...>
       Answer0: <...>
 Answer0: <possible answer #1>
 Answer1: <possible answer #2>
 Answer2: <possible answer #3>
 Use exactly these labels and no extra text.
 """
     chat_prompt = f"""[INST] <<SYS>>
 {system_prompt}
 <</SYS>>
 {user_prompt}
 [/INST]"""
     print("[Checkpoint] Prompt prepared, calling LLaMA...")
     )
     print("[Checkpoint] LLaMA call complete.")
     print("Raw LLaMA Output:", response)
     if "choices" in response and len(response["choices"]) > 0:
         text_output = response["choices"][0]["text"].strip()
     else:
         text_output = "[Error: LLaMA did not generate a response]"
     print("Processed LLaMA Output:", text_output)
     context_line = "[No context generated]"
     question_line = "[No question generated]"
     ans0_line = "[No answer0 generated]"
             ans1_line = line.split(":", 1)[1].strip()
         elif lower_line.startswith("answer2:"):
             ans2_line = line.split(":", 1)[1].strip()
     print("[Checkpoint] Generation parsing complete.")
     return context_line, question_line, ans0_line, ans1_line, ans2_line
+# -------------------------------------------------------
+# Classification: Run BBQ Model (Multiple-Choice)
+# -------------------------------------------------------
 def classify_multiple_choice(context, question, ans0, ans1, ans2):
     print("[Checkpoint] Starting classification...")
     inputs = [f"{question} {ans}" for ans in (ans0, ans1, ans2)]
 with gr.Blocks() as app:
     gr.Markdown("# Objectivity Analysis Suite")
     gr.Markdown("Choose a functionality below:")
     with gr.Tabs():
         # --- Tab 1: Text Objectivity Analysis ---
         with gr.TabItem("Text Analysis"):
                         show_label=True
                     )
                     result_message = gr.Textbox(label="Detailed results")
             analyze_button.click(
                 analyze_text,
                 inputs=[text_input, model_dropdown],
                 outputs=[confidence_output, result_message]
             )
             gr.Markdown("## How to use this application")
             gr.Markdown("""
             1. Select a model from the drop-down.
             2. Enter or paste the text to be analyzed.
             3. Click **'Analyze the text'** to see the results.
             """)
         # --- Tab 2: Scenario-based Objectivity Assessment ---
         with gr.TabItem("Scenario Assessment"):
             gr.Markdown("## Bias Detection: Assessing Objectivity in Scenarios")
             gr.Markdown("""
             **Steps:**
+            1. Select a topic from the dropdown below (topics match your offline JSON).
+            2. Check "Use Offline Data" if you want to load a pre-generated scenario.
+               Otherwise, generate a new scenario using the LLaMA-based generation buttons.
+            3. Review the context, question, and 3 candidate answers.
+            4. Select your answer.
+            5. Click "Assess Objectivity" to see the model's evaluation.
             """)
             topic_dropdown = gr.Dropdown(choices=TOPICS, label="Select a Topic")
+            use_offline_checkbox = gr.Checkbox(label="Use Offline Data", value=False)
+            load_offline_button = gr.Button("Load Offline Scenario")
+            with gr.Row():
+                generate_button = gr.Button("Generate Context, Question & Answers")
             context_box = gr.Textbox(label="Generated Context", interactive=False)
             question_box = gr.Textbox(label="Generated Question", interactive=False)
             ans0_box = gr.Textbox(label="Generated Answer 0", interactive=False)
             user_choice_radio = gr.Radio(choices=[], label="Select Your Answer")
             assessment_box = gr.Textbox(label="Objectivity Assessment", interactive=False)
             probabilities_box = gr.JSON(label="Confidence Probabilities")
             assess_button = gr.Button("Assess Objectivity")
+            # Offline scenario loader
+            def on_load_offline_scenario(topic, use_offline):
+                """Load offline scenario if use_offline is True and a matching scenario is found."""
+                if not use_offline:
+                    return ("[No offline scenario used]", "[No offline scenario used]",
+                            "[No offline scenario used]", "[No offline scenario used]",
+                            "[No offline scenario used]",
+                            gr.update(choices=[], value=None))
+                scenario = get_offline_scenario(topic)
+                if scenario:
+                    return (
+                        scenario.get("context", "[No context]"),
+                        scenario.get("question", "[No question]"),
+                        scenario.get("answer0", "[No answer0]"),
+                        scenario.get("answer1", "[No answer1]"),
+                        scenario.get("answer2", "[No answer2]"),
+                        gr.update(
+                            choices=[
+                                scenario.get("answer0", ""),
+                                scenario.get("answer1", ""),
+                                scenario.get("answer2", "")
+                            ],
+                            value=None
+                        )
+                    )
+                else:
+                    return ("[No offline scenario found]", "[No offline scenario found]",
+                            "[No offline scenario found]", "[No offline scenario found]",
+                            "[No offline scenario found]", gr.update(choices=[], value=None))
+            load_offline_button.click(
+                fn=on_load_offline_scenario,
+                inputs=[topic_dropdown, use_offline_checkbox],
+                outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
+            )
+            # Online scenario generation (all in one function)
+            def on_generate(topic, use_offline):
+                """If user doesn't want offline or no offline scenario, generate new scenario with LLaMA."""
+                if use_offline:
+                    # Attempt offline scenario first
+                    scenario = get_offline_scenario(topic)
+                    if scenario:
+                        return (
+                            scenario.get("context", "[No context]"),
+                            scenario.get("question", "[No question]"),
+                            scenario.get("answer0", "[No answer0]"),
+                            scenario.get("answer1", "[No answer1]"),
+                            scenario.get("answer2", "[No answer2]"),
+                            gr.update(
+                                choices=[
+                                    scenario.get("answer0", ""),
+                                    scenario.get("answer1", ""),
+                                    scenario.get("answer2", "")
+                                ],
+                                value=None
+                            )
+                        )
+                    # If no offline scenario found, fallback to generation
+                    ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
+                    return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
+                else:
+                    # Purely online generation
+                    ctx, q, a0, a1, a2 = generate_context_question_answers(topic)
+                    return ctx, q, a0, a1, a2, gr.update(choices=[a0, a1, a2], value=None)
             generate_button.click(
                 fn=on_generate,
+                inputs=[topic_dropdown, use_offline_checkbox],
                 outputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio]
             )
             def on_assess(ctx, q, a0, a1, a2, user_choice):
                 """Run the objectivity assessment for the answer the user picked.

                 Returns a prompt message and an empty dict when no answer is
                 selected; otherwise returns the (assessment, probabilities)
                 pair produced by ``assess_objectivity``.
                 """
                 if user_choice:
                     verdict, confidences = assess_objectivity(
                         ctx, q, a0, a1, a2, user_choice
                     )
                     return verdict, confidences
                 return "Please select one of the generated answers.", {}
             assess_button.click(
                 fn=on_assess,
                 inputs=[context_box, question_box, ans0_box, ans1_box, ans2_box, user_choice_radio],
                 outputs=[assessment_box, probabilities_box]
             )
             gr.Markdown("### How It Works:")
             gr.Markdown("""
+            - **Offline Mode**: Check "Use Offline Data" and click "Load Offline Scenario" or "Generate" to see if a matching scenario is found in scenarios.json.
+            - **Online Generation**: Uncheck "Use Offline Data" (or no scenario found), then click "Generate" to create a new scenario with LLaMA.
+            - Finally, select your answer and click "Assess Objectivity."
             """)
     gr.Markdown("## Additional Instructions")
     gr.Markdown("""
     - In the **Text Analysis** tab, you can analyze any text for objectivity.
+    - In the **Scenario Assessment** tab, you can load a scenario offline or generate one with LLaMA.
     """)
 app.launch()