Spaces:

nsschw
/

TheUnsampledTruth

Sleeping

App Files Files Community

nsschw committed on Jul 20, 2025

Commit

9b87613

verified ·

1 Parent(s): 89d0124

Update app.py

Browse files

Files changed (1) hide show

app.py +117 -38

app.py CHANGED Viewed

@@ -1,45 +1,124 @@
 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import numpy as np
 import matplotlib.pyplot as plt
-import seaborn as sns
-import random
-from datetime import datetime
-class AcademicOpinionOMatic:
-    def __init__(self, model_name="microsoft/Phi-4-mini-instruct"):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
-        self.model = AutoModelForCausalLM.from_pretrained(
-            model_name,
-            torch_dtype="auto",
-            device_map="auto",
-            trust_remote_code=True
-        )
-        # Get token IDs for numbers 1-5
-        self.likert_tokens = {
-            i: self.tokenizer.encode(str(i), add_special_tokens=False)[0]
-            for i in range(1, 6)
-        }
-        self.likert_token_ids = list(self.likert_tokens.values())
-    def get_likert_probabilities(self, prompt, temperature=1.0):
-        """Extract academic opinions with scientific precision!"""
-        if isinstance(prompt, str):
-            chat_prompt = [{"role": "user", "content": prompt}]
-        elif isinstance(prompt, list):
-            chat_prompt = prompt
         else:
-            raise ValueError("Input must be string or chat messages (like a properly formatted citation!)")
-        try:
-            prompt_text = self.tokenizer.apply_chat_template(
-                chat_prompt,
-                tokenize=False,
-                add_generation_prompt=True
-            )
-        except Exception:
-            if isinstance(prompt, list):
-                prompt_

 import gradio as gr
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import matplotlib.pyplot as plt
+# Load the tokenizer and causal LM once at import time; a small, fast
+# checkpoint is used because this is only a proof of concept.
+MODEL_NAME = "microsoft/DialoGPT-medium"
+print("Loading model...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+# generate() is later given tokenizer.pad_token_id, so reuse EOS as the
+# padding token when the tokenizer does not define one.
+if tokenizer.pad_token is None:
+    tokenizer.pad_token = tokenizer.eos_token
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
+# Map each Likert value (1-5) to the first token id of its digit string;
+# a value whose encoding comes back empty is simply left out of the map.
+likert_tokens = {}
+for rating in range(1, 6):
+    encoded = tokenizer.encode(str(rating), add_special_tokens=False)
+    if not encoded:
+        continue
+    likert_tokens[rating] = encoded[0]
+print(f"Likert tokens: {likert_tokens}")
+def analyze_statement(statement):
+    """
+    Score a statement on a 1-5 Likert scale from next-token probabilities.
+
+    The statement is embedded in a rating prompt, one token is generated
+    greedily, and the softmax probability assigned to each Likert digit
+    token (taken from the module-level ``likert_tokens``) is read off.
+    The values are raw vocabulary probabilities; they are not renormalised
+    over the five options.
+
+    Args:
+        statement: Text entered by the user.
+
+    Returns:
+        A ``(figure, probabilities_text, status)`` tuple. ``figure`` is a
+        matplotlib bar chart, or ``None`` when nothing could be computed;
+        ``probabilities_text`` holds one ``value: probability`` line per
+        Likert value; ``status`` is a short success or error message.
+        Exceptions are reported through ``status`` instead of being raised.
+    """
+    try:
+        # A blank statement would only score the empty prompt template.
+        if not statement or not statement.strip():
+            return None, "", "❌ Please enter a statement"
+        # Create prompt
+        prompt = f"""Rate this statement from 1-5 where:
+1 = Strongly Disagree
+2 = Disagree
+3 = Neutral
+4 = Agree
+5 = Strongly Agree
+Statement: "{statement}"
+Rating:"""
+        # Tokenize
+        # NOTE(review): truncation presumably cuts from the end, which would
+        # drop the trailing "Rating:" cue for very long statements - confirm.
+        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
+        # Generate exactly one token greedily and keep its score vector
+        with torch.no_grad():
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=1,
+                return_dict_in_generate=True,
+                output_scores=True,
+                do_sample=False,
+                pad_token_id=tokenizer.pad_token_id
+            )
+        # Get probabilities for first generated token
+        if outputs.scores:
+            logits = outputs.scores[0][0]  # First token, first batch
+            probs = torch.softmax(logits, dim=-1)
+            # Extract Likert probabilities
+            likert_probs = {
+                value: probs[token_id].item()
+                for value, token_id in likert_tokens.items()
+            }
+            if not likert_probs:
+                # Without this guard max() below fails with an opaque message.
+                return None, "", "❌ No Likert tokens available"
+            values = list(likert_probs)
+            probabilities = list(likert_probs.values())
+            # Leave 20% headroom above the tallest bar; fall back to 1.0 so
+            # the axis never collapses to (0, 0) when every value is zero.
+            upper = max(probabilities) * 1.2 or 1.0
+            # Create simple bar chart
+            fig, ax = plt.subplots(figsize=(8, 5))
+            try:
+                bars = ax.bar(values, probabilities, color='skyblue', edgecolor='navy')
+                # Add value labels; the gap scales with the axis height so
+                # labels stay next to the bars even for tiny probabilities.
+                for bar, prob in zip(bars, probabilities):
+                    ax.text(bar.get_x() + bar.get_width() / 2.,
+                            bar.get_height() + upper * 0.01,
+                            f'{prob:.3f}', ha='center', va='bottom')
+                ax.set_xlabel('Likert Scale')
+                ax.set_ylabel('Probability')
+                ax.set_title('Response Probability Distribution')
+                ax.set_xticks(values)
+                ax.set_ylim(0, upper)
+                # Lay out this figure explicitly instead of pyplot's
+                # global "current figure".
+                fig.tight_layout()
+            finally:
+                # pyplot keeps every figure it creates alive until closed;
+                # release it so repeated requests do not accumulate figures.
+                # The Figure object itself stays usable by the caller.
+                plt.close(fig)
+            # Format probabilities text
+            prob_text = "\n".join([f"{k}: {v:.4f}" for k, v in likert_probs.items()])
+            return fig, prob_text, "✅ Analysis complete"
         else:
+            return None, "", "❌ No scores generated"
+    except Exception as e:
+        # UI boundary: surface the failure in the status box rather than crash.
+        return None, "", f"❌ Error: {str(e)}"
+# Gradio UI: one statement textbox in; chart, raw numbers and status out.
+demo = gr.Interface(
+    title="🎯 Likert Scale Probability Extractor (Minimal)",
+    description="Enter a statement and see the probability distribution for Likert scale responses (1-5)",
+    fn=analyze_statement,
+    inputs=[
+        gr.Textbox(
+            lines=3,
+            label="Statement to Analyze",
+            placeholder="e.g., Climate change is a serious threat",
+        ),
+    ],
+    # Same order as the (figure, text, status) tuple analyze_statement returns.
+    outputs=[
+        gr.Plot(label="Probability Distribution"),
+        gr.Textbox(label="Raw Probabilities", lines=6),
+        gr.Textbox(label="Status"),
+    ],
+    # Each example is a one-element row because the interface has one input.
+    examples=[
+        ["Climate change is a serious threat"],
+        ["Technology makes life better"],
+        ["Government should provide universal healthcare"],
+    ],
+)
+# Start the web server only when executed as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()