nsschw committed on
Commit
02b5cfb
·
1 Parent(s): 8cc7f61

Refactor analysis functions and add default prompt for statement analysis

Browse files
Files changed (2) hide show
  1. app.py +42 -129
  2. default-prompt.txt +9 -0
app.py CHANGED
@@ -22,118 +22,54 @@ for i in range(1, 6):
22
  if tokens:
23
  likert_tokens[i] = tokens[0]
24
 
25
- print(f"Likert tokens: {likert_tokens}")
26
 
27
- def analyze_statement(statement):
28
- """
29
- Analyze a statement and return Likert probabilities using chat template
30
- """
31
- try:
32
- # Create chat messages
33
- messages = [
34
- {
35
- "role": "user",
36
- "content": f"""How would you respond to the following statement:
37
- '{statement}'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- Respond only with a number from 1 to 5, where:
40
- 1 = Strongly Disagree
41
- 2 = Disagree
42
- 3 = Neutral
43
- 4 = Agree
44
- 5 = Strongly Agree"""
45
- }
46
- ]
47
-
48
- # Apply chat template
49
- prompt = tokenizer.apply_chat_template(
50
- messages,
51
- tokenize=False,
52
- add_generation_prompt=True
53
- )
54
-
55
- # Tokenize
56
- inputs = tokenizer(prompt, return_tensors="pt")
57
-
58
- # Generate with output scores
59
- with torch.no_grad():
60
- outputs = model.generate(
61
- **inputs,
62
- max_new_tokens=1,
63
- return_dict_in_generate=True,
64
- output_scores=True,
65
- do_sample=False,
66
- pad_token_id=tokenizer.eos_token_id
67
- )
68
-
69
- # Get probabilities for first generated token
70
- if outputs.scores:
71
- logits = outputs.scores[0][0] # First token, first batch
72
- probs = torch.softmax(logits, dim=-1)
73
-
74
- # Extract Likert probabilities
75
- likert_probs = {}
76
- for value, token_id in likert_tokens.items():
77
- likert_probs[value] = probs[token_id].item()
78
-
79
- # Create simple bar chart
80
- fig, ax = plt.subplots(figsize=(8, 5))
81
- values = list(likert_probs.keys())
82
- probabilities = list(likert_probs.values())
83
-
84
- bars = ax.bar(values, probabilities, color='steelblue', alpha=0.8, edgecolor='navy')
85
-
86
- # Add value labels
87
- for bar, prob in zip(bars, probabilities):
88
- height = bar.get_height()
89
- ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
90
- f'{prob:.3f}', ha='center', va='bottom')
91
-
92
- ax.set_xlabel('Likert Scale Value')
93
- ax.set_ylabel('Probability')
94
- ax.set_title('Response Probability Distribution')
95
- ax.set_xticks(values)
96
- ax.set_ylim(0, max(probabilities) * 1.2 if probabilities else 1)
97
- ax.grid(True, axis='y', alpha=0.3)
98
-
99
- plt.tight_layout()
100
-
101
- # Format probabilities text
102
- prob_text = "\n".join([f"{k}: {v:.4f}" for k, v in likert_probs.items()])
103
-
104
- # Show what the model actually generated (for debugging)
105
- generated_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
106
- debug_info = f"Generated: {generated_text[-50:]}" # Last 50 chars
107
-
108
- return fig, prob_text, f"✅ Analysis complete\n{debug_info}"
109
-
110
- else:
111
- return None, "", "❌ No scores generated"
112
-
113
- except Exception as e:
114
- return None, "", f"❌ Error: {str(e)}"
115
 
116
  def analyze_with_persona(statement, persona=""):
117
  """
118
- Analyze with optional persona
119
  """
120
  try:
 
 
 
 
 
 
121
  # Create chat messages with optional system prompt
122
  messages = []
123
  if persona.strip():
124
  messages.append({"role": "system", "content": persona.strip()})
125
 
126
  messages.append({
127
- "role": "user",
128
- "content": f"""How would you respond to the following statement:
129
- '{statement}'
130
-
131
- Respond only with a number from 1 to 5, where:
132
- 1 = Strongly Disagree
133
- 2 = Disagree
134
- 3 = Neutral
135
- 4 = Agree
136
- 5 = Strongly Agree"""
137
  })
138
 
139
  # Apply chat template
@@ -150,7 +86,7 @@ Respond only with a number from 1 to 5, where:
150
  with torch.no_grad():
151
  outputs = model.generate(
152
  **inputs,
153
- max_new_tokens=3,
154
  return_dict_in_generate=True,
155
  output_scores=True,
156
  do_sample=False,
@@ -167,39 +103,16 @@ Respond only with a number from 1 to 5, where:
167
  for value, token_id in likert_tokens.items():
168
  likert_probs[value] = probs[token_id].item()
169
 
170
- # Create simple bar chart
171
- fig, ax = plt.subplots(figsize=(8, 5))
172
- values = list(likert_probs.keys())
173
- probabilities = list(likert_probs.values())
174
-
175
- bars = ax.bar(values, probabilities, color='steelblue', alpha=0.8, edgecolor='navy')
176
-
177
- # Add value labels
178
- for bar, prob in zip(bars, probabilities):
179
- height = bar.get_height()
180
- ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
181
- f'{prob:.3f}', ha='center', va='bottom')
182
-
183
- ax.set_xlabel('Likert Scale Value')
184
- ax.set_ylabel('Probability')
185
- title = 'Response Probability Distribution'
186
- if persona.strip():
187
- title += f'\nPersona: {persona[:50]}...' if len(persona) > 50 else f'\nPersona: {persona}'
188
- ax.set_title(title)
189
- ax.set_xticks(values)
190
- ax.set_ylim(0, max(probabilities) * 1.2 if probabilities else 1)
191
- ax.grid(True, axis='y', alpha=0.3)
192
-
193
- plt.tight_layout()
194
 
195
  # Format probabilities text
196
  prob_text = "\n".join([f"{k}: {v:.4f}" for k, v in likert_probs.items()])
197
 
198
- # Show what the model actually generated
199
- generated_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
200
- debug_info = f"Generated: {generated_text[-100:]}" # Last 100 chars
201
-
202
- return fig, prob_text, f"✅ Analysis complete\n{debug_info}"
203
 
204
  else:
205
  return None, "", "❌ No scores generated"
 
22
  if tokens:
23
  likert_tokens[i] = tokens[0]
24
 
 
25
 
26
+ def create_probability_plot(likert_probs, persona=""):
27
+ """Create a bar chart for Likert scale probabilities"""
28
+ fig, ax = plt.subplots(figsize=(8, 5))
29
+ values = list(likert_probs.keys())
30
+ probabilities = list(likert_probs.values())
31
+
32
+ bars = ax.bar(values, probabilities, color='steelblue', alpha=0.8, edgecolor='navy')
33
+
34
+ # Add value labels
35
+ for bar, prob in zip(bars, probabilities):
36
+ height = bar.get_height()
37
+ ax.text(bar.get_x() + bar.get_width()/2., height + 0.01,
38
+ f'{prob:.3f}', ha='center', va='bottom')
39
+
40
+ ax.set_xlabel('Likert Scale Value')
41
+ ax.set_ylabel('Probability')
42
+ title = 'Response Probability Distribution'
43
+ if persona.strip():
44
+ title += f'\nPersona: {persona[:50]}...' if len(persona) > 50 else f'\nPersona: {persona}'
45
+ ax.set_title(title)
46
+ ax.set_xticks(values)
47
+ ax.set_ylim(0, max(probabilities) * 1.2 if probabilities else 1)
48
+ ax.grid(True, axis='y', alpha=0.3)
49
+
50
+ plt.tight_layout()
51
+ return fig
52
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
54
  def analyze_with_persona(statement, persona=""):
55
  """
56
+ Analyze with persona prompt
57
  """
58
  try:
59
+
60
+ # read default prompt
61
+ with open("default_prompt.txt", "r") as f:
62
+ default_prompt = f.read().strip()
63
+
64
+
65
  # Create chat messages with optional system prompt
66
  messages = []
67
  if persona.strip():
68
  messages.append({"role": "system", "content": persona.strip()})
69
 
70
  messages.append({
71
+ "role": "user",
72
+ "content": default_prompt.format(statement="Your actual statement here")
 
 
 
 
 
 
 
 
73
  })
74
 
75
  # Apply chat template
 
86
  with torch.no_grad():
87
  outputs = model.generate(
88
  **inputs,
89
+ max_new_tokens=1,
90
  return_dict_in_generate=True,
91
  output_scores=True,
92
  do_sample=False,
 
103
  for value, token_id in likert_tokens.items():
104
  likert_probs[value] = probs[token_id].item()
105
 
106
+ # Create probability plot
107
+ fig = create_probability_plot(likert_probs, persona)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  # Format probabilities text
110
  prob_text = "\n".join([f"{k}: {v:.4f}" for k, v in likert_probs.items()])
111
 
112
+ # Show what the model actually generated including input and special tokens
113
+ debug_info = f"Input: {prompt}...\n\n"
114
+ debug_info += f"Output Tokens: {outputs.sequences[0]}"
115
+ return fig, f"✅ Analysis complete\n\n{debug_info}"
 
116
 
117
  else:
118
  return None, "", "❌ No scores generated"
default-prompt.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ How would you respond to the following statement:
2
+ '{statement}'
3
+
4
+ Respond only with a number from 1 to 5, where:
5
+ 1 = Strongly Disagree
6
+ 2 = Disagree
7
+ 3 = Neutral
8
+ 4 = Agree
9
+ 5 = Strongly Agree