# ProBERT-Demo / app.py
# Compares responses from fine-tuned ProBERT 1.0 against a DistilBERT baseline.
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
# Load ProBERT (fine-tuned)
# Downloads the checkpoint from the Hugging Face Hub on first run (needs network).
probert_model = AutoModelForSequenceClassification.from_pretrained("collapseindex/ProBERT-1.0")
probert_tokenizer = AutoTokenizer.from_pretrained("collapseindex/ProBERT-1.0")
# Load DistilBERT base (for comparison)
# num_labels=3 attaches a freshly initialised (untrained) classification head,
# so this baseline produces near-uniform predictions by design.
base_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)
base_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
# Class names by logit index; assumed to match the label order used when
# ProBERT was fine-tuned — verify against the training config if in doubt.
LABELS = ["process_clarity", "rhetorical_confidence", "scope_blur"]
# Example inputs shown in the Gradio UI (one inner list per input component).
EXAMPLES = [
["This revolutionary AI will transform your business and guarantee results."],
["Step 1: Load data. Step 2: Validate schema. Step 3: Return results."],
["Trust your intuition and embrace the journey. The universe has a plan."],
["First, check if the input is null. If null, return error. Otherwise, process the request."],
["Our cutting-edge solution leverages synergies to maximize value propositions."],
]
def _predict(model, tokenizer, text):
    """Run a single classifier on *text*.

    Returns a 3-tuple:
        predictions -- dict mapping each name in LABELS to its softmax probability
        top_label   -- the LABELS entry with the highest probability
        top_prob    -- that highest probability as a float
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    # Inference only: disable autograd to avoid building a graph.
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=1)[0]
    predictions = {label: float(probs[i]) for i, label in enumerate(LABELS)}
    top_idx = int(probs.argmax())
    return predictions, LABELS[top_idx], float(probs.max())


def classify(text):
    """Classify *text* with both models and build the UI strings.

    Returns a 5-tuple consumed by the Gradio outputs, in order:
        (probert_predictions, probert_confidence_md,
         base_predictions, base_confidence_md, comparison_md)
    where the *_predictions values are label->probability dicts and the
    remaining values are Markdown strings.
    """
    probert_predictions, probert_top_label, probert_top_prob = _predict(
        probert_model, probert_tokenizer, text
    )
    base_predictions, base_top_label, base_top_prob = _predict(
        base_model, base_tokenizer, text
    )
    probert_confidence = f"**ProBERT Top Prediction:** {probert_top_label} (Confidence: {probert_top_prob:.1%})"
    base_confidence = f"**DistilBERT Base:** {base_top_label} (Confidence: {base_top_prob:.1%})"
    # Comparison text
    if probert_top_label == base_top_label:
        comparison = f"✅ **Both models agree:** {probert_top_label}"
    else:
        comparison = f"⚠️ **Disagreement:** ProBERT says {probert_top_label}, Base says {base_top_label}"
    return probert_predictions, probert_confidence, base_predictions, base_confidence, comparison
# Gradio UI: one text input, five outputs matching classify()'s return tuple
# (Label components render the probability dicts; Markdown components render
# the confidence/comparison strings).
demo = gr.Interface(
fn=classify,
inputs=gr.Textbox(lines=3, placeholder="Enter text here...", label="Input Text"),
outputs=[
gr.Label(num_top_classes=3, label="ProBERT v1.0 (Fine-tuned)"),
gr.Markdown(label="ProBERT Confidence"),
gr.Label(num_top_classes=3, label="DistilBERT Base (Untrained)"),
gr.Markdown(label="Base Confidence"),
gr.Markdown(label="Comparison")
],
title="ProBERT v1.0 vs DistilBERT Base",
description="""
**Compare fine-tuned ProBERT against base DistilBERT.**
- 🟢 **process_clarity**: Step-by-step reasoning you can verify
- 🟠 **rhetorical_confidence**: Assertive claims without supporting process
- 🔴 **scope_blur**: Vague generalizations with ambiguous boundaries
**ProBERT** is fine-tuned on just 450 examples (150 per class) to detect rhetorical patterns. **DistilBERT Base** has random weights (no training). Notice how base gives ~33% uniform noise while ProBERT shows sharp separation. That's what fine-tuning adds!
**Model:** [collapseindex/ProBERT-1.0](https://huggingface.co/collapseindex/ProBERT-1.0)
""",
examples=EXAMPLES,
theme="default",
)
# Start the web server only when executed as a script (not on import).
if __name__ == "__main__":
    demo.launch()