"""Gradio demo: compare fine-tuned ProBERT v1.0 against base DistilBERT.

Loads both models once at import time, then serves a Gradio Interface that
runs the same text through each model and reports per-label probabilities,
top predictions, and whether the two models agree.
"""

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load ProBERT (fine-tuned 3-way classifier)
probert_model = AutoModelForSequenceClassification.from_pretrained("collapseindex/ProBERT-1.0")
probert_tokenizer = AutoTokenizer.from_pretrained("collapseindex/ProBERT-1.0")

# Load DistilBERT base for comparison: pretrained encoder, but the 3-way
# classification head is freshly (randomly) initialized — hence near-uniform outputs.
base_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)
base_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

LABELS = ["process_clarity", "rhetorical_confidence", "scope_blur"]

EXAMPLES = [
    ["This revolutionary AI will transform your business and guarantee results."],
    ["Step 1: Load data. Step 2: Validate schema. Step 3: Return results."],
    ["Trust your intuition and embrace the journey. The universe has a plan."],
    ["First, check if the input is null. If null, return error. Otherwise, process the request."],
    ["Our cutting-edge solution leverages synergies to maximize value propositions."],
]


def _predict(model, tokenizer, text):
    """Run one model on *text*.

    Returns a tuple ``(predictions, top_label, top_prob)`` where
    ``predictions`` maps each label in LABELS to its softmax probability,
    ``top_label`` is the argmax label, and ``top_prob`` its probability.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():  # inference only; no gradient bookkeeping
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=1)[0]
    predictions = {label: float(probs[i]) for i, label in enumerate(LABELS)}
    return predictions, LABELS[int(probs.argmax())], float(probs.max())


def classify(text):
    """Classify *text* with both models and build the comparison markdown.

    Returns a 5-tuple matching the Gradio outputs list:
    (probert label->prob dict, probert confidence markdown,
     base label->prob dict, base confidence markdown, agreement markdown).
    """
    probert_predictions, probert_top_label, probert_top_prob = _predict(
        probert_model, probert_tokenizer, text
    )
    base_predictions, base_top_label, base_top_prob = _predict(
        base_model, base_tokenizer, text
    )

    probert_confidence = f"**ProBERT Top Prediction:** {probert_top_label} (Confidence: {probert_top_prob:.1%})"
    base_confidence = f"**DistilBERT Base:** {base_top_label} (Confidence: {base_top_prob:.1%})"

    if probert_top_label == base_top_label:
        comparison = f"✅ **Both models agree:** {probert_top_label}"
    else:
        comparison = f"⚠️ **Disagreement:** ProBERT says {probert_top_label}, Base says {base_top_label}"

    return probert_predictions, probert_confidence, base_predictions, base_confidence, comparison


demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=3, placeholder="Enter text here...", label="Input Text"),
    outputs=[
        gr.Label(num_top_classes=3, label="ProBERT v1.0 (Fine-tuned)"),
        gr.Markdown(label="ProBERT Confidence"),
        gr.Label(num_top_classes=3, label="DistilBERT Base (Untrained)"),
        gr.Markdown(label="Base Confidence"),
        gr.Markdown(label="Comparison"),
    ],
    title="ProBERT v1.0 vs DistilBERT Base",
    description="""
**Compare fine-tuned ProBERT against base DistilBERT.**

- 🟢 **process_clarity**: Step-by-step reasoning you can verify
- 🟠 **rhetorical_confidence**: Assertive claims without supporting process
- 🔴 **scope_blur**: Vague generalizations with ambiguous boundaries

**ProBERT** is fine-tuned on just 450 examples (150 per class) to detect rhetorical patterns.
**DistilBERT Base** keeps its pretrained encoder but uses a randomly initialized classification head (no fine-tuning). Notice how base gives ~33% uniform noise while ProBERT shows sharp separation. That's what fine-tuning adds!

**Model:** [collapseindex/ProBERT-1.0](https://huggingface.co/collapseindex/ProBERT-1.0)
""",
    examples=EXAMPLES,
    theme="default",
)

if __name__ == "__main__":
    demo.launch()