"""Gradio demo: compare fine-tuned ProBERT v1.0 against base DistilBERT.

Loads both models once at import time, then serves a Gradio Interface that
runs the same text through each model and reports per-label probabilities,
top predictions, and whether the two models agree.
"""

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load ProBERT (fine-tuned 3-way classifier)
probert_model = AutoModelForSequenceClassification.from_pretrained("collapseindex/ProBERT-1.0")
probert_tokenizer = AutoTokenizer.from_pretrained("collapseindex/ProBERT-1.0")

# Load DistilBERT base for comparison: pretrained encoder, but the 3-way
# classification head is freshly (randomly) initialized — hence near-uniform outputs.
base_model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=3)
base_tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

LABELS = ["process_clarity", "rhetorical_confidence", "scope_blur"]

EXAMPLES = [
    ["This revolutionary AI will transform your business and guarantee results."],
    ["Step 1: Load data. Step 2: Validate schema. Step 3: Return results."],
    ["Trust your intuition and embrace the journey. The universe has a plan."],
    ["First, check if the input is null. If null, return error. Otherwise, process the request."],
    ["Our cutting-edge solution leverages synergies to maximize value propositions."],
]


def _predict(model, tokenizer, text):
    """Run one model on *text*.

    Returns a tuple ``(predictions, top_label, top_prob)`` where
    ``predictions`` maps each label in LABELS to its softmax probability,
    ``top_label`` is the argmax label, and ``top_prob`` its probability.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
    with torch.no_grad():  # inference only; no gradient bookkeeping
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=1)[0]
    predictions = {label: float(probs[i]) for i, label in enumerate(LABELS)}
    return predictions, LABELS[int(probs.argmax())], float(probs.max())


def classify(text):
    """Classify *text* with both models and build the comparison markdown.

    Returns a 5-tuple matching the Gradio outputs list:
    (probert label->prob dict, probert confidence markdown,
     base label->prob dict, base confidence markdown, agreement markdown).
    """
    probert_predictions, probert_top_label, probert_top_prob = _predict(
        probert_model, probert_tokenizer, text
    )
    base_predictions, base_top_label, base_top_prob = _predict(
        base_model, base_tokenizer, text
    )

    probert_confidence = f"**ProBERT Top Prediction:** {probert_top_label} (Confidence: {probert_top_prob:.1%})"
    base_confidence = f"**DistilBERT Base:** {base_top_label} (Confidence: {base_top_prob:.1%})"

    if probert_top_label == base_top_label:
        comparison = f"✅ **Both models agree:** {probert_top_label}"
    else:
        comparison = f"⚠️ **Disagreement:** ProBERT says {probert_top_label}, Base says {base_top_label}"

    return probert_predictions, probert_confidence, base_predictions, base_confidence, comparison


demo = gr.Interface(
    fn=classify,
    inputs=gr.Textbox(lines=3, placeholder="Enter text here...", label="Input Text"),
    outputs=[
        gr.Label(num_top_classes=3, label="ProBERT v1.0 (Fine-tuned)"),
        gr.Markdown(label="ProBERT Confidence"),
        gr.Label(num_top_classes=3, label="DistilBERT Base (Untrained)"),
        gr.Markdown(label="Base Confidence"),
        gr.Markdown(label="Comparison"),
    ],
    title="ProBERT v1.0 vs DistilBERT Base",
    description="""
**Compare fine-tuned ProBERT against base DistilBERT.**

- 🟢 **process_clarity**: Step-by-step reasoning you can verify
- 🟠 **rhetorical_confidence**: Assertive claims without supporting process
- 🔴 **scope_blur**: Vague generalizations with ambiguous boundaries

**ProBERT** is fine-tuned on just 450 examples (150 per class) to detect rhetorical patterns.
**DistilBERT Base** keeps its pretrained encoder but uses a randomly initialized classification head (no fine-tuning). Notice how base gives ~33% uniform noise while ProBERT shows sharp separation. That's what fine-tuning adds!

**Model:** [collapseindex/ProBERT-1.0](https://huggingface.co/collapseindex/ProBERT-1.0)
""",
    examples=EXAMPLES,
    theme="default",
)

if __name__ == "__main__":
    demo.launch()