File size: 3,631 Bytes
53605cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load model and tokenizer
# NOTE(review): from_pretrained downloads from the Hugging Face Hub on first
# run — requires network access / a warm local cache.
MODEL_ID = "LLM-Semantic-Router/halugate-sentinel"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()  # inference mode: disables dropout and other train-time behavior

# Label mapping
# Maps the model's output class index to a (display name, emoji) pair used
# throughout the UI.
LABELS = {
    0: ("NO_FACT_CHECK_NEEDED", "🟢"),
    1: ("FACT_CHECK_NEEDED", "🔴"),
}


def classify_text(text: str) -> tuple[str, dict]:
    """Classify whether a prompt needs external fact-checking.

    Args:
        text: The user-supplied prompt.

    Returns:
        A pair of (markdown summary string, {label: probability} dict
        suitable for a ``gr.Label`` component). Blank/whitespace-only
        input yields a help message and an empty dict.
    """
    # Guard clause: nothing meaningful to classify.
    if not text.strip():
        return "Please enter some text to classify.", {}

    # Encode the prompt and run a forward pass without a gradient graph.
    encoded = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        probabilities = torch.softmax(model(**encoded).logits, dim=-1)[0]

    # Winning class plus its display metadata.
    winner = torch.argmax(probabilities).item()
    name, icon = LABELS[winner]
    top_prob = probabilities[winner].item()

    summary = f"{icon} **{name}**\n\nConfidence: {top_prob:.1%}"

    # One probability entry per class, keyed by "<emoji> <label>".
    per_class = {
        f"{marker} {title}": float(probabilities[idx])
        for idx, (title, marker) in LABELS.items()
    }

    return summary, per_class


# Example prompts
# A mix of factual questions (which should be flagged FACT_CHECK_NEEDED) and
# creative / computational / opinion prompts (NO_FACT_CHECK_NEEDED).
# Each entry is a one-element list because gr.Examples expects one row of
# values per input component.
EXAMPLES = [
    ["When was the Eiffel Tower built?"],
    ["What is the population of Tokyo?"],
    ["Who invented the telephone?"],
    ["Write a poem about the ocean"],
    ["Can you help me debug this Python code?"],
    ["What do you think about modern art?"],
    ["What year did World War II end?"],
    ["Calculate 15 * 7 + 3"],
    ["Translate 'hello' to Spanish"],
    ["What is the current population of China?"],
]

# Create Gradio interface
# Layout: header markdown, a two-column row (input on the left, results on
# the right), clickable examples, and a footer with model links.
with gr.Blocks(title="HaluGate Sentinel - Fact Check Classifier") as demo:
    # Header / explanation copy shown above the controls.
    gr.Markdown(
        """
    # 🛡️ HaluGate Sentinel

    **Fact-Check Classifier** - Determines whether a prompt requires external factual verification.

    This model helps identify prompts that contain factual claims or questions that should be
    verified against authoritative sources to prevent hallucinations in LLM responses.

    - 🔴 **FACT_CHECK_NEEDED**: The prompt contains factual claims/questions that should be verified
    - 🟢 **NO_FACT_CHECK_NEEDED**: The prompt is creative, computational, or opinion-based
    """
    )

    with gr.Row():
        # Left column (wider): prompt entry and the trigger button.
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter a prompt to classify...",
                lines=4,
            )
            submit_btn = gr.Button("Classify", variant="primary")

        # Right column: classification verdict plus per-class probabilities.
        with gr.Column(scale=1):
            output_label = gr.Markdown(label="Classification Result")
            output_scores = gr.Label(label="Confidence Scores", num_top_classes=2)

    # Clickable example prompts. cache_examples=True precomputes outputs by
    # running classify_text on every example when the app is built.
    gr.Examples(
        examples=EXAMPLES,
        inputs=input_text,
        outputs=[output_label, output_scores],
        fn=classify_text,
        cache_examples=True,
    )

    # Classify on button click...
    submit_btn.click(
        fn=classify_text,
        inputs=input_text,
        outputs=[output_label, output_scores],
    )

    # ...and on pressing Enter in the textbox (same handler, same outputs).
    input_text.submit(
        fn=classify_text,
        inputs=input_text,
        outputs=[output_label, output_scores],
    )

    # Footer: model card link and architecture note.
    gr.Markdown(
        """
    ---
    **Model**: [LLM-Semantic-Router/halugate-sentinel](https://huggingface.co/LLM-Semantic-Router/halugate-sentinel)
    | **Architecture**: ModernBERT for Sequence Classification
    """
    )

if __name__ == "__main__":
    # Start the Gradio server when the file is run as a script.
    demo.launch()