"""Gradio demo for the HaluGate Sentinel fact-check classifier.

Loads the ``LLM-Semantic-Router/halugate-sentinel`` sequence-classification
model and exposes a small web UI that labels a prompt as either needing or
not needing external factual verification.
"""

import gradio as gr
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load model and tokenizer once at import time; inference-only, so eval mode.
MODEL_ID = "LLM-Semantic-Router/halugate-sentinel"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
model.eval()

# Label mapping: class index -> (label name, status emoji).
LABELS = {
    0: ("NO_FACT_CHECK_NEEDED", "🟢"),
    1: ("FACT_CHECK_NEEDED", "🔴"),
}


def classify_text(text: str) -> tuple[str, dict]:
    """Classify whether a prompt needs fact-checking.

    Args:
        text: The user prompt to classify.

    Returns:
        A ``(result_markdown, scores)`` tuple where ``result_markdown`` is a
        Markdown summary of the predicted label and its confidence, and
        ``scores`` maps each display label to its softmax probability
        (empty dict for blank input).
    """
    if not text.strip():
        return "Please enter some text to classify.", {}

    # Tokenize and predict (no gradients needed for inference).
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.softmax(logits, dim=-1)[0]

    # Get prediction
    pred_class = torch.argmax(probs).item()
    label_name, emoji = LABELS[pred_class]
    confidence = probs[pred_class].item()

    # Format result
    result = f"{emoji} **{label_name}**\n\nConfidence: {confidence:.1%}"

    # Confidence scores for both classes
    scores = {
        f"{LABELS[0][1]} {LABELS[0][0]}": float(probs[0]),
        f"{LABELS[1][1]} {LABELS[1][0]}": float(probs[1]),
    }

    return result, scores


# Example prompts covering both factual and non-factual categories.
EXAMPLES = [
    ["When was the Eiffel Tower built?"],
    ["What is the population of Tokyo?"],
    ["Who invented the telephone?"],
    ["Write a poem about the ocean"],
    ["Can you help me debug this Python code?"],
    ["What do you think about modern art?"],
    ["What year did World War II end?"],
    ["Calculate 15 * 7 + 3"],
    ["Translate 'hello' to Spanish"],
    ["What is the current population of China?"],
]

# Create Gradio interface
with gr.Blocks(title="HaluGate Sentinel - Fact Check Classifier") as demo:
    gr.Markdown(
        """
        # 🛡️ HaluGate Sentinel

        **Fact-Check Classifier** - Determines whether a prompt requires external factual verification.

        This model helps identify prompts that contain factual claims or questions that should be
        verified against authoritative sources to prevent hallucinations in LLM responses.

        - 🔴 **FACT_CHECK_NEEDED**: The prompt contains factual claims/questions that should be verified
        - 🟢 **NO_FACT_CHECK_NEEDED**: The prompt is creative, computational, or opinion-based
        """
    )

    with gr.Row():
        with gr.Column(scale=2):
            input_text = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter a prompt to classify...",
                lines=4,
            )
            submit_btn = gr.Button("Classify", variant="primary")
        with gr.Column(scale=1):
            output_label = gr.Markdown(label="Classification Result")
            output_scores = gr.Label(label="Confidence Scores", num_top_classes=2)

    gr.Examples(
        examples=EXAMPLES,
        inputs=input_text,
        outputs=[output_label, output_scores],
        fn=classify_text,
        cache_examples=True,
    )

    # Trigger classification on both button click and textbox submit.
    submit_btn.click(
        fn=classify_text,
        inputs=input_text,
        outputs=[output_label, output_scores],
    )
    input_text.submit(
        fn=classify_text,
        inputs=input_text,
        outputs=[output_label, output_scores],
    )

    gr.Markdown(
        """
        ---
        **Model**: [LLM-Semantic-Router/halugate-sentinel](https://huggingface.co/LLM-Semantic-Router/halugate-sentinel) | **Architecture**: ModernBERT for Sequence Classification
        """
    )

if __name__ == "__main__":
    demo.launch()