Spaces:

llm-semantic-router
/

playground

Running

App Files Files Community

bitliu committed on 12 days ago

Commit

53605cf

1 Parent(s): 4ffff16

init

Browse files

Signed-off-by: bitliu <bitliu@tencent.com>

Files changed (2) hide show

app.py +122 -0
requirements.txt +4 -0

app.py ADDED Viewed

	@@ -0,0 +1,122 @@

+import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+# Load the tokenizer and the sequence-classification model for MODEL_ID once, at import time
+MODEL_ID = "LLM-Semantic-Router/halugate-sentinel"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
+model.eval()  # evaluation mode: this app only runs inference
+# Label mapping: predicted class index -> (label name, emoji displayed with it)
+LABELS = {
+    0: ("NO_FACT_CHECK_NEEDED", "🟢"),
+    1: ("FACT_CHECK_NEEDED", "🔴"),
+}
+def classify_text(text: str) -> tuple[str, dict]:
+    """Classify whether a prompt needs fact-checking."""
+    if not text.strip():
+        return "Please enter some text to classify.", {}
+    # Tokenize and predict
+    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits = outputs.logits
+        probs = torch.softmax(logits, dim=-1)[0]
+    # Get prediction
+    pred_class = torch.argmax(probs).item()
+    label_name, emoji = LABELS[pred_class]
+    confidence = probs[pred_class].item()
+    # Format result
+    result = f"{emoji} **{label_name}**\n\nConfidence: {confidence:.1%}"
+    # Confidence scores for both classes
+    scores = {
+        f"{LABELS[0][1]} {LABELS[0][0]}": float(probs[0]),
+        f"{LABELS[1][1]} {LABELS[1][0]}": float(probs[1]),
+    }
+    return result, scores
+# Example prompts passed to gr.Examples; each inner list holds the value for the single input textbox
+EXAMPLES = [
+    ["When was the Eiffel Tower built?"],
+    ["What is the population of Tokyo?"],
+    ["Who invented the telephone?"],
+    ["Write a poem about the ocean"],
+    ["Can you help me debug this Python code?"],
+    ["What do you think about modern art?"],
+    ["What year did World War II end?"],
+    ["Calculate 15 * 7 + 3"],
+    ["Translate 'hello' to Spanish"],
+    ["What is the current population of China?"],
+]
+# Create Gradio interface: intro text, input column + result column, examples, event wiring, footer
+with gr.Blocks(title="HaluGate Sentinel - Fact Check Classifier") as demo:
+    gr.Markdown(
+        """
+    # 🛡️ HaluGate Sentinel
+    **Fact-Check Classifier** - Determines whether a prompt requires external factual verification.
+    This model helps identify prompts that contain factual claims or questions that should be
+    verified against authoritative sources to prevent hallucinations in LLM responses.
+    - 🔴 **FACT_CHECK_NEEDED**: The prompt contains factual claims/questions that should be verified
+    - 🟢 **NO_FACT_CHECK_NEEDED**: The prompt is creative, computational, or opinion-based
+    """
+    )
+    with gr.Row():  # input column (scale=2) and result column (scale=1) side by side
+        with gr.Column(scale=2):
+            input_text = gr.Textbox(
+                label="Input Prompt",
+                placeholder="Enter a prompt to classify...",
+                lines=4,
+            )
+            submit_btn = gr.Button("Classify", variant="primary")
+        with gr.Column(scale=1):
+            output_label = gr.Markdown(label="Classification Result")  # receives the markdown string from classify_text
+            output_scores = gr.Label(label="Confidence Scores", num_top_classes=2)  # receives the scores dict from classify_text
+    gr.Examples(
+        examples=EXAMPLES,
+        inputs=input_text,
+        outputs=[output_label, output_scores],
+        fn=classify_text,
+        cache_examples=True,  # have Gradio cache the example outputs produced by fn
+    )
+    submit_btn.click(  # clicking "Classify" runs the classifier
+        fn=classify_text,
+        inputs=input_text,
+        outputs=[output_label, output_scores],
+    )
+    input_text.submit(  # submitting the textbox runs the same classifier
+        fn=classify_text,
+        inputs=input_text,
+        outputs=[output_label, output_scores],
+    )
+    gr.Markdown(
+        """
+    ---
+    **Model**: [LLM-Semantic-Router/halugate-sentinel](https://huggingface.co/LLM-Semantic-Router/halugate-sentinel)
+    | **Architecture**: ModernBERT for Sequence Classification
+    """
+    )
+if __name__ == "__main__":
+    demo.launch()  # start the app when this file is run as a script

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+transformers>=4.56.0
+gradio>=4.0.0