# LettuceDetect hallucination tester — Gradio Space app.
import os

import gradio as gr

from lettucedetect.models.inference import HallucinationDetector
# --- UI copy: markdown strings rendered in the Gradio interface ---

title = """# 🙋🏻♂️Welcome to 🌟Tonic's 🥬 LettuceDetect - 🤯🧠 Hallucination Tester 🟢🔴
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
"""

# Model card / capability summary shown in the left info panel.
description2 = """
### Model Details
- **Model Name**: [lettucedect-large-modernbert-en-v1](https://huggingface.co/KRLabsOrg/lettucedect-large-modernbert-en-v1)
- **Organization**: [KRLabsOrg](https://huggingface.co/KRLabsOrg)
- **Github**: [https://github.com/KRLabsOrg/LettuceDetect](https://github.com/KRLabsOrg/LettuceDetect)
- **Architecture**: ModernBERT (Large) with extended context support up to 8192 tokens
- **Task**: Token Classification / Hallucination Detection
- **Training Dataset**: [RagTruth](https://huggingface.co/datasets/wandb/RAGTruth-processed)
- **Language**: English
- **Capabilities**: Detects hallucinated spans in answers, provides confidence scores, and calculates average confidence across detected spans.
LettuceDetect excels at processing long documents to determine if an answer aligns with the provided context, making it a powerful tool for ensuring factual accuracy.
"""

# Step-by-step usage instructions.
howto1 = """
### How to Use LettuceDetect Tester
1. **Enter a Context**: Provide the source text or document (e.g., "France is a country in Europe..."). This is the factual basis for evaluation.
2. **Enter a Question**: Ask something related to the context (e.g., "What is the capital of France?").
3. **Enter an Answer**: Input the response you want to check (e.g., "The capital of France is Paris. The population is 69 million.").
4. **Press Submit**: Analyze the answer for hallucinations!
"""

# Legend explaining the status emoji and output fields.
howto2 = """
### Understanding the Output
- **Status**:
  - 🟢 = No hallucinations detected
  - 🔴 = Hallucinations detected
  - ⚪ = Error occurred
- **Explanation**: A brief summary of the result.
- **Highlighted Answer**: Shows the answer with hallucinated parts in **red**, labeled with confidence scores (e.g., "hallucination (conf: 0.9944)").
- **Hallucinated Spans & Confidence**: Lists each hallucinated segment with its confidence score.
- **Average Confidence**: Displays the average confidence of all detected hallucinations (e.g., "Average Confidence: 0.9944").
Use this tool to ensure your answers are grounded in reality!
"""

# Community links footer.
join_us = """
## Join us:
🌟TeamTonic🌟 is always making cool demos! Join our active builder's 🛠️community 👻
[Join us on Discord](https://discord.gg/n8ytYeh25n)
On 🤗Huggingface: [MultiTransformer](https://huggingface.co/MultiTransformer)
On 🌐Github: [Tonic-AI](https://github.com/tonic-ai) & contribute to🌟 [Data Tonic](https://github.com/multiTonic/thinking-dataset/)
🤗Big thanks to Yuvi Sharma and all the folks at huggingface for the community grant 🤗
"""
# Initialize the LettuceDetect model once at import time.
# NOTE(review): this downloads/loads the transformer weights on first run —
# expect a slow cold start for the Space.
detector = HallucinationDetector(
    method="transformer",
    model_path="KRLabsOrg/lettucedect-large-modernbert-en-v1",
)
| # Function to evaluate hallucination with LettuceDetect | |
def evaluate_hallucination(context, question, answer):
    """Run LettuceDetect on (context, question, answer) and format results for Gradio.

    Parameters
    ----------
    context : str
        Source document the answer should be grounded in.
    question : str
        Question the answer responds to.
    answer : str
        Answer text to check for hallucinated spans.

    Returns
    -------
    tuple
        (status emoji, explanation, HighlightedText segments,
         per-span confidence listing, average-confidence summary).
        Segments are (text, label) pairs; label is None for grounded text.
        On any exception the ⚪ error tuple is returned instead of raising.
    """
    try:
        # Span-level predictions: each dict carries 'start'/'end' character
        # offsets into `answer`, the span 'text', and a 'confidence' score.
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans"
        )
        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"
        highlighted_segments = []
        confidence_scores = []
        last_end = 0
        total_confidence = 0.0
        for pred in predictions:
            start, end = pred['start'], pred['end']
            confidence = pred['confidence']
            text = pred['text']
            # Grounded text between the previous span and this one (label None
            # keeps it un-highlighted in the HighlightedText component).
            if last_end < start:
                highlighted_segments.append((answer[last_end:start], None))
            # Hallucinated span, labeled with its confidence.
            label_with_confidence = f"hallucination (conf: {confidence:.4f})"
            highlighted_segments.append((text, label_with_confidence))
            confidence_scores.append(f"'{text}' - Confidence: {confidence:.4f}")
            total_confidence += confidence
            last_end = end
        # Trailing grounded text after the final span.
        if last_end < len(answer):
            highlighted_segments.append((answer[last_end:], None))
        # `predictions` is non-empty here (early return above), so the status
        # is always 🔴 and the average is well-defined.
        avg_confidence = total_confidence / len(predictions)
        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(confidence_scores),
            f"Average Confidence: {avg_confidence:.4f}"
        )
    except Exception as e:
        # Fail soft: surface the error in the UI rather than crashing the app.
        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
# Gradio Blocks interface: info panels on top, then inputs (left) and
# detection outputs (right), wired together through evaluate_hallucination.
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴"
) as demo:
    gr.Markdown(title)
    with gr.Row():
        with gr.Group():
            gr.Markdown(description2)
        with gr.Group():
            gr.Markdown(howto2)
    with gr.Row():
        with gr.Group():
            gr.Markdown(howto1)
        with gr.Group():
            gr.Markdown(join_us)
    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)..."
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question..."
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate..."
            )
            submit_btn = gr.Button("Submit")
        with gr.Column(scale=3):
            with gr.Row():
                with gr.Column():
                    status_output = gr.Label(label="Status")
                with gr.Column():
                    explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=False,
                # NOTE(review): span labels are dynamic ("hallucination (conf: …)"),
                # so this exact key never matches and Gradio falls back to its
                # default palette — confirm the intended coloring.
                color_map={"hallucination": "red"},
                combine_adjacent=True
            )
            spans_output = gr.Textbox(label="Hallucinated Spans & Confidence", lines=5, interactive=False)
            avg_confidence_output = gr.Textbox(label="Average Confidence", interactive=False)
    # Connect inputs to outputs via the evaluation function
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[status_output, explanation_output, highlighted_answer_output, spans_output, avg_confidence_output]
    )
    # Pre-filled example with a deliberate hallucination (population figure).
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million."
                ]
            ],
            inputs=[context_input, question_input, answer_input]
        )
# Launch the demo
demo.launch()