Spaces:
Running
Running
| import gradio as gr | |
| from lettucedetect.models.inference import HallucinationDetector | |
| import os | |
# Build the LettuceDetect detector once, at module import, and bind it to the
# module-level name `detector` used by the evaluation callback.
_DETECTOR_SETTINGS = {
    "method": "transformer",
    "model_path": "KRLabsOrg/lettucedect-large-modernbert-en-v1",
}
detector = HallucinationDetector(**_DETECTOR_SETTINGS)
def _build_highlight_segments(answer, predictions):
    """Stitch hallucinated spans and the text between them into display data.

    Args:
        answer: The full answer string the span offsets index into.
        predictions: Iterable of dicts with ``start``, ``end``, ``text`` and
            ``confidence`` keys.

    Returns:
        A ``(segments, reports)`` pair. ``segments`` is a list of
        ``(text, label_or_None)`` tuples covering the answer left to right;
        ``reports`` is one human-readable line per span.
    """
    segments = []
    reports = []
    cursor = 0
    # The answer is rebuilt left to right, so walk the spans in start order
    # even if the caller hands them over unsorted.
    for pred in sorted(predictions, key=lambda p: p["start"]):
        start, end = pred["start"], pred["end"]
        confidence = pred["confidence"]
        text = pred["text"]

        # Plain (unlabelled) text between the previous span and this one.
        if cursor < start:
            segments.append((answer[cursor:start], None))

        # NOTE(review): the label embeds the confidence, so each distinct
        # score is its own category; a color map keyed on plain
        # "hallucination" presumably will not match it -- confirm against
        # the UI definition.
        segments.append((text, f"hallucination (conf: {confidence:.4f})"))
        reports.append(f"'{text}' - Confidence: {confidence:.4f}")

        # Never move backwards: an overlapping or nested span must not cause
        # already-emitted answer text to be emitted again as plain text.
        cursor = max(cursor, end)

    # Trailing text after the last hallucinated span.
    if cursor < len(answer):
        segments.append((answer[cursor:], None))
    return segments, reports


def evaluate_hallucination(context, question, answer):
    """Run the detector on one answer and format the result for the UI.

    Args:
        context: Source text the answer should be grounded in.
        question: The question that was asked.
        answer: The answer to check.

    Returns:
        A 5-tuple ``(status, explanation, highlighted_segments, span_report,
        average_confidence)``:

        * ``status``: "🟢" (no spans), "🔴" (spans found) or "⚪" (error).
        * ``explanation``: short message, or ``"Error: ..."`` on failure.
        * ``highlighted_segments``: list of ``(text, label_or_None)`` tuples.
        * ``span_report``: one line per span, or a placeholder string.
        * ``average_confidence``: formatted mean confidence, or ``"N/A"``.

    Any exception raised while predicting or formatting is reported through
    the returned tuple rather than propagated.
    """
    try:
        # Span-level predictions from the module-level LettuceDetect detector.
        predictions = detector.predict(
            context=[context],
            question=question,
            answer=answer,
            output_format="spans",
        )

        if not predictions:
            return "🟢", "No hallucinations detected", [(answer, None)], "Confidence: N/A", "N/A"

        highlighted_segments, span_reports = _build_highlight_segments(
            answer, predictions
        )
        # `predictions` is non-empty here, so the division is safe.
        avg_confidence = sum(p["confidence"] for p in predictions) / len(predictions)

        return (
            "🔴",
            "Hallucinations detected",
            highlighted_segments,
            "\n".join(span_reports) if span_reports else "N/A",
            f"Average Confidence: {avg_confidence:.4f}",
        )
    except Exception as e:
        # Deliberately broad: this is the UI boundary, and a failure should be
        # shown in the interface instead of surfacing as an unhandled error.
        return "⚪", f"Error: {str(e)}", [(answer, None)], "N/A", "N/A"
# Intro text rendered at the top of the page. Kept at module level so the
# Markdown lines stay flush-left inside the string literal.
INTRO_MARKDOWN = """
# 🥬 LettuceDetect Hallucination Tester 🟢🔴
Powered by `lettucedect-large-modernbert-en-v1` from KRLabsOrg. Detect hallucinations in answers based on context and questions using ModernBERT with 8192-token context support!
### How to Use:
1. Enter a **Context** (source document or info).
2. Enter a **Question** related to the context.
3. Enter an **Answer** to evaluate.
4. Press **Submit** to see if the answer hallucinates!
- 🟢 = No hallucinations
- 🔴 = Hallucinations detected
- Highlighted text shows hallucinated spans in **red** with confidence scores.
"""

# Gradio Blocks interface: inputs on the left, results on the right, and one
# pre-filled example underneath.
with gr.Blocks(
    title="🥬 LettuceDetect Hallucination Tester 🟢🔴",
    theme="ParityError/Anime",
) as demo:
    gr.Markdown(INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=2):
            # Inputs
            context_input = gr.Textbox(
                label="Context",
                lines=5,
                placeholder="Enter the context (e.g., a document or source text)...",
            )
            question_input = gr.Textbox(
                label="Question",
                placeholder="Enter the question...",
            )
            answer_input = gr.Textbox(
                label="Answer",
                lines=3,
                placeholder="Enter the answer to evaluate...",
            )
            submit_btn = gr.Button("Submit")

        with gr.Column(scale=3):
            # Outputs, in the same order as the tuple returned by
            # evaluate_hallucination.
            status_output = gr.Label(label="Status")
            explanation_output = gr.Textbox(label="Explanation", interactive=False)
            highlighted_answer_output = gr.HighlightedText(
                label="Answer with Hallucinations Highlighted",
                show_legend=True,
                # NOTE(review): evaluate_hallucination labels spans as
                # "hallucination (conf: 0.1234)", not plain "hallucination",
                # so this key presumably never matches and spans may not
                # render red -- confirm against the HighlightedText docs.
                color_map={"hallucination": "red"},
                combine_adjacent=True,
            )
            spans_output = gr.Textbox(
                label="Hallucinated Spans & Confidence",
                lines=5,
                interactive=False,
            )
            avg_confidence_output = gr.Textbox(
                label="Average Confidence",
                interactive=False,
            )

    # Connect inputs to outputs via the evaluation function
    submit_btn.click(
        fn=evaluate_hallucination,
        inputs=[context_input, question_input, answer_input],
        outputs=[
            status_output,
            explanation_output,
            highlighted_answer_output,
            spans_output,
            avg_confidence_output,
        ],
    )

    # Example: the answer gives 69 million where the context says 67 million.
    gr.Markdown("### Example")
    with gr.Row():
        gr.Examples(
            examples=[
                [
                    "France is a country in Europe. The capital of France is Paris. The population of France is 67 million.",
                    "What is the capital of France? What is the population of France?",
                    "The capital of France is Paris. The population of France is 69 million.",
                ]
            ],
            inputs=[context_input, question_input, answer_input],
        )

# Launch the demo
demo.launch()