"""Gradio app: hallucination detection with ModernCE + a LoRA adapter.

Loads the ModernCE-large-nli cross-encoder, re-heads it for 2-label
multi-label classification, applies a local PEFT/LoRA adapter, and serves
a small Gradio UI that scores (query, context, output) triples for
"evident_conflict" and "baseless_info" hallucination signals.
"""
import torch
import gradio as gr
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from peft import PeftModel

base = "dleemiller/ModernCE-large-nli"
adapter = "modernce-adapter"  # local adapter directory

tokenizer = AutoTokenizer.from_pretrained(base)
base_model = AutoModelForSequenceClassification.from_pretrained(
    base,
    num_labels=2,
    problem_type="multi_label_classification",
    # The base checkpoint has a different head size (NLI labels), so the
    # new 2-label classification head is initialized fresh.
    ignore_mismatched_sizes=True,
)
model = PeftModel.from_pretrained(base_model, adapter)
model.eval()


def sigmoid(x):
    """Element-wise logistic sigmoid (PEP8: a `def`, not an assigned lambda)."""
    return 1 / (1 + np.exp(-x))


def predict(query, context, output):
    """Score a (query, context, output) triple for hallucination signals.

    The three strings are joined with the tokenizer's SEP token and run
    through the adapted cross-encoder; per-label logits are squashed with
    a sigmoid (multi-label setup, so probabilities are independent).

    Returns a JSON-safe dict:
        {"hallucinated": bool, "probabilities": {label: float, ...}}
    """
    sep = tokenizer.sep_token
    inp = sep.join([query, context, output])
    inputs = tokenizer(inp, return_tensors="pt", truncation=True)
    # Keep inputs on the same device as the model (fails on GPU otherwise).
    inputs = {k: v.to(model.device) for k, v in inputs.items()}
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.cpu().numpy()[0]
    probs = sigmoid(logits)
    # Label order is fixed by the training head: index 0 = evident_conflict,
    # index 1 = baseless_info — TODO confirm against the adapter's config.
    labels = {
        "evident_conflict": float(probs[0]),
        "baseless_info": float(probs[1]),
    }
    # bool(...) — `any` over numpy values returns numpy.bool_, which is not
    # JSON-serializable; cast so the gradio "json" output never chokes.
    hallucinated = bool(any(p > 0.5 for p in probs))
    return {
        "hallucinated": hallucinated,
        "probabilities": labels,
    }


gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(label="Query"),
        gr.Textbox(label="Context"),
        gr.Textbox(label="Model Output"),
    ],
    outputs="json",
    title="ModernCE Hallucination Detector",
    description="Detects hallucinations using ModernCE + LoRA adapter",
).launch()