import gradio as gr import torch import torch.nn.functional as F from transformers import AutoModelForCausalLM, AutoTokenizer import html # Load model and tokenizer MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct" tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True) model = AutoModelForCausalLM.from_pretrained( MODEL_NAME, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto" if torch.cuda.is_available() else None, trust_remote_code=True ) def calculate_entropy(logits): """Calculate entropy of probability distribution. Higher = more uncertain.""" probs = F.softmax(logits, dim=-1) log_probs = F.log_softmax(logits, dim=-1) entropy = -torch.sum(probs * log_probs, dim=-1) # Normalize by max entropy (log of vocab size) max_entropy = torch.log(torch.tensor(logits.shape[-1], dtype=torch.float32)) normalized_entropy = entropy / max_entropy return normalized_entropy.item() def get_top_alternatives(logits, tokenizer, k=3): """Get top k alternative tokens the model considered.""" probs = F.softmax(logits, dim=-1) top_probs, top_ids = torch.topk(probs, k) alternatives = [] for prob, tok_id in zip(top_probs.tolist(), top_ids.tolist()): token_text = tokenizer.decode([tok_id]).strip() if token_text: alternatives.append(f"'{token_text}' ({prob:.1%})") return alternatives def get_confidence_color(prob, entropy=None): """Map probability and entropy to color.""" # Combine signals: low prob OR high entropy = red if entropy is not None and entropy > 0.7: return "#ef4444" # red (high uncertainty) if prob >= 0.7: return "#22c55e" # green elif prob >= 0.4: return "#eab308" # yellow else: return "#ef4444" # red def generate_with_confidence(message): """Generate response with per-token confidence scores.""" if not message.strip(): return "

Please enter a message.

" # Build conversation (single turn for simplicity) messages = [ {"role": "system", "content": "You are a helpful assistant. Keep responses concise."}, {"role": "user", "content": message} ] # Tokenize text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) inputs = tokenizer(text, return_tensors="pt") if torch.cuda.is_available(): inputs = inputs.to("cuda") input_length = inputs["input_ids"].shape[1] # Generate with scores with torch.no_grad(): outputs = model.generate( **inputs, max_new_tokens=256, do_sample=True, temperature=0.7, top_p=0.9, output_scores=True, return_dict_in_generate=True, pad_token_id=tokenizer.eos_token_id ) # Extract generated tokens and their probabilities generated_ids = outputs.sequences[0][input_length:] scores = outputs.scores # Build HTML response html_parts = [] current_word = "" current_probs = [] current_entropies = [] current_alternatives = [] for i, (token_id, score) in enumerate(zip(generated_ids, scores)): # Get probability for selected token probs = torch.softmax(score[0], dim=-1) token_prob = probs[token_id].item() # Calculate entropy (uncertainty measure) entropy = calculate_entropy(score[0]) # Get what else the model considered alternatives = get_top_alternatives(score[0], tokenizer, k=3) # Decode token token_text = tokenizer.decode([token_id]) # Check if token starts a new word if token_text.startswith(" ") or token_text.startswith("\n"): # Flush current word if current_word and current_probs: avg_prob = sum(current_probs) / len(current_probs) avg_entropy = sum(current_entropies) / len(current_entropies) if current_entropies else 0 color = get_confidence_color(avg_prob, avg_entropy) escaped_word = html.escape(current_word) # Build rich tooltip tooltip_lines = [f"Confidence: {avg_prob:.1%}", f"Uncertainty: {avg_entropy:.1%}"] if current_alternatives: tooltip_lines.append("Alternatives: " + ", ".join(current_alternatives[-1][:3])) tooltip = " | ".join(tooltip_lines) html_parts.append( f'{escaped_word}' ) current_word = token_text current_probs = [token_prob] current_entropies = [entropy] current_alternatives = [alternatives] else: current_word += token_text current_probs.append(token_prob) current_entropies.append(entropy) current_alternatives.append(alternatives) # Flush last word if current_word and current_probs: avg_prob = sum(current_probs) / len(current_probs) avg_entropy = sum(current_entropies) / len(current_entropies) if current_entropies else 0 color = get_confidence_color(avg_prob, avg_entropy) escaped_word = html.escape(current_word) tooltip_lines = [f"Confidence: {avg_prob:.1%}", f"Uncertainty: {avg_entropy:.1%}"] if current_alternatives: tooltip_lines.append("Alternatives: " + ", ".join(current_alternatives[-1][:3])) tooltip = " | ".join(tooltip_lines) html_parts.append( f'{escaped_word}' ) html_response = "".join(html_parts) # Wrap in a styled container result = f"""

AI Response:

{html_response}

""" return result # Custom CSS css = """ footer {visibility: hidden} .output-html { min-height: 200px; } """ # Build interface with gr.Blocks(css=css, theme=gr.themes.Base(primary_hue="red")) as demo: gr.Markdown(""" # Hallucination Heatmap **See exactly where the AI might be lying.** Every word is color-coded by model confidence. Red words = low confidence = potential hallucination. Hover over any word to see the exact probability score and alternative tokens. """) with gr.Row(): msg = gr.Textbox( placeholder="Ask me anything...", label="Your message", scale=7 ) submit = gr.Button("Generate", variant="primary", scale=1) # Output as HTML component output = gr.HTML( value="

Response will appear here with confidence highlighting...

", label="Response" ) # Legend gr.HTML("""

Confidence Legend: High (70%+) Medium (40-70%) Low / Uncertain
Hover over words to see: confidence %, uncertainty (entropy), and alternative tokens the model considered

""") # Examples gr.Examples( examples=[ ["What is the capital of France?"], ["Tell me about quantum computing."], ["What happened on July 4th, 1776?"], ["Explain why the sky is blue."], ], inputs=msg ) # Event handlers msg.submit(generate_with_confidence, msg, output) submit.click(generate_with_confidence, msg, output) gr.Markdown(""" --- **How it works:** The model outputs a probability distribution over all possible next tokens. We capture the probability of each selected token and visualize it. Low probability = the model was "unsure" = higher hallucination risk. Built by Eric Raymond (Purdue University) & Samiksha BC (IU South Bend) """) demo.launch()