# Hugging Face Space file — uploaded via huggingface_hub by raayraay (commit c5e6036, verified).
import gradio as gr
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer
import html
# Load model and tokenizer once at import time so every request reuses them.
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    # Half precision on GPU to save memory; float32 on CPU, where fp16 is
    # slow or unsupported.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    # device_map="auto" lets transformers/accelerate place the weights on
    # available GPU(s); None keeps the default CPU placement.
    device_map="auto" if torch.cuda.is_available() else None,
    trust_remote_code=True
)
def calculate_entropy(logits):
    """Return the Shannon entropy of the distribution over `logits`, normalized to [0, 1].

    Args:
        logits: tensor of unnormalized scores over the vocabulary (last dim).
            May contain -inf entries — `generate()` returns *processed*
            scores, so tokens filtered by top-p/top-k warpers are -inf.

    Returns:
        float: entropy / log(vocab_size); 0 = fully certain, 1 = uniform.
    """
    probs = F.softmax(logits, dim=-1)
    log_probs = F.log_softmax(logits, dim=-1)
    # BUGFIX: where a logit is -inf, prob == 0 and log_prob == -inf, so the
    # naive product is 0 * -inf = NaN and the whole entropy became NaN.
    # Those terms contribute 0 to the entropy (lim p->0 of p*log p = 0),
    # so mask them out explicitly.
    plogp = torch.where(probs > 0, probs * log_probs, torch.zeros_like(probs))
    entropy = -torch.sum(plogp, dim=-1)
    # Normalize by the maximum possible entropy, log(vocab_size).
    max_entropy = torch.log(torch.tensor(logits.shape[-1], dtype=torch.float32))
    normalized_entropy = entropy / max_entropy
    return normalized_entropy.item()
def get_top_alternatives(logits, tokenizer, k=3):
    """Return the k most probable tokens as "'text' (prob%)" display strings.

    Tokens that decode to empty or whitespace-only text are skipped, so the
    result may hold fewer than k entries.
    """
    distribution = F.softmax(logits, dim=-1)
    top = torch.topk(distribution, k)
    labels = []
    for probability, candidate_id in zip(top.values.tolist(), top.indices.tolist()):
        decoded = tokenizer.decode([candidate_id]).strip()
        if not decoded:
            continue
        labels.append(f"'{decoded}' ({probability:.1%})")
    return labels
def get_confidence_color(prob, entropy=None):
    """Map a token probability (and optional normalized entropy) to a hex color.

    High entropy (> 0.7) always yields red, regardless of probability;
    otherwise green/yellow/red is chosen by probability thresholds.
    """
    red = "#ef4444"
    # High uncertainty overrides the probability signal entirely.
    if entropy is not None and entropy > 0.7:
        return red
    if prob < 0.4:
        return red
    return "#22c55e" if prob >= 0.7 else "#eab308"
def _render_word_span(word, probs, entropies, alternatives):
    """Render one word as a color-coded <span> with a hover tooltip."""
    avg_prob = sum(probs) / len(probs)
    avg_entropy = sum(entropies) / len(entropies) if entropies else 0
    color = get_confidence_color(avg_prob, avg_entropy)
    tooltip_lines = [f"Confidence: {avg_prob:.1%}", f"Uncertainty: {avg_entropy:.1%}"]
    if alternatives:
        # Show the alternatives considered for the word's last token.
        tooltip_lines.append("Alternatives: " + ", ".join(alternatives[-1][:3]))
    tooltip = " | ".join(tooltip_lines)
    # BUGFIX: escape the tooltip as well as the word — decoded token text can
    # contain quotes or angle brackets that would otherwise break out of the
    # title="..." attribute (HTML injection into the rendered page).
    return (
        f'<span style="background-color: {color}; padding: 1px 3px; '
        f'border-radius: 3px; margin: 1px; cursor: help;" '
        f'title="{html.escape(tooltip)}">{html.escape(word)}</span>'
    )

def generate_with_confidence(message):
    """Generate a chat response and render it as HTML with per-word confidence colors.

    Args:
        message: the user's chat message.

    Returns:
        str: an HTML fragment in which every word is wrapped in a span
        colored by average token confidence, with a tooltip showing
        confidence, uncertainty (entropy), and alternative tokens.
    """
    if not message.strip():
        return "<p style='color: #888;'>Please enter a message.</p>"
    # Build conversation (single turn for simplicity)
    messages = [
        {"role": "system", "content": "You are a helpful assistant. Keep responses concise."},
        {"role": "user", "content": message}
    ]
    # Apply the model's chat template, then tokenize.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(text, return_tensors="pt")
    if torch.cuda.is_available():
        inputs = inputs.to("cuda")
    input_length = inputs["input_ids"].shape[1]
    # Generate, keeping per-step scores so token probabilities can be recovered.
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=256,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            output_scores=True,
            return_dict_in_generate=True,
            pad_token_id=tokenizer.eos_token_id
        )
    # Keep only the newly generated tokens and their per-step scores.
    generated_ids = outputs.sequences[0][input_length:]
    scores = outputs.scores
    # Accumulate tokens into words; a token starting with whitespace marks a
    # word boundary, at which point the buffered word is flushed to HTML.
    html_parts = []
    current_word = ""
    current_probs = []
    current_entropies = []
    current_alternatives = []
    for token_id, score in zip(generated_ids, scores):
        # Probability the model assigned to the token it actually emitted.
        probs = torch.softmax(score[0], dim=-1)
        token_prob = probs[token_id].item()
        # Entropy of the full distribution (uncertainty measure).
        entropy = calculate_entropy(score[0])
        # What else the model considered at this step.
        alternatives = get_top_alternatives(score[0], tokenizer, k=3)
        token_text = tokenizer.decode([token_id])
        if token_text.startswith(" ") or token_text.startswith("\n"):
            # Flush the buffered word, then start a new one with this token.
            if current_word and current_probs:
                html_parts.append(_render_word_span(
                    current_word, current_probs, current_entropies, current_alternatives))
            current_word = token_text
            current_probs = [token_prob]
            current_entropies = [entropy]
            current_alternatives = [alternatives]
        else:
            # Continuation of the current word (subword token).
            current_word += token_text
            current_probs.append(token_prob)
            current_entropies.append(entropy)
            current_alternatives.append(alternatives)
    # Flush the final word.
    if current_word and current_probs:
        html_parts.append(_render_word_span(
            current_word, current_probs, current_entropies, current_alternatives))
    html_response = "".join(html_parts)
    # Wrap in a styled container.
    result = f"""
<div style="background: #1a1a2e; padding: 20px; border-radius: 12px; font-size: 16px; line-height: 1.8;">
<div style="margin-bottom: 10px; color: #888; font-size: 12px;">AI Response:</div>
<div>{html_response}</div>
</div>
"""
    return result
# Custom CSS: hide the Gradio footer and give the HTML output a minimum height.
css = """
footer {visibility: hidden}
.output-html {
min-height: 200px;
}
"""
# Build interface
with gr.Blocks(css=css, theme=gr.themes.Base(primary_hue="red")) as demo:
    # Title and intro copy.
    gr.Markdown("""
# Hallucination Heatmap
**See exactly where the AI might be lying.**
Every word is color-coded by model confidence. Red words = low confidence = potential hallucination.
Hover over any word to see the exact probability score and alternative tokens.
""")
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Ask me anything...",
            label="Your message",
            scale=7
        )
        submit = gr.Button("Generate", variant="primary", scale=1)
    # Output as HTML component — required so the per-word colored spans render.
    output = gr.HTML(
        value="<div style='padding: 20px; color: #888; text-align: center;'>Response will appear here with confidence highlighting...</div>",
        label="Response"
    )
    # Legend explaining the three confidence colors (matches get_confidence_color).
    gr.HTML("""
<div style="margin-top: 15px; padding: 15px; background: #1a1a2e; border-radius: 8px; font-size: 13px;">
<b>Confidence Legend:</b>
<span style="background-color: #22c55e; padding: 3px 10px; border-radius: 3px; margin-left: 10px;">High (70%+)</span>
<span style="background-color: #eab308; padding: 3px 10px; border-radius: 3px; margin-left: 5px;">Medium (40-70%)</span>
<span style="background-color: #ef4444; padding: 3px 10px; border-radius: 3px; margin-left: 5px;">Low / Uncertain</span>
<br><i style="color: #888; margin-top: 8px; display: block;">Hover over words to see: confidence %, uncertainty (entropy), and alternative tokens the model considered</i>
</div>
""")
    # Clickable example prompts that fill the textbox.
    gr.Examples(
        examples=[
            ["What is the capital of France?"],
            ["Tell me about quantum computing."],
            ["What happened on July 4th, 1776?"],
            ["Explain why the sky is blue."],
        ],
        inputs=msg
    )
    # Event handlers: pressing Enter in the textbox or clicking the button
    # both run generation and write the HTML into `output`.
    msg.submit(generate_with_confidence, msg, output)
    submit.click(generate_with_confidence, msg, output)
    # Footer notes.
    gr.Markdown("""
---
**How it works:** The model outputs a probability distribution over all possible next tokens.
We capture the probability of each selected token and visualize it. Low probability = the model was "unsure" = higher hallucination risk.
Built by Eric Raymond (Purdue University) & Samiksha BC (IU South Bend)
""")
demo.launch()