| import gradio as gr |
| import torch |
| import sys |
| import os |
| import json |
| from pathlib import Path |
|
|
| |
| sys.path.insert(0, str(Path(__file__).parent)) |
|
|
| from core.model_handler import ModelHandler |
| from core.attention import AttentionProcessor |
| from core.cache import AttentionCache |
| from config import Config |
| from visualization.d3_viz import create_d3_visualization |
|
|
class TokenVisualizerApp:
    """Application state for the attention visualizer.

    Owns the model handler, the attention-result cache, and the payload of
    the most recent generation (tokens + per-step attention matrices) that
    the D3 visualization is rendered from.
    """

    def __init__(self):
        self.config = Config()
        self.model_handler = ModelHandler(config=self.config)
        self.cache = AttentionCache(max_size=self.config.CACHE_SIZE)
        # Most recent generation payload; None until a generation succeeds.
        self.current_data = None
        self.model_loaded = False

    def load_model(self, model_name: str = None) -> str:
        """Load a model and return a human-readable status message.

        Args:
            model_name: Hugging Face model id. Falls back to the configured
                default when empty or None.

        Returns:
            A status string describing success (with parameter count and
            device) or failure (with the handler's error message).
        """
        if not model_name:
            model_name = self.config.DEFAULT_MODEL

        success, message = self.model_handler.load_model(model_name)
        self.model_loaded = success

        if success:
            model_info = self.model_handler.get_model_info()
            return (
                f"✅ Model loaded: {model_name}\n"
                f"📊 Parameters: {model_info['num_parameters']:,}\n"
                f"🖥️ Device: {model_info['device']}"
            )
        else:
            return f"❌ Failed to load model: {message}"

    def generate_and_visualize(
        self,
        prompt: str,
        max_tokens: int,
        threshold: float,
        temperature: float,
        normalization: str,
        progress=gr.Progress()
    ):
        """Generate text with attention capture and store it in the cache.

        Always returns a 1-tuple ``(message,)``: the generation summary on
        success, or an error description on failure.  (Earlier revisions
        returned 3-tuples on the error paths, which broke the single-value
        unpacking done by the UI callback.)

        Args:
            prompt: Input text fed to the model.
            max_tokens: Maximum number of tokens to generate.
            threshold: Accepted for interface compatibility; the threshold
                is applied later when building the visualization, not here.
            temperature: Sampling temperature.
            normalization: "separate" for per-source normalization,
                anything else for joint normalization.
            progress: Gradio progress reporter (injected by Gradio).
        """
        if not self.model_loaded:
            return ("Please load a model first!",)

        if not prompt.strip():
            return ("Please enter a prompt!",)

        progress(0.2, desc="Checking cache...")

        # The cache key covers everything that influences the output.
        cache_key = self.cache.get_key(
            prompt, max_tokens,
            self.model_handler.model_name,
            temperature
        )
        cached = self.cache.get(cache_key)

        if cached:
            progress(0.5, desc="Using cached data...")
            self.current_data = cached
        else:
            progress(0.3, desc="Generating text...")

            attention_data, output_tokens, input_tokens, generated_text = \
                self.model_handler.generate_with_attention(
                    prompt, max_tokens, temperature
                )

            if attention_data is None:
                # On failure, generated_text carries the error description.
                return (f"Generation failed: {generated_text}",)

            progress(0.6, desc="Processing attention...")

            if normalization == "separate":
                attention_matrices = AttentionProcessor.process_attention_separate(
                    attention_data, input_tokens, output_tokens
                )
            else:
                attention_matrices = AttentionProcessor.process_attention_joint(
                    attention_data, input_tokens, output_tokens
                )

            self.current_data = {
                'input_tokens': input_tokens,
                'output_tokens': output_tokens,
                'attention_matrices': attention_matrices,
                'generated_text': generated_text,
                'attention_data': attention_data
            }

            self.cache.set(cache_key, self.current_data)

        progress(1.0, desc="Complete!")

        info_text = f"📝 Generated: {self.current_data['generated_text']}\n"
        info_text += f"🔢 Input tokens: {len(self.current_data['input_tokens'])}\n"
        info_text += f"🔢 Output tokens: {len(self.current_data['output_tokens'])}"

        return (
            info_text,
        )

    def update_step(self, step_idx: int, threshold: float):
        """No-op placeholder after removing visualization."""
        return None

    def update_threshold(self, threshold: float, normalization: str):
        """No-op placeholder after removing visualization."""
        return None

    def filter_token_connections(self, token_idx: int, token_type: str, threshold: float):
        """Removed visualization; keep placeholder."""
        return None

    def reset_view(self, threshold: float):
        """Removed visualization; keep placeholder."""
        return None

    def on_d3_token_click(self, click_data: str, threshold: float):
        """Removed visualization; keep placeholder for compatibility."""
        return None, gr.update()

    def on_input_token_select(self, token_label: str, threshold: float):
        """Removed visualization; keep placeholder for compatibility."""
        return None

    def prepare_d3_data(self, step_idx: int, threshold: float = 0.01, filter_token: str = None):
        """
        Convert attention data to D3.js-friendly JSON format.

        Args:
            step_idx: Generation step to visualize (0-based); clamped into
                the valid range instead of raising on out-of-range values.
            threshold: Minimum attention weight to include.
            filter_token: Token to filter by (format: "[IN] token" or
                "[OUT] token" or "All tokens").

        Returns:
            dict: JSON structure with nodes and links for D3.js.
        """
        if not self.current_data:
            return {"nodes": [], "links": []}

        input_tokens = self.current_data['input_tokens']
        output_tokens = self.current_data['output_tokens']
        attention_matrices = self.current_data['attention_matrices']

        if not attention_matrices:
            # No steps recorded: nothing to draw (previously this indexed
            # attention_matrices[-1] and raised IndexError).
            return {
                "nodes": [],
                "links": [],
                "step": 0,
                "total_steps": 0,
                "input_count": len(input_tokens),
                "output_count": 0
            }

        # Clamp into [0, total_steps - 1]; negative or oversized indices
        # from the UI slider must not crash the render.
        step_idx = max(0, min(step_idx, len(attention_matrices) - 1))

        # Build nodes: all input tokens, plus outputs generated so far.
        nodes = []

        for i, token in enumerate(input_tokens):
            nodes.append({
                "id": f"input_{i}",
                "token": token,
                "type": "input",
                "index": i
            })

        for i in range(step_idx + 1):
            if i < len(output_tokens):
                nodes.append({
                    "id": f"output_{i}",
                    "token": output_tokens[i],
                    "type": "output",
                    "index": i
                })

        # Resolve the dropdown filter into (type, index).  NOTE(review):
        # matching is by token text, so duplicate tokens resolve to the
        # first occurrence only.
        filter_type = None
        filter_idx = None
        if filter_token and filter_token != "All tokens":
            if filter_token.startswith("[IN] "):
                filter_type = "input"
                filter_token_text = filter_token[5:]
                filter_idx = next((i for i, token in enumerate(input_tokens) if token == filter_token_text), None)
            elif filter_token.startswith("[OUT] "):
                filter_type = "output"
                filter_token_text = filter_token[6:]
                filter_idx = next((i for i, token in enumerate(output_tokens) if token == filter_token_text), None)

        links = []

        # Accumulate links for every step up to and including step_idx.
        for current_step in range(step_idx + 1):
            if current_step < len(attention_matrices):
                step_attention = attention_matrices[current_step]

                # Input -> output attention for this step.
                input_attention = step_attention['input_attention']
                if input_attention is not None:
                    for input_idx in range(len(input_tokens)):
                        if input_idx < len(input_attention):
                            weight = float(input_attention[input_idx])
                            if weight >= threshold:
                                show_link = True
                                if filter_type == "input" and filter_idx is not None:
                                    # Keep only links leaving the filtered input token.
                                    show_link = (input_idx == filter_idx)
                                elif filter_type == "output" and filter_idx is not None:
                                    # Keep only links arriving at the filtered output token.
                                    show_link = (current_step == filter_idx)

                                if show_link:
                                    links.append({
                                        "source": f"input_{input_idx}",
                                        "target": f"output_{current_step}",
                                        "weight": weight,
                                        "type": "input_to_output"
                                    })

                # Output -> output attention (previous outputs only).
                output_attention = step_attention['output_attention']
                if output_attention is not None and current_step > 0:
                    for prev_output_idx in range(current_step):
                        if prev_output_idx < len(output_attention):
                            weight = float(output_attention[prev_output_idx])
                            if weight >= threshold:
                                show_link = True
                                if filter_type == "input" and filter_idx is not None:
                                    # Input filter hides output-to-output links entirely.
                                    show_link = False
                                elif filter_type == "output" and filter_idx is not None:
                                    # Keep links touching the filtered output on either end.
                                    show_link = (prev_output_idx == filter_idx or current_step == filter_idx)

                                if show_link:
                                    links.append({
                                        "source": f"output_{prev_output_idx}",
                                        "target": f"output_{current_step}",
                                        "weight": weight,
                                        "type": "output_to_output"
                                    })

        return {
            "nodes": nodes,
            "links": links,
            "step": step_idx,
            "total_steps": len(attention_matrices),
            "input_count": len(input_tokens),
            "output_count": step_idx + 1
        }

    def create_d3_visualization_html(self, step_idx: int = 0, threshold: float = 0.01, filter_token: str = None):
        """
        Create D3.js visualization HTML for the current data.

        Args:
            step_idx: Generation step to visualize (0-based)
            threshold: Minimum attention weight to include
            filter_token: Token to filter by (format: "[IN] token" or "[OUT] token")

        Returns:
            str: HTML string for D3.js visualization
        """
        if not self.current_data:
            return "<div>No data available. Generate text first!</div>"

        d3_data = self.prepare_d3_data(step_idx, threshold, filter_token)

        viz_html = create_d3_visualization(d3_data)
        return viz_html

    def get_token_choices(self):
        """
        Get list of token choices for dropdown.

        Returns:
            list: List of token strings for dropdown options.  Duplicate
            token texts produce duplicate labels; filtering resolves to
            the first occurrence (see prepare_d3_data).
        """
        if not self.current_data:
            return []

        input_tokens = self.current_data['input_tokens']
        output_tokens = self.current_data['output_tokens']

        choices = ["All tokens"]
        choices.extend([f"[IN] {token}" for token in input_tokens])
        choices.extend([f"[OUT] {token}" for token in output_tokens])

        return choices
|
|
|
|
def create_gradio_interface():
    """Create the Gradio interface.

    Builds the Blocks layout (model loading, generation controls, and the
    D3 attention visualization panel) and wires all event handlers.

    Returns:
        gr.Blocks: the assembled demo, ready for ``.launch()``.
    """
    app = TokenVisualizerApp()

    with gr.Blocks(
        title="Token Attention Visualizer",
        css="""
        /* Default/Light mode styles */
        .main-header {
            text-align: center;
            padding: 2rem 0 3rem 0;
            background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
            border-radius: 1rem;
            margin-bottom: 2rem;
            border: 1px solid #e2e8f0;
        }

        .main-title {
            font-size: 2.5rem;
            font-weight: 700;
            color: #1e293b;
            margin-bottom: 0.5rem;
            background: linear-gradient(135deg, #1e293b 0%, #3b82f6 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }

        .main-subtitle {
            font-size: 1.125rem;
            color: #64748b;
            font-weight: 400;
        }

        .section-title {
            font-size: 1.25rem;
            font-weight: 600;
            color: #1e293b;
            margin-bottom: 1.5rem;
            padding-bottom: 0.5rem;
            border-bottom: 2px solid #e2e8f0;
        }

        /* Explicit light mode overrides */
        .light .main-header,
        [data-theme="light"] .main-header {
            background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%);
            border: 1px solid #e2e8f0;
        }

        .light .main-title,
        [data-theme="light"] .main-title {
            color: #1e293b;
            background: linear-gradient(135deg, #1e293b 0%, #3b82f6 100%);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            background-clip: text;
        }

        .light .main-subtitle,
        [data-theme="light"] .main-subtitle {
            color: #64748b;
        }

        .light .section-title,
        [data-theme="light"] .section-title {
            color: #1e293b;
            border-bottom: 2px solid #e2e8f0;
        }

        /* Dark mode styles with higher specificity */
        .dark .main-header,
        [data-theme="dark"] .main-header {
            background: linear-gradient(135deg, #1e293b 0%, #334155 100%) !important;
            border: 1px solid #475569 !important;
        }

        .dark .main-title,
        [data-theme="dark"] .main-title {
            color: #f1f5f9 !important;
            background: linear-gradient(135deg, #f1f5f9 0%, #60a5fa 100%) !important;
            -webkit-background-clip: text !important;
            -webkit-text-fill-color: transparent !important;
            background-clip: text !important;
        }

        .dark .main-subtitle,
        [data-theme="dark"] .main-subtitle {
            color: #cbd5e1 !important;
        }

        .dark .section-title,
        [data-theme="dark"] .section-title {
            color: #f1f5f9 !important;
            border-bottom: 2px solid #475569 !important;
        }

        /* System dark mode - only apply when no explicit theme is set */
        @media (prefers-color-scheme: dark) {
            :root:not([data-theme="light"]) .main-header {
                background: linear-gradient(135deg, #1e293b 0%, #334155 100%);
                border: 1px solid #475569;
            }

            :root:not([data-theme="light"]) .main-title {
                color: #f1f5f9;
                background: linear-gradient(135deg, #f1f5f9 0%, #60a5fa 100%);
                -webkit-background-clip: text;
                -webkit-text-fill-color: transparent;
                background-clip: text;
            }

            :root:not([data-theme="light"]) .main-subtitle {
                color: #cbd5e1;
            }

            :root:not([data-theme="light"]) .section-title {
                color: #f1f5f9;
                border-bottom: 2px solid #475569;
            }
        }

        .load-model-btn {
            background: linear-gradient(135deg, #f97316 0%, #ea580c 100%) !important;
            color: white !important;
            border: none !important;
            font-weight: 600 !important;
            padding: 0.75rem 2rem !important;
            border-radius: 0.5rem !important;
            box-shadow: 0 4px 6px -1px rgba(249, 115, 22, 0.25) !important;
            transition: all 0.2s ease !important;
        }

        .load-model-btn:hover {
            background: linear-gradient(135deg, #ea580c 0%, #dc2626 100%) !important;
            transform: translateY(-1px) !important;
            box-shadow: 0 6px 8px -1px rgba(249, 115, 22, 0.35) !important;
        }
        """
    ) as demo:
        gr.HTML("""
            <div class="main-header">
                <h1 class="main-title">Token Attention Visualizer</h1>
                <p class="main-subtitle">Interactive visualization of attention patterns in Large Language Models</p>
            </div>
        """)

        with gr.Row():
            # Left column: model loading + generation controls.
            with gr.Column(scale=1):
                gr.HTML('<h2 class="section-title">Model & Generation</h2>')

                model_input = gr.Textbox(
                    label="Model Name",
                    value=app.config.DEFAULT_MODEL,
                    placeholder="Enter Hugging Face model name..."
                )
                load_model_btn = gr.Button("Load Model", variant="primary", elem_classes=["load-model-btn"])

                model_status = gr.Textbox(
                    label="Model Status",
                    value="No model loaded",
                    interactive=False,
                    lines=2
                )

                prompt_input = gr.Textbox(
                    label="Prompt",
                    value=app.config.DEFAULT_PROMPT,
                    lines=3,
                    placeholder="Enter your prompt here..."
                )

                max_tokens_input = gr.Slider(
                    minimum=1,
                    maximum=50,
                    value=app.config.DEFAULT_MAX_TOKENS,
                    step=1,
                    label="Max Tokens"
                )

                temperature_input = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=app.config.DEFAULT_TEMPERATURE,
                    step=0.1,
                    label="Temperature"
                )

                generate_btn = gr.Button("Generate", variant="primary", size="lg")

                generated_info = gr.Textbox(
                    label="Generation Info",
                    interactive=False,
                    lines=4
                )

                gr.HTML('<h2 class="section-title">Visualization Controls</h2>')

                step_slider = gr.Slider(
                    minimum=0,
                    maximum=10,
                    value=0,
                    step=1,
                    label="Generation Step",
                    info="Navigate through generation steps"
                )

                threshold_slider = gr.Slider(
                    minimum=0.001,
                    maximum=0.5,
                    value=0.01,
                    step=0.001,
                    label="Attention Threshold",
                    info="Filter weak connections"
                )

                token_dropdown = gr.Dropdown(
                    choices=["All tokens"],
                    value="All tokens",
                    label="Filter by Token",
                    info="Select a token to highlight"
                )

            # Right column: the D3 visualization panel.
            with gr.Column(scale=2):
                gr.HTML('<h2 class="section-title">Attention Visualization</h2>')

                d3_visualization = gr.HTML(
                    value="""<div style='height: 700px; display: flex; align-items: center; justify-content: center; font-size: 16px;'>
                        <div style='text-align: center;'>
                            <div style='font-size: 3rem; margin-bottom: 16px; opacity: 0.5;'>โช</div>
                            <div style='font-weight: 500; margin-bottom: 8px;'>Ready to visualize</div>
                            <div>Generate text to see attention patterns</div>
                        </div>
                    </div>"""
                )

        with gr.Accordion("๐ How to Use", open=False):
            gr.Markdown(
                """
                ### Instructions:
                1. **Load a model** from Hugging Face (default: Llama-3.2-1B)
                2. **Enter a prompt** and configure generation settings
                3. **Click Generate** to create text and visualize attention
                4. **Interact with the visualization:**
                   - Use the **step slider** to navigate through generation steps
                   - Adjust the **threshold** to filter weak connections
                   - Click on **tokens** in the plot to filter their connections
                   - Click **Reset View** to show all connections

                ### Understanding the Visualization:
                - **Blue lines**: Attention from input to output tokens
                - **Orange curves**: Attention between output tokens
                - **Line thickness**: Represents attention weight strength
                - **Node colors**: Blue = input tokens, Coral = generated tokens
                """
            )

        load_model_btn.click(
            fn=app.load_model,
            inputs=[model_input],
            outputs=[model_status]
        )

        def _generate(prompt, max_tokens, threshold, temperature):
            """Run generation, then refresh viz, dropdown, and step slider."""
            result = app.generate_and_visualize(
                prompt, max_tokens, threshold, temperature, "separate"
            )
            # Success returns a 1-tuple (info,); some revisions return
            # (None, message, None) on the error paths.  Handle both shapes
            # instead of `info, = ...`, which raised ValueError on errors.
            info = result[0] if len(result) == 1 else result[1]

            # Show the final step by default; clamp to 0 when no data exists.
            max_steps = max(0, len(app.current_data['attention_matrices']) - 1) if app.current_data else 0
            # Threshold 0.01 matches the threshold_slider's initial value.
            viz_html = app.create_d3_visualization_html(step_idx=max_steps, threshold=0.01)
            token_choices = app.get_token_choices()

            return info, viz_html, gr.update(choices=token_choices, value="All tokens"), gr.update(maximum=max_steps, value=max_steps)

        generate_btn.click(
            fn=_generate,
            inputs=[
                prompt_input,
                max_tokens_input,
                gr.State(app.config.DEFAULT_THRESHOLD),
                temperature_input
            ],
            outputs=[generated_info, d3_visualization, token_dropdown, step_slider]
        )

        def _update_visualization(step_idx, threshold, filter_token="All tokens"):
            """Update visualization when step or threshold changes."""
            viz_html = app.create_d3_visualization_html(step_idx=int(step_idx), threshold=threshold, filter_token=filter_token)
            return viz_html

        def _filter_by_token(selected_token, step_idx, threshold):
            """Update visualization when token filter changes."""
            viz_html = app.create_d3_visualization_html(step_idx=int(step_idx), threshold=threshold, filter_token=selected_token)
            return viz_html

        step_slider.change(
            fn=_update_visualization,
            inputs=[step_slider, threshold_slider, token_dropdown],
            outputs=[d3_visualization]
        )

        threshold_slider.change(
            fn=_update_visualization,
            inputs=[step_slider, threshold_slider, token_dropdown],
            outputs=[d3_visualization]
        )

        token_dropdown.change(
            fn=_filter_by_token,
            inputs=[token_dropdown, step_slider, threshold_slider],
            outputs=[d3_visualization]
        )

        # Auto-load the default model when the page opens.
        demo.load(
            fn=app.load_model,
            inputs=[gr.State(app.config.DEFAULT_MODEL)],
            outputs=[model_status]
        )

    return demo
|
|
if __name__ == "__main__":
    # Report the compute device before building the UI.
    if torch.cuda.is_available():
        print(f"✅ CUDA available: {torch.cuda.get_device_name(0)}")
    else:
        print("⚠️ CUDA not available, using CPU")

    demo = create_gradio_interface()
    # For deployment, launch() accepts share=True (public URL),
    # server_name="0.0.0.0" (external connections), server_port=7860,
    # and inbrowser=False; the defaults are fine for local use.
    demo.launch()