#!/usr/bin/env python3 """ NeuroScope β Neural Network Activation Visualizer Interactive Gradio dashboard for visualizing LLM hidden states, attention patterns, and activation maps during inference on Qwen3-4B. Run locally (demo mode β no GPU required): python app.py Run with real model: python app.py --model Tabs: - Analyze: Single-prompt analysis with 4 core views + fingerprinting - Compare: Side-by-side comparison of two prompts - Generate: Streaming token-by-token generation with live activations Part of the Alogotron project: https://huggingface.co/Alogotron """ import sys import os import argparse import time # Ensure local imports work regardless of cwd sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) import gradio as gr from extraction import ActivationExtractor, ExtractionResult from viz_attention import create_attention_heatmap, get_head_choices from viz_magnitude import create_magnitude_chart from viz_token_layer import create_token_layer_grid from viz_scatter import create_scatter_plot from viz_fingerprint import create_fingerprint_strip, create_fingerprint_comparison from viz_comparison import ( create_attention_comparison, create_magnitude_comparison, create_token_layer_comparison, create_scatter_comparison, ) # --------------------------------------------------------------------------- # Theme & styling # --------------------------------------------------------------------------- ACCENT = "#e6b800" BG_DARK = "#1a1a2e" TEXT = "#e0e0e0" CUSTOM_CSS = """ /* Global dark background overrides */ .gradio-container { background-color: #0f0f23 !important; } footer { display: none !important; } /* Header branding */ .neuroscope-header { text-align: center; padding: 12px 0 4px; } .neuroscope-header h1 { color: #e6b800; font-size: 2em; margin: 0; letter-spacing: 2px; } .neuroscope-header p { color: #e0e0e0; opacity: 0.7; margin: 4px 0 0; font-size: 0.9em; } /* Status badge styling */ .status-bar { font-family: monospace; font-size: 0.85em; padding: 6px 12px; border-radius: 6px; background: #16162b; border: 1px solid #2a2a4e; } /* Plot containers β remove extra padding */ .plot-container .js-plotly-plot { margin: 0 !important; } /* Control panel styling */ .control-panel { border: 1px solid #2a2a4e; border-radius: 8px; padding: 8px; background: #16162b; } /* Generated text display */ .gen-text-display { font-family: 'Courier New', monospace; font-size: 1.1em; line-height: 1.6; padding: 12px; background: #16162b; border: 1px solid #2a2a4e; border-radius: 8px; color: #e0e0e0; min-height: 60px; } .gen-text-display .new-token { color: #e6b800; font-weight: bold; } """ # --------------------------------------------------------------------------- # Global state # --------------------------------------------------------------------------- extractor = ActivationExtractor() current_result: ExtractionResult | None = None compare_result_a: ExtractionResult | None = None compare_result_b: ExtractionResult | None = None def get_status_text(result: ExtractionResult | None, model_loaded: bool) -> str: """Generate status bar markdown.""" if result is None: model_status = "β Model loaded" if model_loaded else "π€ Demo mode (no GPU)" return f"**Status:** {model_status} β Enter a prompt and click Run" mode = "π§ͺ Demo Data" if result.is_demo else "π§ Real Inference" return ( f"**Status:** {mode} | " f"β± {result.inference_time:.3f}s | " f"π {len(result.tokens)} tokens | " f"π {result.num_layers} layers Γ {result.num_heads} heads Γ {result.hidden_dim}d" ) # --------------------------------------------------------------------------- # Tab 1: Analyze β callbacks # --------------------------------------------------------------------------- def run_inference(prompt: str): """Extract activations from the real model.""" global current_result if not prompt.strip(): prompt = "The quick brown fox jumps over the lazy dog" if not extractor.model_loaded: gr.Warning("Model not loaded β using demo data instead.") return run_demo(prompt) try: current_result = extractor.extract(prompt) except Exception as e: gr.Warning(f"Inference failed: {e}. Falling back to demo data.") current_result = ActivationExtractor.generate_demo_data(prompt) return _build_all_outputs(current_result) def run_demo(prompt: str): """Generate demo data (no GPU required).""" global current_result if not prompt.strip(): prompt = "The quick brown fox jumps over the lazy dog" current_result = ActivationExtractor.generate_demo_data(prompt) return _build_all_outputs(current_result) def update_attention(layer: int, head: str): """Update attention heatmap on layer/head change.""" if current_result is None: return _empty_plot("Run inference first") return create_attention_heatmap(current_result, layer=int(layer), head=head) def update_magnitude(metric: str): """Update magnitude chart on metric change.""" if current_result is None: return _empty_plot("Run inference first") return create_magnitude_chart(current_result, metric=metric) def update_token_grid(normalize: str): """Update token-layer grid on normalization change.""" if current_result is None: return _empty_plot("Run inference first") return create_token_layer_grid(current_result, normalize=normalize) def update_scatter(layer: int, method: str, overlay: str): """Update scatter plot on layer/method change.""" if current_result is None: return _empty_plot("Run inference first") return create_scatter_plot( current_result, layer=int(layer), method=method, overlay_layers=overlay, ) def _build_all_outputs(result: ExtractionResult): """Build all plot outputs + status from an ExtractionResult.""" fig_attn = create_attention_heatmap(result, layer=0, head="average") fig_mag = create_magnitude_chart(result, metric="mean_l2") fig_grid = create_token_layer_grid(result, normalize="global") fig_scatter = create_scatter_plot(result, layer=18, method="pca") fig_fp = create_fingerprint_strip(result) status = get_status_text(result, extractor.model_loaded) return fig_attn, fig_mag, fig_grid, fig_scatter, fig_fp, status def _empty_plot(message: str): """Return a blank Plotly figure with a centered message.""" import plotly.graph_objects as go fig = go.Figure() fig.add_annotation( text=message, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(color=TEXT, size=16), ) fig.update_layout( paper_bgcolor=BG_DARK, plot_bgcolor=BG_DARK, xaxis=dict(visible=False), yaxis=dict(visible=False), height=400, ) return fig # --------------------------------------------------------------------------- # Tab 2: Compare β callbacks # --------------------------------------------------------------------------- def run_compare(prompt_a: str, prompt_b: str): """Run inference on both prompts and build comparison outputs.""" global compare_result_a, compare_result_b if not prompt_a.strip(): prompt_a = "The quick brown fox jumps over the lazy dog" if not prompt_b.strip(): prompt_b = "A slow red cat sleeps under the warm sun" extract_fn = extractor.extract if extractor.model_loaded else ActivationExtractor.generate_demo_data try: compare_result_a = extract_fn(prompt_a) except Exception: compare_result_a = ActivationExtractor.generate_demo_data(prompt_a) try: compare_result_b = extract_fn(prompt_b) except Exception: compare_result_b = ActivationExtractor.generate_demo_data(prompt_b) return _build_compare_outputs(compare_result_a, compare_result_b) def run_compare_demo(prompt_a: str, prompt_b: str): """Generate demo data for both prompts.""" global compare_result_a, compare_result_b if not prompt_a.strip(): prompt_a = "The quick brown fox jumps over the lazy dog" if not prompt_b.strip(): prompt_b = "A slow red cat sleeps under the warm sun" compare_result_a = ActivationExtractor.generate_demo_data(prompt_a) compare_result_b = ActivationExtractor.generate_demo_data(prompt_b) return _build_compare_outputs(compare_result_a, compare_result_b) def update_compare_attention(layer: int, head: str): if compare_result_a is None or compare_result_b is None: return _empty_plot("Run comparison first") return create_attention_comparison(compare_result_a, compare_result_b, layer=int(layer), head=head) def update_compare_magnitude(metric: str): if compare_result_a is None or compare_result_b is None: return _empty_plot("Run comparison first") return create_magnitude_comparison(compare_result_a, compare_result_b, metric=metric) def update_compare_grid(normalize: str): if compare_result_a is None or compare_result_b is None: return _empty_plot("Run comparison first") return create_token_layer_comparison(compare_result_a, compare_result_b, normalize=normalize) def update_compare_scatter(layer: int, method: str): if compare_result_a is None or compare_result_b is None: return _empty_plot("Run comparison first") return create_scatter_comparison(compare_result_a, compare_result_b, layer=int(layer), method=method) def _build_compare_outputs(result_a: ExtractionResult, result_b: ExtractionResult): """Build all comparison plot outputs.""" fig_attn = create_attention_comparison(result_a, result_b, layer=0, head="average") fig_mag = create_magnitude_comparison(result_a, result_b, metric="mean_l2") fig_grid = create_token_layer_comparison(result_a, result_b, normalize="global") fig_scatter = create_scatter_comparison(result_a, result_b, layer=18, method="pca") fig_fp = create_fingerprint_comparison(result_a, result_b) mode = "π§ͺ Demo" if result_a.is_demo else "π§ Real" status = ( f"**Comparison:** {mode} | " f"Prompt A: {len(result_a.tokens)} tokens ({result_a.inference_time:.3f}s) | " f"Prompt B: {len(result_b.tokens)} tokens ({result_b.inference_time:.3f}s)" ) return fig_attn, fig_mag, fig_grid, fig_scatter, fig_fp, status # --------------------------------------------------------------------------- # Tab 3: Generate β streaming callbacks # --------------------------------------------------------------------------- def run_generate(prompt: str, max_tokens: int): """Stream token generation with live activation updates.""" if not prompt.strip(): prompt = "Once upon a time" max_tokens = int(max_tokens) if extractor.model_loaded: gen = extractor.generate_streaming(prompt, max_new_tokens=max_tokens) else: gen = ActivationExtractor.generate_demo_streaming(prompt, max_new_tokens=max_tokens) for result in gen: text_display = " ".join(result.tokens) fig_mag = create_magnitude_chart(result, metric="mean_l2") fig_grid = create_token_layer_grid(result, normalize="global") fig_fp = create_fingerprint_strip(result) status = ( f"**Generating:** {len(result.tokens)} tokens | " f"β± {result.inference_time:.2f}s | " f"{'π§ͺ Demo' if result.is_demo else 'π§ Real'}" ) yield text_display, fig_mag, fig_grid, fig_fp, status # --------------------------------------------------------------------------- # Gradio UI # --------------------------------------------------------------------------- def build_app() -> tuple[gr.Blocks, gr.themes.Base]: """Construct the Gradio Blocks interface.""" theme = gr.themes.Base( primary_hue=gr.themes.colors.yellow, secondary_hue=gr.themes.colors.blue, neutral_hue=gr.themes.colors.gray, font=["Inter", "system-ui", "sans-serif"], ).set( body_background_fill="#0f0f23", body_background_fill_dark="#0f0f23", block_background_fill="#16162b", block_background_fill_dark="#16162b", block_border_color="#2a2a4e", block_border_color_dark="#2a2a4e", block_title_text_color="#e6b800", block_title_text_color_dark="#e6b800", block_label_text_color="#e0e0e0", block_label_text_color_dark="#e0e0e0", input_background_fill="#1a1a2e", input_background_fill_dark="#1a1a2e", input_border_color="#2a2a4e", input_border_color_dark="#2a2a4e", button_primary_background_fill="#e6b800", button_primary_background_fill_dark="#e6b800", button_primary_text_color="#0f0f23", button_primary_text_color_dark="#0f0f23", button_secondary_background_fill="#2a2a4e", button_secondary_background_fill_dark="#2a2a4e", button_secondary_text_color="#e0e0e0", button_secondary_text_color_dark="#e0e0e0", ) with gr.Blocks(title="NeuroScope") as app: # Header gr.HTML( '
Neural Network Activation Visualizer β ' 'See inside Qwen3-4B during inference
' '