"""LLM Explorer -- Interactive tools for understanding how LLMs work. Gradio app with four tabs: 1. Probability Explorer (step-by-step or bulk generation) 2. Tokenizer 3. System Prompt Explorer (instruct models) 4. Attention Explorer (GPT-2 attention visualization) Plus a password-protected Admin panel for runtime configuration. """ import json import math import os import subprocess import urllib.parse from datetime import datetime, timezone, timedelta import gradio as gr import re from models import AVAILABLE_MODELS, manager, demo_tokenizer from attention import get_attention_data, build_attention_html # --------------------------------------------------------------------------- # Admin password -- set via env var on HF Spaces, or fall back to default # --------------------------------------------------------------------------- ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "admin") # Build timestamp (Central Time) — computed once at import/startup _CT = timezone(timedelta(hours=-6)) BUILD_TIMESTAMP = datetime.now(_CT).strftime("%Y-%m-%d %H:%M:%S") # --------------------------------------------------------------------------- # Share / URL shortening # --------------------------------------------------------------------------- REBRANDLY_API_KEY = os.environ.get("REBRANDLY_API_KEY", "") _SPACE_ID = os.environ.get("SPACE_ID", "") if _SPACE_ID: _owner, _name = _SPACE_ID.split("/") _BASE_URL = f"https://{_owner}-{_name}.hf.space/" else: _BASE_URL = "http://localhost:7860/" # --------------------------------------------------------------------------- # Theme — use .set() for Gradio-controlled element colors # --------------------------------------------------------------------------- THEME = gr.themes.Soft( primary_hue=gr.themes.Color( c50="#faf8fc", c100="#f3f0f7", c200="#ded9f4", c300="#c4b5e0", c400="#a78bcc", c500="#8b5fb8", c600="#7c3aad", c700="#63348d", c800="#52296f", c900="#421f59", c950="#2a1339", ), neutral_hue="slate", font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"], ).set( # Primary buttons: dark purple with white text button_primary_background_fill="#63348d", button_primary_background_fill_dark="#63348d", button_primary_text_color="white", button_primary_text_color_dark="white", button_primary_border_color="#63348d", button_primary_border_color_dark="#63348d", button_primary_background_fill_hover="#4e2870", button_primary_background_fill_hover_dark="#7c3aad", # Secondary buttons: dark purple outline button_secondary_text_color="#63348d", button_secondary_text_color_dark="#ded9f4", button_secondary_border_color="#63348d", button_secondary_border_color_dark="#475569", # Block backgrounds: light purple in light, dark slate in dark block_background_fill="#f3f0f7", block_background_fill_dark="#1e293b", panel_background_fill="#f3f0f7", panel_background_fill_dark="#0f172a", ) # --------------------------------------------------------------------------- # Custom CSS — headings, tabs, dark mode, output HTML # --------------------------------------------------------------------------- CUSTOM_CSS = """ @import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;700&family=Merriweather:wght@300;400;700;900&display=swap'); .gradio-container { max-width: 960px !important; } /* Headings: Merriweather, dark purple / light purple */ h1, h2, h3, .gradio-container h1, .gradio-container h2, .gradio-container h3 { font-family: 'Merriweather', serif !important; color: #63348d !important; } body.dark 
h1, body.dark h2, body.dark h3, body.dark .gradio-container h1, body.dark .gradio-container h2, body.dark .gradio-container h3, .dark h1, .dark h2, .dark h3, .dark .gradio-container h1, .dark .gradio-container h2, .dark .gradio-container h3 { color: #ded9f4 !important; } /* Tab labels: dark purple / light purple */ .tab-nav button, .tabs button.tab-nav-button { color: #63348d !important; } body.dark .tab-nav button, .dark .tabs button.tab-nav-button { color: #ded9f4 !important; } /* Dark mode toggle button */ #dark-mode-btn { color: white !important; border-color: #63348d !important; background: #63348d !important; } body.dark #dark-mode-btn { color: #ded9f4 !important; border-color: #475569 !important; background: #1e293b !important; } /* --- CSS variables for output HTML --- */ :root { --llm-card-bg: #ffffff; --llm-card-border: #e2e8f0; --llm-text-bg: #f8fafc; --llm-prompt-color: #475569; --llm-generated-color: #1e293b; --llm-highlight-bg: #ded9f4; --llm-highlight-color: #63348d; --llm-step-header-color: #63348d; --llm-entropy-color: #64748b; --llm-prob-label-color: #1e293b; --llm-prob-value-color: #334155; --llm-bar-selected: #63348d; --llm-bar-default: #c4b5d4; --llm-token-count-color: #64748b; --llm-help-color: #64748b; } body.dark, .dark { --llm-card-bg: #1e293b; --llm-card-border: #334155; --llm-text-bg: #0f172a; --llm-prompt-color: #94a3b8; --llm-generated-color: #e2e8f0; --llm-highlight-bg: #63348d; --llm-highlight-color: #ded9f4; --llm-step-header-color: #ded9f4; --llm-entropy-color: #94a3b8; --llm-prob-label-color: #e2e8f0; --llm-prob-value-color: #94a3b8; --llm-bar-selected: #a78bcc; --llm-bar-default: #4a3a64; --llm-token-count-color: #94a3b8; --llm-help-color: #94a3b8; } /* Parameter help text */ .param-help p { font-size: 13px !important; color: var(--llm-help-color) !important; margin-top: -4px !important; margin-bottom: 8px !important; line-height: 1.4 !important; } /* --- System Prompt Explorer: green terminal --- */ .green-terminal { font-family: 'IBM Plex Mono', 'JetBrains Mono', 'Courier New', monospace !important; font-size: 13px !important; line-height: 1.5 !important; background: #0a0a0a !important; color: #33ff33 !important; border-radius: 0.5rem !important; border: 2px solid #1a1a1a !important; box-shadow: inset 0 0 20px rgba(51, 255, 51, 0.05) !important; padding: 16px !important; min-height: 200px; max-height: 500px; overflow-y: auto; white-space: pre-wrap; word-wrap: break-word; } .green-terminal::-webkit-scrollbar { width: 8px; } .green-terminal::-webkit-scrollbar-track { background: #1a1a1a; border-radius: 4px; } .green-terminal::-webkit-scrollbar-thumb { background: #33ff33; border-radius: 4px; opacity: 0.5; } .green-terminal .sp-label { color: #22c55e; font-weight: bold; } .green-terminal .sp-special { color: #666666; } .green-terminal .sp-system { color: #facc15; } .green-terminal .sp-user { color: #60a5fa; } .green-terminal .sp-assistant { color: #33ff33; } .green-terminal .sp-response { color: #f0f0f0; } /* Response card */ .response-card { border: 1px solid var(--llm-card-border); border-radius: 8px; padding: 16px; background: var(--llm-card-bg); font-size: 15px; line-height: 1.6; } /* Attention Explorer example pairs */ .attn-example-row { background: #f8f6fb !important; border-radius: 6px !important; padding: 6px 10px !important; margin-bottom: 2px !important; align-items: flex-start !important; gap: 6px !important; flex-wrap: wrap !important; } .attn-example-label { min-width: 70px !important; max-width: 70px !important; flex-shrink: 0 !important; 
padding-top: 4px !important; } .attn-example-label p { font-family: 'Merriweather', serif !important; color: #63348d !important; font-size: 13px !important; margin: 0 !important; } .attn-example-btn { flex: 0 0 auto !important; min-width: 0 !important; max-width: fit-content !important; } .attn-example-btn button { font-size: 12px !important; padding: 4px 12px !important; border: 1.5px solid #d4c8e8 !important; border-radius: 14px !important; background: #fff !important; color: #4a3070 !important; white-space: nowrap !important; width: auto !important; text-align: left !important; } .attn-example-btn button:hover { background: #f3f0f7 !important; border-color: #63348d !important; } """ # Dark mode toggle JS — toggles class and swaps button text + heading colors DARK_MODE_JS = """ () => { document.body.classList.toggle('dark'); const isDark = document.body.classList.contains('dark'); const el = document.getElementById('dark-mode-btn'); if (el) el.innerText = isDark ? 'Light mode' : 'Dark mode'; document.querySelectorAll('h1, h2, h3').forEach(h => { h.style.setProperty('color', isDark ? '#ded9f4' : '#63348d', 'important'); }); } """ # Force light mode on page load (override browser/system dark mode preference) FORCE_LIGHT_MODE_JS = """ () => { document.body.classList.remove('dark'); const el = document.getElementById('dark-mode-btn'); if (el) el.innerText = 'Dark mode'; document.querySelectorAll('h1, h2, h3').forEach(h => { h.style.setProperty('color', '#63348d', 'important'); }); } """ # --------------------------------------------------------------------------- # Share helpers # --------------------------------------------------------------------------- def _shorten_url(long_url: str) -> str: """Shorten a URL via Rebrandly API (using curl). Falls back to long URL on failure.""" if not REBRANDLY_API_KEY or "localhost" in long_url: return long_url try: payload = json.dumps({ "destination": long_url, "domain": {"fullName": "go.ropavieja.org"}, }) result = subprocess.run( [ "curl", "-s", "-X", "POST", "https://api.rebrandly.com/v1/links", "-H", "Content-Type: application/json", "-H", f"apikey: {REBRANDLY_API_KEY}", "-d", payload, ], capture_output=True, text=True, timeout=10, ) if result.returncode != 0 or not result.stdout.strip(): return long_url data = json.loads(result.stdout) return f"https://{data['shortUrl']}" except (subprocess.TimeoutExpired, KeyError, json.JSONDecodeError, OSError) as exc: print(f"[share] Rebrandly error: {exc}") return long_url def _safe_float(val, default: float) -> float: try: return float(val) except (TypeError, ValueError): return default def _safe_int(val, default: int) -> int: try: return int(val) except (TypeError, ValueError): return default def share_probability_explorer(prompt, temp, top_k, steps, show_steps, seed): """Build a share URL encoding current Probability Explorer settings.""" params = { "tab": "prob", "p": prompt, "t": str(float(temp)), "k": str(int(top_k)), "s": str(int(steps)), "ss": "1" if show_steps else "0", "seed": str(int(seed)), } long_url = _BASE_URL + "?" + urllib.parse.urlencode(params) return _shorten_url(long_url) def share_tokenizer(text): """Build a share URL encoding current Tokenizer input.""" params = {"tab": "tok", "text": text} long_url = _BASE_URL + "?" 
+ urllib.parse.urlencode(params)
    return _shorten_url(long_url)


# ---------------------------------------------------------------------------
# HTML rendering helpers
# ---------------------------------------------------------------------------

# Alternating token chip colors
TOKEN_COLORS = [
    ("#e0f2fe", "#0c4a6e"),  # light blue / dark blue
    ("#fef3c7", "#92400e"),  # light amber / dark amber
    ("#d1fae5", "#065f46"),  # light green / dark green
    ("#fce7f3", "#9d174d"),  # light pink / dark pink
    ("#e0e7ff", "#3730a3"),  # light indigo / dark indigo
    ("#fde68a", "#78350f"),  # yellow / brown
]


def _esc(text: str) -> str:
    """Escape HTML special characters."""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _token_label(token_str: str) -> str:
    """Format a token for display in the probability table (no quotes, trimmed)."""
    display = _esc(token_str).strip()
    if not display:
        return "\u2423"  # visible space symbol
    return display


def _highlight_token(token_str: str) -> str:
    """Wrap the non-whitespace part of a token in a highlight span.

    Leading/trailing whitespace stays outside the highlight so the purple
    chip only covers the visible characters.
    """
    escaped = _esc(token_str)
    stripped = escaped.strip()
    if not stripped:
        # Whole token is whitespace: show a visible space symbol, highlighted.
        return (
            '<span style="background:var(--llm-highlight-bg); '
            'color:var(--llm-highlight-color); border-radius:3px; '
            'padding:0 2px;">\u2423</span>'
        )
    leading_count = len(escaped) - len(escaped.lstrip())
    trailing_count = len(escaped) - len(escaped.rstrip())
    highlight = (
        '<span style="background:var(--llm-highlight-bg); '
        'color:var(--llm-highlight-color); border-radius:3px; '
        f'padding:0 2px;">{stripped}</span>'
    )
    parts = []
    if leading_count:
        parts.append(escaped[:leading_count])
    parts.append(highlight)
    if trailing_count:
        parts.append(escaped[len(escaped) - trailing_count:])
    return "".join(parts)


def _render_step_html(step_data: dict, prompt: str, prev_generated: str) -> str:
    """Render one generation step as styled HTML.

    prev_generated is the text generated in all prior steps (between prompt
    and the latest token), so only the newest token gets highlighted.
    Inline styles use the CSS variables defined in CUSTOM_CSS so the cards
    track light/dark mode.
    """
    s = step_data
    # Build probability bar chart rows
    rows_html = ""
    for token_str, prob, tid in s["top_tokens"]:
        bar_width = max(1, int(prob * 300)) if not math.isnan(prob) else 1
        is_selected = tid == s["token_id"]
        bar_color = "var(--llm-bar-selected)" if is_selected else "var(--llm-bar-default)"
        label_style = "font-weight:700;" if is_selected else ""
        arrow = " \u2190" if is_selected else ""
        token_display = _token_label(token_str)
        rows_html += f"""
        <div style="display:flex; align-items:center; gap:8px; margin:2px 0;">
          <span style="min-width:120px; {label_style} color:var(--llm-prob-label-color); font-family:monospace;" title="id {tid}">{token_display}</span>
          <div style="width:{bar_width}px; height:14px; border-radius:3px; background:{bar_color};"></div>
          <span style="color:var(--llm-prob-value-color); font-family:monospace;">{"0.0000" if math.isnan(prob) else f"{prob:.4f}"}{arrow}</span>
        </div>"""
    highlighted = _highlight_token(s["token"])
    return f"""
    <div class="response-card" style="margin-bottom:12px;">
      <div style="display:flex; justify-content:space-between; align-items:baseline; margin-bottom:8px;">
        <span style="color:var(--llm-step-header-color); font-weight:700;">Step {s['step']}</span>
        <span style="color:var(--llm-entropy-color); font-size:13px;">Entropy: {s['entropy']:.2f} bits</span>
      </div>
      <div style="background:var(--llm-text-bg); padding:10px; border-radius:6px; margin-bottom:10px;">
        <span style="color:var(--llm-prompt-color);">{_esc(prompt)}</span><span style="color:var(--llm-generated-color);">{_esc(prev_generated)}</span>{highlighted}
      </div>
      {rows_html}
    </div>"""


def _render_final_text_html(prompt: str, generated_text: str) -> str:
    """Render final text with all generated text highlighted (show-steps OFF mode)."""
    generated = generated_text[len(prompt):]
    escaped = _esc(generated)
    stripped = escaped.lstrip()
    leading = escaped[:len(escaped) - len(stripped)]
    return f"""
    <div class="response-card">
      <span style="color:var(--llm-prompt-color);">{_esc(prompt)}</span>{leading}<span style="background:var(--llm-highlight-bg); color:var(--llm-highlight-color); border-radius:3px; padding:0 2px;">{stripped}</span>
    </div>"""


def _render_tokens_html(tokens: list[tuple[str, int]]) -> str:
    """Render tokenized text as colored chips."""
    chips = ""
    for i, (token_str, tid) in enumerate(tokens):
        bg, fg = TOKEN_COLORS[i % len(TOKEN_COLORS)]
        display = _esc(token_str)
        # Show spaces explicitly
        if display.strip() == "":
            display = repr(token_str).strip("'")
        chips += f"""<span style="background:{bg}; color:{fg}; border-radius:4px; padding:2px 5px; margin:1px; display:inline-block; font-family:monospace; font-size:13px;" title="id {tid}">{display}</span>"""
    return f"""
    <div class="response-card">
      <div style="color:var(--llm-token-count-color); font-size:13px; margin-bottom:8px;">{len(tokens)} tokens — hover for token IDs</div>
      <div style="line-height:2.1;">{chips}</div>
    </div>"""
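# Illustration (hypothetical values; real pairs come from
# demo_tokenizer.tokenize()):
#
#     _render_tokens_html([("Hello", 15496), (" world", 995)])
#
# renders a "2 tokens" header plus one chip per token, cycling through
# TOKEN_COLORS, with each token's ID shown in the chip's hover title.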
# ---------------------------------------------------------------------------
# Tab 1: Probability Explorer
# ---------------------------------------------------------------------------

def explore_probabilities(prompt, temperature, top_k, steps, show_steps, seed):
    """Generate tokens step by step and return formatted HTML."""
    if not manager.is_ready():
        return f"""
        <div class="response-card">
          <span style="color:var(--llm-help-color);">{manager.status_message()}</span>
        </div>"""
    seed = int(seed)
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=seed,
        show_steps=show_steps,
    )
    if not results:
        return """
        <div class="response-card">
          <span style="color:var(--llm-help-color);">No results generated.</span>
        </div>"""
    if show_steps:
        html_parts = []
        for i, r in enumerate(results):
            prev_generated = results[i - 1]["text"][len(prompt):] if i > 0 else ""
            html_parts.append(_render_step_html(r, prompt, prev_generated))
        return "\n".join(html_parts)
    else:
        final_text = results[-1]["text"]
        return _render_final_text_html(prompt, final_text)


def _build_slide_rows(step_data, top_k, bar_max):
    """Build probability bar HTML rows for a slideshow slide."""
    rows = ""
    display_tokens = step_data["top_tokens"][:int(top_k)]
    for token_str, prob, tid in display_tokens:
        is_selected = tid == step_data["token_id"]
        bar_width = max(2, int(prob * bar_max)) if not math.isnan(prob) else 2
        bar_color = "var(--llm-bar-selected)" if is_selected else "var(--llm-bar-default)"
        sel_class = " selected" if is_selected else ""
        arrow = " \u2190" if is_selected else ""
        prob_str = "0.0000" if math.isnan(prob) else f"{prob:.4f}"
        token_display = _token_label(token_str)
        rows += f'''
        <div class="prob-row{sel_class}">
          <span class="prob-label" title="id {tid}">{token_display}</span>
          <div class="prob-bar" style="width:{bar_width}px; background:{bar_color};"></div>
          <span class="prob-value">{prob_str}{arrow}</span>
        </div>'''
    return rows
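# A minimal sketch of the step-dict shape the renderers above consume. The
# values here are made up; real dicts come from manager.generate_step_by_step()
# and carry "step", "token", "token_id", "entropy", "text", and "top_tokens"
# as (token_str, probability, token_id) triples. This helper is illustrative
# only and is not wired into the app.
def _demo_step_render() -> str:
    sample_step = {
        "step": 1,
        "token": " fox",
        "token_id": 21831,
        "entropy": 3.42,
        "text": "The quick brown fox",
        "top_tokens": [(" fox", 0.41, 21831), (" dog", 0.22, 3290), (" cat", 0.09, 3797)],
    }
    return _render_step_html(sample_step, prompt="The quick brown", prev_generated="")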
def generate_slideshow(prompt, temperature, top_k, steps, seed):
    """Generate tokens and return a self-contained HTML slideshow file."""
    if not manager.is_ready():
        return None
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=int(seed),
        show_steps=True,
    )
    if not results:
        return None

    # Build slides HTML
    slides_html = ""
    bar_max = 900

    # Slide 0: prompt-only text + first step's probability table
    slide0_rows = _build_slide_rows(results[0], top_k, bar_max)
    slide0_text = f'<span class="prompt">{_esc(prompt)}</span>'
    slides_html += f'''
    <section class="slide">
      <div class="slide-text">{slide0_text}</div>
      <div class="slide-rows">{slide0_rows}</div>
    </section>'''

    # Slides 1..N: each step with highlighted token
    for i, r in enumerate(results):
        prev_generated = results[i - 1]["text"][len(prompt):] if i > 0 else ""
        highlighted_token = _highlight_token(r["token"])
        text_html = (
            f'<span class="prompt">{_esc(prompt)}</span>'
            f'<span class="generated">{_esc(prev_generated)}</span>'
            f'{highlighted_token}'
        )
        rows_html = _build_slide_rows(r, top_k, bar_max)
        slides_html += f'''
    <section class="slide">
      <div class="slide-text">{text_html}</div>
      <div class="slide-rows">{rows_html}</div>
    </section>'''

    total = len(results) + 1
    # Self-contained page: inline styles plus arrow-key / click navigation,
    # with a "1 / {total}" counter pinned to the corner.
    html = f'''<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>LLM Explorer — Slides</title>
<style>
  :root {{ --llm-bar-selected: #63348d; --llm-bar-default: #c4b5d4;
           --llm-highlight-bg: #ded9f4; --llm-highlight-color: #63348d; }}
  body {{ font-family: Inter, system-ui, sans-serif; margin: 48px; }}
  .slide {{ display: none; }}
  .slide.active {{ display: block; }}
  .slide-text {{ font-size: 22px; line-height: 1.6; margin-bottom: 28px; }}
  .prompt {{ color: #475569; }}
  .generated {{ color: #1e293b; }}
  .prob-row {{ display: flex; align-items: center; gap: 8px; margin: 3px 0; }}
  .prob-label {{ min-width: 140px; font-family: monospace; }}
  .prob-row.selected .prob-label {{ font-weight: 700; }}
  .prob-bar {{ height: 16px; border-radius: 3px; }}
  .prob-value {{ font-family: monospace; color: #334155; }}
  #counter {{ position: fixed; bottom: 16px; right: 24px; color: #64748b; }}
</style>
</head>
<body>
{slides_html}
<div id="counter">1 / {total}</div>
<script>
  const slides = document.querySelectorAll('.slide');
  let idx = 0;
  function show(i) {{
    idx = Math.max(0, Math.min(slides.length - 1, i));
    slides.forEach((s, j) => s.classList.toggle('active', j === idx));
    document.getElementById('counter').innerText = (idx + 1) + ' / ' + slides.length;
  }}
  document.addEventListener('keydown', (e) => {{
    if (e.key === 'ArrowRight' || e.key === ' ') show(idx + 1);
    if (e.key === 'ArrowLeft') show(idx - 1);
  }});
  document.addEventListener('click', () => show(idx + 1));
  show(0);
</script>
</body>
</html>'''
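    # The finished page is written to a NamedTemporaryFile with delete=False
    # and the path handed to the admin panel's gr.File component, which
    # serves it as a download; the file outlives this handler on purpose.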
    # Write to temp file
    import tempfile
    tmp = tempfile.NamedTemporaryFile(
        suffix=".html", prefix="llm-slides-", delete=False, mode="w"
    )
    tmp.write(html)
    tmp.close()
    return tmp.name


def on_show_steps_change(show_steps):
    """Adjust max steps slider when show_steps toggles.

    When show_steps is turned ON, also reset to the default steps value
    (since high step counts are only useful for bulk generation).
    """
    cfg = manager.get_config()
    if show_steps:
        return gr.update(maximum=20, value=cfg.get("default_steps", 8))
    else:
        return gr.update(maximum=100)


# ---------------------------------------------------------------------------
# Tab 2: Tokenizer
# ---------------------------------------------------------------------------

def tokenize_text(text):
    """Tokenize input and return formatted HTML.

    Uses GPT-2's tokenizer (not the generation model's tokenizer) because
    GPT-2's smaller vocabulary produces more interesting subword splits.
    """
    tokens = demo_tokenizer.tokenize(text)
    return _render_tokens_html(tokens)


# ---------------------------------------------------------------------------
# Tab 3: System Prompt Explorer
# ---------------------------------------------------------------------------

MAX_CHAT_TURNS = 10  # max user messages before forcing reset


def _get_presets() -> dict:
    """Get current system prompt presets from config."""
    return manager.get_config().get("system_prompt_presets", {})


def _esc_terminal(text: str) -> str:
    """Escape HTML for terminal display."""
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")


def _format_terminal(raw_text: str) -> str:
    """Parse a chat template string and produce color-coded HTML for the
    green terminal.

    Works with both Llama (<|start_header_id|>) and Qwen (<|im_start|>)
    templates.
    """
    # Split on special tokens, keeping them
    parts = re.split(r'(<\|[^|]*\|>)', raw_text)
    html_parts = []
    current_role = None
    expect_role = False  # true right after a token that precedes a role label
    role_css = {
        "system": "sp-system",
        "user": "sp-user",
        "assistant": "sp-assistant",
    }
    for part in parts:
        if re.match(r'<\|[^|]*\|>', part):
            # Special token — render in gray
            html_parts.append(f"<span class='sp-special'>{_esc_terminal(part)}</span>")
            # After im_start or start_header_id, next text chunk is a role label
            expect_role = ("im_start" in part or "start_header_id" in part)
        elif expect_role and part.strip() in role_css:
            # Role label (system / user / assistant)
            role = part.strip()
            current_role = role
            before = _esc_terminal(part[: len(part) - len(part.lstrip())])
            after = _esc_terminal(part[len(part.rstrip()):])
            html_parts.append(f"{before}<span class='{role_css[role]}'>{role}</span>{after}")
            expect_role = False
        else:
            expect_role = False
            css = role_css.get(current_role, "")
            if css and part.strip():
                html_parts.append(f"<span class='{css}'>{_esc_terminal(part)}</span>")
            else:
                html_parts.append(_esc_terminal(part))
    return "<div class='green-terminal'>" + "".join(html_parts) + "</div>"


def _initial_terminal() -> str:
    return ("<div class='green-terminal'><span class='sp-label'>"
            "Send a message to see what the model receives.</span></div>")


# Design note: chat history lives in the sp_chat_state gr.State as a list of
# plain {"role": ..., "content": ...} dicts, and the Chatbot component is
# re-derived from that list on every turn.
def send_chat_message(user_message, history, system_prompt, max_tokens, temperature, seed):
    """Handle a user message: generate response, update state + chatbot + terminal.

    `history` is the gr.State list of clean {"role": ..., "content": ...}
    dicts. The Chatbot is derived from this — never read back from Chatbot
    (Gradio mangles the dicts on round-trip).
    """
    if not user_message or not user_message.strip():
        chatbot = [{"role": m["role"], "content": m["content"]} for m in history]
        return "", history, chatbot, _format_terminal_from_history(history, system_prompt)
    if not manager.chat_ready():
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": "No chat model loaded. Load one from the Admin tab."},
        ]
        chatbot = [{"role": m["role"], "content": m["content"]} for m in history]
        return "", history, chatbot, _initial_terminal()

    # Check turn limit
    user_turns = sum(1 for m in history if m["role"] == "user")
    if user_turns >= MAX_CHAT_TURNS:
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": f"Conversation limit reached ({MAX_CHAT_TURNS} exchanges). Click Reset to start a new conversation."},
        ]
        chatbot = [{"role": m["role"], "content": m["content"]} for m in history]
        return "", history, chatbot, _format_terminal_from_history(history, system_prompt)

    # Build full messages for the model
    history = history + [{"role": "user", "content": user_message}]
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history)

    result = manager.generate_chat(
        messages=messages,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        seed=int(seed),
    )
    if "error" in result:
        history = history + [
            {"role": "assistant", "content": f"Error: {result['error']}"},
        ]
        chatbot = [{"role": m["role"], "content": m["content"]} for m in history]
        return "", history, chatbot, _format_terminal_from_history(history, system_prompt)

    history = history + [{"role": "assistant", "content": result["response"]}]
    chatbot = [{"role": m["role"], "content": m["content"]} for m in history]
    terminal_html = _format_terminal(result["formatted_display"])
    return "", history, chatbot, terminal_html


def _format_terminal_from_history(chat_history, system_prompt):
    """Build terminal display from chat history (without generating)."""
    if not chat_history:
        return _initial_terminal()
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(chat_history)
    formatted = manager.format_chat_template(messages)
    if not formatted:
        return _initial_terminal()
    return _format_terminal(formatted)


def reset_chat(system_prompt):
    """Clear chat history, keep the system prompt, and show the initial
    terminal with just the system prompt. Returns (state, chatbot, terminal).
    """
    if system_prompt and system_prompt.strip() and manager.chat_ready():
        messages = [{"role": "system", "content": system_prompt}]
        formatted = manager.format_chat_template(messages)
        return [], [], _format_terminal(formatted)
    return [], [], _initial_terminal()


def on_preset_change(preset_name):
    """Update system prompt textbox when a preset is selected.
No chat reset.""" return _get_presets().get(preset_name, "") # --------------------------------------------------------------------------- # Admin panel # --------------------------------------------------------------------------- def admin_login(password): """Check admin password and return visibility update.""" if password == ADMIN_PASSWORD: return gr.update(visible=True), gr.update(visible=False), "" return gr.update(visible=False), gr.update(visible=True), "Incorrect password." def admin_load_model(model_name): """Load a new base model from admin panel.""" status = manager.load_model(model_name) cfg = manager.get_config() header_status = f"**{manager.status_message()}**" return status, json.dumps(cfg, indent=2), header_status def admin_load_chat_model(model_name): """Load a new chat model from admin panel.""" status = manager.load_chat_model(model_name) cfg = manager.get_config() header_status = f"**{manager.status_message()}**" return status, json.dumps(cfg, indent=2), header_status def admin_save_defaults(prompt, tokenizer_text, temperature, top_k, steps, seed): """Save default settings and return updated values for all outputs.""" manager.update_config( default_prompt=prompt, default_tokenizer_text=tokenizer_text, default_temperature=temperature, default_top_k=int(top_k), default_steps=int(steps), default_seed=int(seed), ) cfg = manager.get_config() return ( "Defaults saved.", json.dumps(cfg, indent=2), prompt, temperature, int(top_k), int(steps), int(seed), tokenizer_text, ) def admin_save_presets(presets_json): """Save system prompt presets from admin panel. Returns (status_msg, config_json, dropdown_update, presets_json_display). """ try: presets = json.loads(presets_json) except (json.JSONDecodeError, TypeError) as e: cfg = manager.get_config() return ( f"Invalid JSON: {e}", json.dumps(cfg, indent=2), gr.update(), gr.update(), ) if not isinstance(presets, dict): cfg = manager.get_config() return ( "Presets must be a JSON object `{\"Name\": \"prompt\", ...}`", json.dumps(cfg, indent=2), gr.update(), gr.update(), ) manager.update_config(system_prompt_presets=presets) cfg = manager.get_config() return ( f"Presets saved ({len(presets)} presets).", json.dumps(cfg, indent=2), gr.update(choices=list(presets.keys())), json.dumps(presets, indent=2), ) def admin_save_attention(attn_model, examples_json): """Save attention explorer settings from admin panel. Returns (status_msg, config_json). """ try: examples = json.loads(examples_json) except (json.JSONDecodeError, TypeError) as e: cfg = manager.get_config() return f"Invalid JSON: {e}", json.dumps(cfg, indent=2) if not isinstance(examples, list): cfg = manager.get_config() return "Examples must be a JSON array", json.dumps(cfg, indent=2) manager.update_config( attention_model=attn_model.strip(), attention_examples=examples, ) cfg = manager.get_config() return ( f"Attention settings saved. Model: {attn_model.strip()}. " f"Note: model change takes effect on next Explore click. 
" f"Example changes require app restart.", json.dumps(cfg, indent=2), ) # --------------------------------------------------------------------------- # Build the Gradio app # --------------------------------------------------------------------------- def create_app(): cfg = manager.get_config() with gr.Blocks(title="LLM Explorer") as demo: # Header with dark mode toggle with gr.Row(): gr.Markdown("# LLM Explorer\n*Interactive tools for understanding how LLMs work*") dark_btn = gr.Button( "Dark mode", elem_id="dark-mode-btn", size="sm", scale=0, min_width=100, variant="secondary", ) dark_btn.click(fn=None, js=DARK_MODE_JS) # Status bar status_display = gr.Markdown(value=f"**{manager.status_message()}**") # Hidden state for share-link params (avoids race conditions with component updates) share_params = gr.State(value={}) # ================================================================== # Tabs # ================================================================== with gr.Tabs() as tabs: # ================================================================== # Tab 1: Probability Explorer # ================================================================== with gr.Tab("Probability Explorer", id="prob"): gr.Markdown("### Step-by-Step Next-Token Prediction") gr.Markdown( "Enter a prompt and watch the model predict one token at a time. " "Each step shows the probability distribution over the vocabulary." ) t1_prompt = gr.Textbox( label="Prompt", value=cfg.get("default_prompt", "The best thing about Huston-Tillotson University is"), lines=2, ) # --- Generation settings --- gr.Markdown("#### Settings") t1_temperature = gr.Slider( label="Temperature", minimum=0, maximum=2.5, step=0.1, value=cfg.get("default_temperature", 0.8), ) gr.Markdown( "Controls randomness. At 0 the model always picks the most probable word; " "higher values make surprising choices more likely. " "Default 0.8 gives coherent but varied output.", elem_classes=["param-help"], ) t1_top_k = gr.Slider( label="Top-K", minimum=5, maximum=100, step=1, value=cfg.get("default_top_k", 10), ) gr.Markdown( "Limits which tokens the model considers and how many " "appear in the probability table.", elem_classes=["param-help"], ) t1_steps = gr.Slider( label="Steps", minimum=1, maximum=20, step=1, value=cfg.get("default_steps", 8), ) gr.Markdown( "How many tokens to generate.", elem_classes=["param-help"], ) # Show steps — own section with explanation t1_show_steps = gr.Checkbox( label="Show steps", value=True, ) gr.Markdown( "When on, each step shows the full probability table and which token " "was selected (max 20 steps). When off, just generates the final text " "(up to 100 steps).", elem_classes=["param-help"], ) with gr.Accordion("Random Seed", open=False): t1_seed = gr.Number( label="Random seed", value=cfg.get("default_seed", 42), precision=0, ) gr.Markdown( "Makes output reproducible -- same seed and settings always " "produce the same text. 
Change it to get different results.", elem_classes=["param-help"], ) with gr.Row(): t1_generate_btn = gr.Button("Generate", variant="primary") t1_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80) t1_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"]) t1_output = gr.HTML(label="Output") # Show steps toggle adjusts max steps t1_show_steps.change( fn=on_show_steps_change, inputs=[t1_show_steps], outputs=[t1_steps], ) t1_generate_btn.click( fn=explore_probabilities, inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed], outputs=[t1_output], ) # Share button t1_share_btn.click( fn=share_probability_explorer, inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed], outputs=[t1_share_url], ).then( fn=lambda: gr.update(visible=True), outputs=[t1_share_url], ) # ================================================================== # Tab 2: Tokenizer # ================================================================== with gr.Tab("Tokenizer", id="tok"): gr.Markdown("### Token Visualization") gr.Markdown( "See how text is split into tokens before the model processes it. " "Hover over each token to see its numeric ID. " "Uses GPT-2's tokenizer, which splits words into interesting subword pieces." ) t3_input = gr.Textbox( label="Text", value=cfg.get("default_tokenizer_text", "Huston-Tillotson University is an HBCU in Austin, Texas."), lines=3, ) with gr.Row(): t3_btn = gr.Button("Tokenize", variant="primary") t3_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80) t3_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"]) t3_output = gr.HTML(label="Tokens") t3_btn.click( fn=tokenize_text, inputs=[t3_input], outputs=[t3_output], ) # Share button t3_share_btn.click( fn=share_tokenizer, inputs=[t3_input], outputs=[t3_share_url], ).then( fn=lambda: gr.update(visible=True), outputs=[t3_share_url], ) # ================================================================== # Tab 3: System Prompt Explorer # ================================================================== with gr.Tab("System Prompt Explorer", id="sys"): gr.Markdown("### System Prompt Explorer") gr.Markdown( "See how **system prompts** change an LLM's behavior. " "Pick a preset or write your own, then chat with the model. " "The green terminal shows exactly what the model receives — " "every special token, every role label, every turn." ) presets = _get_presets() preset_names = list(presets.keys()) default_preset = "Helpful Assistant" if "Helpful Assistant" in presets else preset_names[0] if preset_names else "" sp_preset = gr.Dropdown( label="Preset", choices=preset_names, value=default_preset, interactive=True, ) sp_system = gr.Textbox( label="System Prompt", value=presets.get(default_preset, ""), lines=3, placeholder="Enter a system prompt, or select a preset above...", ) with gr.Accordion("Settings", open=False): sp_max_tokens = gr.Slider( label="Max tokens", minimum=32, maximum=1024, step=16, value=512, ) gr.Markdown( "Maximum number of tokens per response.", elem_classes=["param-help"], ) sp_temperature = gr.Slider( label="Temperature", minimum=0, maximum=2.0, step=0.1, value=0.7, ) sp_seed = gr.Number( label="Random seed", value=42, precision=0, ) with gr.Accordion("What the model sees", open=False): gr.Markdown( "The full text sent to the model on every turn — system prompt, " "all previous messages, and special tokens. 
Watch it grow with each exchange.", elem_classes=["param-help"], ) sp_terminal = gr.HTML(value=_initial_terminal()) gr.Markdown("#### Chat") gr.Markdown( "**No hidden system prompt.** This model's helpful behavior comes from " "fine-tuning (RLHF), not a secret prompt. When you add a system prompt above, " "it's the *only* instruction the model receives. Commercial APIs like ChatGPT " "and Claude prepend their own system prompts before yours — you can't see or " "remove them.", elem_classes=["param-help"], ) sp_chat_state = gr.State([]) sp_chatbot = gr.Chatbot(height=700, feedback_options=None) with gr.Row(): sp_user_input = gr.Textbox( label="Message", placeholder="Type a message...", lines=1, scale=4, show_label=False, ) sp_send_btn = gr.Button("Send", variant="primary", scale=0, min_width=80) sp_reset_btn = gr.Button("Reset", variant="secondary", scale=0, min_width=80) # --- Wiring --- # Preset dropdown → just fill in the textbox (no chat reset) sp_preset.change( fn=on_preset_change, inputs=[sp_preset], outputs=[sp_system], ) # System prompt textbox edits take effect on the next message sent. # No auto-reset — avoids losing conversation on accidental edits. # Use Reset button or pick a new preset to start fresh. # Send message (button or enter) send_inputs = [sp_user_input, sp_chat_state, sp_system, sp_max_tokens, sp_temperature, sp_seed] send_outputs = [sp_user_input, sp_chat_state, sp_chatbot, sp_terminal] sp_send_btn.click( fn=send_chat_message, inputs=send_inputs, outputs=send_outputs, ) sp_user_input.submit( fn=send_chat_message, inputs=send_inputs, outputs=send_outputs, ) # Reset button sp_reset_btn.click( fn=reset_chat, inputs=[sp_system], outputs=[sp_chat_state, sp_chatbot, sp_terminal], ) # ================================================================== # Tab 4: Attention Explorer # ================================================================== with gr.Tab("Attention Explorer", id="attn"): gr.Markdown("### Attention Explorer") _attn_model_name = cfg.get("attention_model", "gpt2-medium") gr.Markdown( "See which words the model pays attention to when processing a sentence. " f"Uses `{_attn_model_name}`. " "Click a word to see curved lines connecting it to the words it attended to — " "thicker lines mean stronger attention." 
) # Example sentence pairs — read from config, fall back to defaults _default_attn_examples = [ ["bass", "He tuned his bass and plugged into the", "On the lake she caught a bass and pulled it onto the"], ["spring", "She wound the metal spring and the clock began to", "After the long winter the warm spring rain made the flowers"], ["light", "She flipped the switch and the light began to", "The bag was so light she carried it with"], ] attn_example_pairs = [ tuple(ex) for ex in cfg.get("attention_examples", _default_attn_examples) if isinstance(ex, (list, tuple)) and len(ex) == 3 ] attn_example_btns = [] for word, sent_a, sent_b in attn_example_pairs: with gr.Row(elem_classes=["attn-example-row"]): gr.Markdown(f"**{word}:**", elem_classes=["attn-example-label"]) btn_a = gr.Button(sent_a, size="sm", variant="secondary", elem_classes=["attn-example-btn"]) btn_b = gr.Button(sent_b, size="sm", variant="secondary", elem_classes=["attn-example-btn"]) attn_example_btns.extend([btn_a, btn_b]) gr.Markdown("*Try an example above, or enter your own text:*") attn_input = gr.Textbox( label="Text", value="", lines=1, placeholder="Enter text to explore...", ) attn_btn = gr.Button("Explore", variant="primary") attn_output = gr.HTML(label="Visualization") def run_attention_explorer(text): """Run attention extraction and build visualization HTML.""" if not text or not text.strip(): return "

<div class='response-card'><span style='color:var(--llm-help-color);'>Enter text above and click Explore.</span></div>

" attn_model = manager.config.get("attention_model", "gpt2-medium") data = get_attention_data(text.strip(), model_name=attn_model) return build_attention_html(data) attn_btn.click( fn=run_attention_explorer, inputs=[attn_input], outputs=[attn_output], ) # Wire up example buttons — each sets input and auto-explores for btn in attn_example_btns: btn.click( fn=lambda text: (text, run_attention_explorer(text)), inputs=[btn], outputs=[attn_input, attn_output], ) # ================================================================== # Admin Panel # ================================================================== with gr.Tab("Admin", id="admin"): gr.Markdown("### Admin Panel") # Login gate with gr.Group() as admin_login_group: admin_pw = gr.Textbox( label="Password", type="password", placeholder="Enter admin password", ) admin_login_btn = gr.Button("Login") admin_login_msg = gr.Markdown("") # Admin controls (hidden until login) with gr.Group(visible=False) as admin_controls: gr.Markdown("#### Base Model (Probability Explorer)") with gr.Row(): admin_model_dropdown = gr.Dropdown( choices=list(AVAILABLE_MODELS.keys()), value=manager.current_model_name or cfg.get("model", "Llama-3.2-3B"), label="Select model", ) admin_load_btn = gr.Button("Load", variant="primary") admin_model_status = gr.Markdown("") gr.Markdown("#### Chat Model (System Prompt Explorer)") with gr.Row(): admin_chat_dropdown = gr.Dropdown( choices=list(AVAILABLE_MODELS.keys()), value=manager.chat_model_name or cfg.get("chat_model", "Llama-3.2-3B-Instruct"), label="Select chat model", ) admin_chat_load_btn = gr.Button("Load", variant="primary") admin_chat_status = gr.Markdown("") gr.Markdown("---") gr.Markdown("#### Default Settings") admin_prompt = gr.Textbox( label="Default prompt (Probability Explorer)", value=cfg.get("default_prompt", ""), ) admin_tokenizer_text = gr.Textbox( label="Default text (Tokenizer)", value=cfg.get("default_tokenizer_text", ""), ) with gr.Row(): admin_temp = gr.Number( label="Default temperature", value=cfg.get("default_temperature", 0.8), ) admin_top_k_admin = gr.Number( label="Default top-k", value=cfg.get("default_top_k", 10), precision=0, ) admin_steps = gr.Number( label="Default steps", value=cfg.get("default_steps", 8), precision=0, ) admin_seed = gr.Number( label="Default seed", value=cfg.get("default_seed", 42), precision=0, ) admin_save_btn = gr.Button("Save Defaults") admin_save_msg = gr.Markdown("") gr.Markdown("---") gr.Markdown("#### Attention Explorer") admin_attn_model = gr.Textbox( label="Attention model", value=cfg.get("attention_model", "gpt2-medium"), info="GPT-2 family: gpt2, gpt2-medium, gpt2-large. Changes take effect on next Explore click (reloads model).", ) admin_attn_examples = gr.Code( value=json.dumps(cfg.get("attention_examples", []), indent=2), language="json", interactive=True, label="Example sentences (JSON: [[\"word\", \"sent_a\", \"sent_b\"], ...])", ) admin_attn_save_btn = gr.Button("Save Attention Settings") admin_attn_save_msg = gr.Markdown("") gr.Markdown("---") gr.Markdown("#### System Prompt Presets") gr.Markdown( "Edit the presets available in the System Prompt Explorer dropdown. 
" "JSON object: `{\"Name\": \"prompt text\", ...}`", elem_classes=["param-help"], ) admin_presets = gr.Code( value=json.dumps(cfg.get("system_prompt_presets", {}), indent=2), language="json", interactive=True, ) admin_presets_save_btn = gr.Button("Save Presets") admin_presets_msg = gr.Markdown("") gr.Markdown("---") with gr.Accordion("Environment Variables Reference", open=False): _pw_status = "*(set)*" if os.environ.get("ADMIN_PASSWORD") else "*(default: admin)*" _rb_status = "*(set)*" if REBRANDLY_API_KEY else "*(not set)*" gr.Markdown( "Override settings via " "[HF Space Settings](https://huggingface.co/spaces/chyams/llm-explorer/settings). " "Use **Secrets** for sensitive values (encrypted, hidden after saving) " "and **Variables** for everything else (visible in settings).\n\n" "**Precedence:** env var > config.json > code defaults\n\n" "**Secrets** (sensitive — encrypted)\n\n" "| Variable | Description | Format | Current |\n" "|----------|-------------|--------|---------|\n" f"| `ADMIN_PASSWORD` | Admin panel password | Plain text | {_pw_status} |\n" f"| `REBRANDLY_API_KEY` | URL shortener API key | API key | {_rb_status} |\n" "\n**Variables** (non-sensitive — visible)\n\n" "| Variable | Description | Format | Current |\n" "|----------|-------------|--------|---------|\n" f"| `DEFAULT_MODEL` | Base model (Prob Explorer) | Model name | `{cfg.get('model', '')}` |\n" f"| `DEFAULT_CHAT_MODEL` | Chat model (Sys Prompt Explorer) | Model name | `{cfg.get('chat_model', '')}` |\n" f"| `DEFAULT_PROMPT` | Default prompt | Plain text | `{cfg.get('default_prompt', '')[:40]}...` |\n" f"| `DEFAULT_TEMPERATURE` | Default temperature | Number (0–2.5) | `{cfg.get('default_temperature', 0.8)}` |\n" f"| `DEFAULT_TOP_K` | Default top-k | Integer (5–100) | `{cfg.get('default_top_k', 10)}` |\n" f"| `DEFAULT_STEPS` | Default steps | Integer (1–100) | `{cfg.get('default_steps', 8)}` |\n" f"| `DEFAULT_SEED` | Default seed | Integer | `{cfg.get('default_seed', 42)}` |\n" f"| `DEFAULT_TOKENIZER_TEXT` | Default tokenizer text | Plain text | `{cfg.get('default_tokenizer_text', '')[:40]}...` |\n" f"| `SYSTEM_PROMPT_PRESETS` | System prompt presets | JSON object | *({len(cfg.get('system_prompt_presets', {}))} presets)* |" ) gr.Markdown("---") gr.Markdown("#### Current Config") admin_config_display = gr.Code( value=json.dumps(cfg, indent=2), language="json", interactive=False, ) gr.Markdown("---") gr.Markdown("#### Export Slides") gr.Markdown( "*Uses current settings from Probability Explorer tab.*", elem_classes=["param-help"], ) admin_export_btn = gr.Button("Export Slides", variant="secondary") admin_slides_file = gr.File(label="Slideshow", visible=False) # Login wiring admin_login_btn.click( fn=admin_login, inputs=[admin_pw], outputs=[admin_controls, admin_login_group, admin_login_msg], ) # Model loading — base admin_load_btn.click( fn=admin_load_model, inputs=[admin_model_dropdown], outputs=[admin_model_status, admin_config_display, status_display], ) # Model loading — chat admin_chat_load_btn.click( fn=admin_load_chat_model, inputs=[admin_chat_dropdown], outputs=[admin_chat_status, admin_config_display, status_display], ) # Save defaults — updates config display + Probability Explorer + Tokenizer controls admin_save_btn.click( fn=admin_save_defaults, inputs=[admin_prompt, admin_tokenizer_text, admin_temp, admin_top_k_admin, admin_steps, admin_seed], outputs=[ admin_save_msg, admin_config_display, t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed, t3_input, ], ) # Save presets — updates config, dropdown 
            # choices, and presets display
            admin_presets_save_btn.click(
                fn=admin_save_presets,
                inputs=[admin_presets],
                outputs=[admin_presets_msg, admin_config_display, sp_preset, admin_presets],
            )

            # Save attention settings
            admin_attn_save_btn.click(
                fn=admin_save_attention,
                inputs=[admin_attn_model, admin_attn_examples],
                outputs=[admin_attn_save_msg, admin_config_display],
            )

            # Export slides — uses current Probability Explorer settings
            admin_export_btn.click(
                fn=generate_slideshow,
                inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed],
                outputs=[admin_slides_file],
            ).then(
                fn=lambda f: gr.update(visible=f is not None),
                inputs=[admin_slides_file],
                outputs=[admin_slides_file],
            )

        # Footer with build timestamp
        gr.Markdown(
            f"<div style='text-align:center; color:var(--llm-help-color); font-size:12px;'>"
            f"Updated {BUILD_TIMESTAMP} CT</div>"
        )

        # ==============================================================
        # Page load: populate config defaults, then apply share params
        # ==============================================================
        def load_config_values(request: gr.Request):
            """Step 1: Read config + parse share query params.

            Returns config defaults for all controls, the share_params State
            dict, and a gr.Tabs update to select the right tab.
            """
            c = manager.get_config()
            qp = dict(request.query_params) if request else {}
            # Determine tab from share params
            tab = qp.get("tab")
            tab_update = gr.update(selected=tab) if tab in ("prob", "tok") else gr.update()
            return (
                # Admin panel fields
                c.get("default_prompt", ""),
                c.get("default_tokenizer_text", ""),
                c.get("default_temperature", 0.8),
                c.get("default_top_k", 10),
                c.get("default_steps", 8),
                c.get("default_seed", 42),
                json.dumps(c, indent=2),
                # Probability Explorer fields (config defaults for now)
                c.get("default_prompt", ""),
                c.get("default_temperature", 0.8),
                c.get("default_top_k", 10),
                c.get("default_steps", 8),
                c.get("default_seed", 42),
                # Tokenizer field
                c.get("default_tokenizer_text", ""),
                # Share params State (for subsequent steps)
                qp,
                # Tab selection
                tab_update,
            )

        def apply_share_params(params):
            """Step 2: Override controls with share-link values."""
            if not params or not params.get("tab"):
                return (
                    gr.update(), gr.update(), gr.update(), gr.update(),
                    gr.update(), gr.update(), gr.update(),
                )
            c = manager.get_config()
            if params.get("tab") == "prob":
                prompt = params.get("p", c.get("default_prompt", ""))
                temp = _safe_float(params.get("t"), c.get("default_temperature", 0.8))
                top_k = _safe_int(params.get("k"), c.get("default_top_k", 10))
                show_steps = params.get("ss", "1") == "1"
                steps = _safe_int(params.get("s"), c.get("default_steps", 8))
                seed = _safe_int(params.get("seed"), c.get("default_seed", 42))
                max_steps = 20 if show_steps else 100
                return (
                    gr.update(value=prompt),
                    gr.update(value=temp),
                    gr.update(value=top_k),
                    gr.update(value=steps, maximum=max_steps),
                    gr.update(value=show_steps),
                    gr.update(value=seed),
                    gr.update(),  # tokenizer unchanged
                )
            if params.get("tab") == "tok":
                text = params.get("text", c.get("default_tokenizer_text", ""))
                return (
                    gr.update(), gr.update(), gr.update(), gr.update(),
                    gr.update(), gr.update(), gr.update(value=text),
                )
            return (
                gr.update(), gr.update(), gr.update(), gr.update(),
                gr.update(), gr.update(), gr.update(),
            )

        def auto_generate_prob(params):
            """Step 3: Auto-run Probability Explorer if share link is for prob tab.

            Reads from params State (not components) to avoid race conditions.
            """
            if not params or params.get("tab") != "prob":
                return gr.update()
            c = manager.get_config()
            prompt = params.get("p", c.get("default_prompt", ""))
            temp = _safe_float(params.get("t"), c.get("default_temperature", 0.8))
            top_k = _safe_int(params.get("k"), c.get("default_top_k", 10))
            show_steps = params.get("ss", "1") == "1"
            steps = _safe_int(params.get("s"), c.get("default_steps", 8))
            seed = _safe_int(params.get("seed"), c.get("default_seed", 42))
            return explore_probabilities(prompt, temp, top_k, steps, show_steps, seed)

        def auto_tokenize(params):
            """Step 4: Auto-run Tokenizer if share link is for tok tab."""
            if not params or params.get("tab") != "tok":
                return gr.update()
            c = manager.get_config()
            text = params.get("text", c.get("default_tokenizer_text", ""))
            return tokenize_text(text)

        # Chain: load config → apply share overrides → auto-generate
        demo.load(
            fn=load_config_values,
            js=FORCE_LIGHT_MODE_JS,
            outputs=[
                # Admin panel fields
                admin_prompt, admin_tokenizer_text, admin_temp,
                admin_top_k_admin, admin_steps, admin_seed,
                admin_config_display,
                # Probability Explorer fields
                t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed,
                # Tokenizer field
                t3_input,
                # Share state + tab selection
                share_params, tabs,
            ],
        ).then(
            fn=apply_share_params,
            inputs=[share_params],
            outputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps,
                     t1_show_steps, t1_seed, t3_input],
        ).then(
            fn=auto_generate_prob,
            inputs=[share_params],
            outputs=[t1_output],
        ).then(
            fn=auto_tokenize,
            inputs=[share_params],
            outputs=[t3_output],
        )

    return demo


# ---------------------------------------------------------------------------
# Startup
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    cfg = manager.get_config()

    # Load base model (Probability Explorer)
    base_model = cfg.get("model", "Llama-3.2-3B")
    print(f"Loading base model: {base_model}")
    print(manager.load_model(base_model))

    # Load chat model (System Prompt Explorer)
    chat_model = cfg.get("chat_model", "Llama-3.2-3B-Instruct")
    print(f"Loading chat model: {chat_model}")
    print(manager.load_chat_model(chat_model))

    app = create_app()
    # Note: theme and css are gr.Blocks() constructor arguments, not launch()
    # arguments; passing THEME and CUSTOM_CSS to launch() raises a TypeError.
    # They belong on the gr.Blocks(...) call inside create_app(), e.g.
    # gr.Blocks(title="LLM Explorer", theme=THEME, css=CUSTOM_CSS).
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )