Spaces:
Running on L4
Running on L4
| """LLM Explorer -- Interactive tools for understanding how LLMs work. | |
| Gradio app with four tabs: | |
| 1. Probability Explorer (step-by-step or bulk generation) | |
| 2. Tokenizer | |
| 3. System Prompt Explorer (instruct models) | |
| 4. Attention Explorer (GPT-2 attention visualization) | |
| Plus a password-protected Admin panel for runtime configuration. | |
| """ | |
| import json | |
| import math | |
| import os | |
| import subprocess | |
| import urllib.parse | |
| from datetime import datetime, timezone, timedelta | |
| import gradio as gr | |
| import re | |
| from models import AVAILABLE_MODELS, manager, demo_tokenizer | |
| from attention import get_attention_data, build_attention_html | |
# ---------------------------------------------------------------------------
# Admin password -- set via env var on HF Spaces, or fall back to default
# ---------------------------------------------------------------------------
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "admin")

# Build timestamp (Central Time) — computed once at import/startup.
# Use the IANA zone so daylight saving is handled (CST is UTC-6, CDT is
# UTC-5); a fixed -6 offset would be an hour off during summer. Fall back
# to the fixed offset only if zoneinfo/tzdata is unavailable.
try:
    from zoneinfo import ZoneInfo

    _CT = ZoneInfo("America/Chicago")
except Exception:  # zoneinfo missing or tzdata not installed
    _CT = timezone(timedelta(hours=-6))
BUILD_TIMESTAMP = datetime.now(_CT).strftime("%Y-%m-%d %H:%M:%S")

# ---------------------------------------------------------------------------
# Share / URL shortening
# ---------------------------------------------------------------------------
# Rebrandly API key enables short links; an empty string disables shortening.
REBRANDLY_API_KEY = os.environ.get("REBRANDLY_API_KEY", "")
# On HF Spaces, SPACE_ID is "owner/name"; derive the public base URL from it.
_SPACE_ID = os.environ.get("SPACE_ID", "")
if _SPACE_ID:
    _owner, _name = _SPACE_ID.split("/")
    _BASE_URL = f"https://{_owner}-{_name}.hf.space/"
else:
    _BASE_URL = "http://localhost:7860/"
# ---------------------------------------------------------------------------
# Theme — use .set() for Gradio-controlled element colors
# ---------------------------------------------------------------------------
# Soft base theme with a custom purple primary palette; c50 is the lightest
# shade and c950 the darkest. .set() then overrides the individual component
# colors Gradio derives from the palette (each with a *_dark variant).
THEME = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        c50="#faf8fc",
        c100="#f3f0f7",
        c200="#ded9f4",
        c300="#c4b5e0",
        c400="#a78bcc",
        c500="#8b5fb8",
        c600="#7c3aad",
        c700="#63348d",
        c800="#52296f",
        c900="#421f59",
        c950="#2a1339",
    ),
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
).set(
    # Primary buttons: dark purple with white text
    button_primary_background_fill="#63348d",
    button_primary_background_fill_dark="#63348d",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    button_primary_border_color="#63348d",
    button_primary_border_color_dark="#63348d",
    button_primary_background_fill_hover="#4e2870",
    button_primary_background_fill_hover_dark="#7c3aad",
    # Secondary buttons: dark purple outline
    button_secondary_text_color="#63348d",
    button_secondary_text_color_dark="#ded9f4",
    button_secondary_border_color="#63348d",
    button_secondary_border_color_dark="#475569",
    # Block backgrounds: light purple in light, dark slate in dark
    block_background_fill="#f3f0f7",
    block_background_fill_dark="#1e293b",
    panel_background_fill="#f3f0f7",
    panel_background_fill_dark="#0f172a",
)
# ---------------------------------------------------------------------------
# Custom CSS — headings, tabs, dark mode, output HTML
# ---------------------------------------------------------------------------
# The --llm-* custom properties defined in :root (and overridden under
# body.dark / .dark) are consumed by the inline styles emitted from the
# HTML rendering helpers below — this is what makes the generated HTML
# follow the dark-mode toggle.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;700&family=Merriweather:wght@300;400;700;900&display=swap');
.gradio-container { max-width: 960px !important; }
/* Headings: Merriweather, dark purple / light purple */
h1, h2, h3,
.gradio-container h1, .gradio-container h2, .gradio-container h3 {
font-family: 'Merriweather', serif !important;
color: #63348d !important;
}
body.dark h1, body.dark h2, body.dark h3,
body.dark .gradio-container h1, body.dark .gradio-container h2, body.dark .gradio-container h3,
.dark h1, .dark h2, .dark h3,
.dark .gradio-container h1, .dark .gradio-container h2, .dark .gradio-container h3 {
color: #ded9f4 !important;
}
/* Tab labels: dark purple / light purple */
.tab-nav button, .tabs button.tab-nav-button {
color: #63348d !important;
}
body.dark .tab-nav button, .dark .tabs button.tab-nav-button {
color: #ded9f4 !important;
}
/* Dark mode toggle button */
#dark-mode-btn {
color: white !important;
border-color: #63348d !important;
background: #63348d !important;
}
body.dark #dark-mode-btn {
color: #ded9f4 !important;
border-color: #475569 !important;
background: #1e293b !important;
}
/* --- CSS variables for output HTML --- */
:root {
--llm-card-bg: #ffffff;
--llm-card-border: #e2e8f0;
--llm-text-bg: #f8fafc;
--llm-prompt-color: #475569;
--llm-generated-color: #1e293b;
--llm-highlight-bg: #ded9f4;
--llm-highlight-color: #63348d;
--llm-step-header-color: #63348d;
--llm-entropy-color: #64748b;
--llm-prob-label-color: #1e293b;
--llm-prob-value-color: #334155;
--llm-bar-selected: #63348d;
--llm-bar-default: #c4b5d4;
--llm-token-count-color: #64748b;
--llm-help-color: #64748b;
}
body.dark, .dark {
--llm-card-bg: #1e293b;
--llm-card-border: #334155;
--llm-text-bg: #0f172a;
--llm-prompt-color: #94a3b8;
--llm-generated-color: #e2e8f0;
--llm-highlight-bg: #63348d;
--llm-highlight-color: #ded9f4;
--llm-step-header-color: #ded9f4;
--llm-entropy-color: #94a3b8;
--llm-prob-label-color: #e2e8f0;
--llm-prob-value-color: #94a3b8;
--llm-bar-selected: #a78bcc;
--llm-bar-default: #4a3a64;
--llm-token-count-color: #94a3b8;
--llm-help-color: #94a3b8;
}
/* Parameter help text */
.param-help p {
font-size: 13px !important;
color: var(--llm-help-color) !important;
margin-top: -4px !important;
margin-bottom: 8px !important;
line-height: 1.4 !important;
}
/* --- System Prompt Explorer: green terminal --- */
.green-terminal {
font-family: 'IBM Plex Mono', 'JetBrains Mono', 'Courier New', monospace !important;
font-size: 13px !important;
line-height: 1.5 !important;
background: #0a0a0a !important;
color: #33ff33 !important;
border-radius: 0.5rem !important;
border: 2px solid #1a1a1a !important;
box-shadow: inset 0 0 20px rgba(51, 255, 51, 0.05) !important;
padding: 16px !important;
min-height: 200px;
max-height: 500px;
overflow-y: auto;
white-space: pre-wrap;
word-wrap: break-word;
}
.green-terminal::-webkit-scrollbar { width: 8px; }
.green-terminal::-webkit-scrollbar-track { background: #1a1a1a; border-radius: 4px; }
.green-terminal::-webkit-scrollbar-thumb { background: #33ff33; border-radius: 4px; opacity: 0.5; }
.green-terminal .sp-label { color: #22c55e; font-weight: bold; }
.green-terminal .sp-special { color: #666666; }
.green-terminal .sp-system { color: #facc15; }
.green-terminal .sp-user { color: #60a5fa; }
.green-terminal .sp-assistant { color: #33ff33; }
.green-terminal .sp-response { color: #f0f0f0; }
/* Response card */
.response-card {
border: 1px solid var(--llm-card-border);
border-radius: 8px;
padding: 16px;
background: var(--llm-card-bg);
font-size: 15px;
line-height: 1.6;
}
/* Attention Explorer example pairs */
.attn-example-row {
background: #f8f6fb !important;
border-radius: 6px !important;
padding: 6px 10px !important;
margin-bottom: 2px !important;
align-items: flex-start !important;
gap: 6px !important;
flex-wrap: wrap !important;
}
.attn-example-label {
min-width: 70px !important;
max-width: 70px !important;
flex-shrink: 0 !important;
padding-top: 4px !important;
}
.attn-example-label p {
font-family: 'Merriweather', serif !important;
color: #63348d !important;
font-size: 13px !important;
margin: 0 !important;
}
.attn-example-btn {
flex: 0 0 auto !important;
min-width: 0 !important;
max-width: fit-content !important;
}
.attn-example-btn button {
font-size: 12px !important;
padding: 4px 12px !important;
border: 1.5px solid #d4c8e8 !important;
border-radius: 14px !important;
background: #fff !important;
color: #4a3070 !important;
white-space: nowrap !important;
width: auto !important;
text-align: left !important;
}
.attn-example-btn button:hover {
background: #f3f0f7 !important;
border-color: #63348d !important;
}
"""
# Dark mode toggle JS — toggles class and swaps button text + heading colors.
# Headings are also recolored via inline style.setProperty(..., 'important')
# because the heading colors are set with !important in CUSTOM_CSS.
DARK_MODE_JS = """
() => {
document.body.classList.toggle('dark');
const isDark = document.body.classList.contains('dark');
const el = document.getElementById('dark-mode-btn');
if (el) el.innerText = isDark ? 'Light mode' : 'Dark mode';
document.querySelectorAll('h1, h2, h3').forEach(h => {
h.style.setProperty('color', isDark ? '#ded9f4' : '#63348d', 'important');
});
}
"""
# Force light mode on page load (override browser/system dark mode preference)
FORCE_LIGHT_MODE_JS = """
() => {
document.body.classList.remove('dark');
const el = document.getElementById('dark-mode-btn');
if (el) el.innerText = 'Dark mode';
document.querySelectorAll('h1, h2, h3').forEach(h => {
h.style.setProperty('color', '#63348d', 'important');
});
}
"""
# ---------------------------------------------------------------------------
# Share helpers
# ---------------------------------------------------------------------------
def _shorten_url(long_url: str) -> str:
    """Shorten a URL via Rebrandly API (using curl). Falls back to long URL on failure."""
    # Nothing to do without an API key, and never shorten local dev URLs.
    if not REBRANDLY_API_KEY or "localhost" in long_url:
        return long_url
    body = json.dumps({
        "destination": long_url,
        "domain": {"fullName": "go.ropavieja.org"},
    })
    cmd = [
        "curl", "-s", "-X", "POST",
        "https://api.rebrandly.com/v1/links",
        "-H", "Content-Type: application/json",
        "-H", f"apikey: {REBRANDLY_API_KEY}",
        "-d", body,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        # Any curl failure or empty response: silently keep the long URL.
        if proc.returncode != 0 or not proc.stdout.strip():
            return long_url
        short = json.loads(proc.stdout)["shortUrl"]
        return f"https://{short}"
    except (subprocess.TimeoutExpired, KeyError, json.JSONDecodeError, OSError) as exc:
        print(f"[share] Rebrandly error: {exc}")
        return long_url
def _safe_float(val, default: float) -> float:
    """Coerce *val* to float; return *default* when conversion is impossible."""
    try:
        result = float(val)
    except (TypeError, ValueError):
        return default
    return result
def _safe_int(val, default: int) -> int:
    """Coerce *val* to int; return *default* when conversion is impossible."""
    try:
        result = int(val)
    except (TypeError, ValueError):
        return default
    return result
def share_probability_explorer(prompt, temp, top_k, steps, show_steps, seed):
    """Build a share URL encoding current Probability Explorer settings."""
    # Short query keys keep the URL compact; numeric values are normalized
    # through float()/int() so slider values round-trip cleanly.
    query = urllib.parse.urlencode({
        "tab": "prob",
        "p": prompt,
        "t": str(float(temp)),
        "k": str(int(top_k)),
        "s": str(int(steps)),
        "ss": "1" if show_steps else "0",
        "seed": str(int(seed)),
    })
    return _shorten_url(f"{_BASE_URL}?{query}")
def share_tokenizer(text):
    """Build a share URL encoding current Tokenizer input."""
    query = urllib.parse.urlencode({"tab": "tok", "text": text})
    return _shorten_url(f"{_BASE_URL}?{query}")
# ---------------------------------------------------------------------------
# HTML rendering helpers
# ---------------------------------------------------------------------------
# Alternating token chip colors: (background, foreground) pairs, cycled by
# token index in _render_tokens_html so adjacent tokens are distinguishable.
TOKEN_COLORS = [
    ("#e0f2fe", "#0c4a6e"),  # light blue / dark blue
    ("#fef3c7", "#92400e"),  # light amber / dark amber
    ("#d1fae5", "#065f46"),  # light green / dark green
    ("#fce7f3", "#9d174d"),  # light pink / dark pink
    ("#e0e7ff", "#3730a3"),  # light indigo / dark indigo
    ("#fde68a", "#78350f"),  # yellow / brown
]
def _esc(text: str) -> str:
    """Escape HTML special characters (&, <, >) for safe embedding in markup.

    '&' must be replaced first so the entities produced by the later
    replacements are not double-escaped. As previously written the method
    replaced each character with itself (the entity references were lost),
    making it a no-op and letting raw markup leak into the rendered HTML.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
def _token_label(token_str: str) -> str:
    """Format a token for display in probability table (no quotes, trimmed)."""
    trimmed = _esc(token_str).strip()
    # All-whitespace tokens would render as nothing; show the visible
    # open-box space symbol instead.
    return trimmed if trimmed else "\u2423"
def _highlight_token(token_str: str) -> str:
    """Wrap the non-whitespace part of a token in a highlight span.

    Leading/trailing whitespace stays outside the highlight so the
    purple chip only covers the visible characters.
    """
    escaped = _esc(token_str)
    core = escaped.strip()
    if not core:
        # Whole token is whitespace: render a visible open-box symbol.
        return ('<span style="background:var(--llm-highlight-bg);color:var(--llm-highlight-color);'
                'font-weight:700;padding:1px 4px;border-radius:3px;">\u2423</span>')
    n_lead = len(escaped) - len(escaped.lstrip())
    n_trail = len(escaped) - len(escaped.rstrip())
    chip = (f'<span style="background:var(--llm-highlight-bg);color:var(--llm-highlight-color);'
            f'font-weight:700;padding:1px 4px;border-radius:3px;">{core}</span>')
    prefix = escaped[:n_lead] if n_lead else ""
    suffix = escaped[len(escaped) - n_trail:] if n_trail else ""
    return prefix + chip + suffix
def _render_step_html(step_data: dict, prompt: str, prev_generated: str) -> str:
    """Render one generation step as styled HTML.

    prev_generated is the text generated in all prior steps (between prompt
    and the latest token), so only the newest token gets highlighted.

    step_data keys read here: "step", "entropy", "token", "token_id", and
    "top_tokens" — a list of (token_str, prob, token_id) tuples.
    """
    s = step_data
    # Build probability bar chart
    rows_html = ""
    for token_str, prob, tid in s["top_tokens"]:
        # Bar width scales linearly with probability (300px max); NaN
        # probabilities are guarded and render as a 1px sliver.
        bar_width = max(1, int(prob * 300)) if not math.isnan(prob) else 1
        is_selected = tid == s["token_id"]
        bar_color = "var(--llm-bar-selected)" if is_selected else "var(--llm-bar-default)"
        label_style = "font-weight:700;" if is_selected else ""
        arrow = " \u2190" if is_selected else ""  # arrow marks the sampled token
        token_display = _token_label(token_str)
        rows_html += f"""
<div style="display:flex;align-items:center;gap:8px;margin:2px 0;font-family:monospace;font-size:13px;">
<span style="width:140px;text-align:right;color:var(--llm-prob-label-color);{label_style}">{token_display}</span>
<div style="width:{bar_width}px;height:16px;background:{bar_color};border-radius:3px;"></div>
<span style="color:var(--llm-prob-value-color);{label_style}">{"0.0000" if math.isnan(prob) else f"{prob:.4f}"}{arrow}</span>
</div>"""
    highlighted = _highlight_token(s["token"])
    # Card: header (step number + entropy), text so far, then the bar rows.
    return f"""
<div style="border:1px solid var(--llm-card-border);border-radius:8px;padding:12px;margin:8px 0;background:var(--llm-card-bg);">
<div style="display:flex;justify-content:space-between;align-items:center;margin-bottom:8px;">
<span style="font-family:'Merriweather',serif;font-weight:600;color:var(--llm-step-header-color);">Step {s['step']}</span>
<span style="color:var(--llm-entropy-color);font-size:12px;">Entropy: {s['entropy']:.2f} bits</span>
</div>
<div style="font-family:monospace;font-size:14px;padding:8px;background:var(--llm-text-bg);border-radius:4px;margin-bottom:8px;word-wrap:break-word;line-height:1.6;">
<span style="color:var(--llm-prompt-color);">{_esc(prompt)}</span><span style="color:var(--llm-generated-color);">{_esc(prev_generated)}</span>{highlighted}
</div>
{rows_html}
</div>"""
def _render_final_text_html(prompt: str, generated_text: str) -> str:
    """Render final text with all generated text highlighted (show-steps OFF mode)."""
    # generated_text includes the prompt as a prefix; keep only the new part.
    new_text = _esc(generated_text[len(prompt):])
    body = new_text.lstrip()
    leading_ws = new_text[:len(new_text) - len(body)]
    return f"""
<div style="border:1px solid var(--llm-card-border);border-radius:8px;padding:16px;background:var(--llm-card-bg);">
<div style="font-family:monospace;font-size:16px;line-height:1.6;word-wrap:break-word;">
<span style="color:var(--llm-prompt-color);">{_esc(prompt)}</span>{leading_ws}<span style="background:var(--llm-highlight-bg);color:var(--llm-highlight-color);font-weight:600;padding:2px 6px;border-radius:4px;">{body}</span>
</div>
</div>"""
def _render_tokens_html(tokens: list[tuple[str, int]]) -> str:
    """Render tokenized text as colored chips.

    tokens: (token_string, token_id) pairs. Chip colors cycle through
    TOKEN_COLORS; each token's ID is exposed as a hover tooltip.
    """
    chips = ""
    for i, (token_str, tid) in enumerate(tokens):
        bg, fg = TOKEN_COLORS[i % len(TOKEN_COLORS)]
        display = _esc(token_str)
        # Show spaces explicitly
        if display.strip() == "":
            # repr() makes whitespace visible (e.g. \n); strip repr's quotes.
            display = repr(token_str).strip("'")
        chips += f"""<span title="ID: {tid}" style="
display:inline-block;padding:4px 8px;margin:2px;border-radius:4px;
background:{bg};color:{fg};font-family:monospace;font-size:14px;cursor:default;
">{display}</span>"""
    return f"""
<div style="padding:8px;">
<div style="margin-bottom:12px;color:var(--llm-token-count-color);font-size:13px;">
{len(tokens)} tokens — hover for token IDs
</div>
<div style="line-height:2.2;">{chips}</div>
</div>"""
# ---------------------------------------------------------------------------
# Tab 1: Probability Explorer
# ---------------------------------------------------------------------------
def explore_probabilities(prompt, temperature, top_k, steps, show_steps, seed):
    """Generate tokens step by step and return formatted HTML."""
    if not manager.is_ready():
        return f"<p style='color:red;'>{manager.status_message()}</p>"
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=int(seed),
        show_steps=show_steps,
    )
    if not results:
        return "<p>No results generated.</p>"
    if not show_steps:
        # Bulk mode: just the final text with the generated part highlighted.
        return _render_final_text_html(prompt, results[-1]["text"])
    # Step mode: one card per token. Each card gets the text produced by all
    # *prior* steps so only its newest token is highlighted.
    cards = []
    for idx, step in enumerate(results):
        earlier = results[idx - 1]["text"][len(prompt):] if idx > 0 else ""
        cards.append(_render_step_html(step, prompt, earlier))
    return "\n".join(cards)
def _build_slide_rows(step_data, top_k, bar_max):
    """Build probability bar HTML rows for a slideshow slide."""
    chunks = []
    for token_str, prob, tid in step_data["top_tokens"][:int(top_k)]:
        chosen = tid == step_data["token_id"]
        is_nan = math.isnan(prob)
        # Bars scale to bar_max px; NaN probabilities render as a 2px sliver.
        width = 2 if is_nan else max(2, int(prob * bar_max))
        color = "var(--llm-bar-selected)" if chosen else "var(--llm-bar-default)"
        marker = " \u2190" if chosen else ""  # arrow marks the sampled token
        shown_prob = "0.0000" if is_nan else f"{prob:.4f}"
        chunks.append(f'''
<div class="prob-row{" selected" if chosen else ""}">
<span class="token-label">{_token_label(token_str)}</span>
<div class="bar" style="width:{width}px;background:{color};"></div>
<span class="prob-value">{shown_prob}{marker}</span>
</div>''')
    return "".join(chunks)
def generate_slideshow(prompt, temperature, top_k, steps, seed):
    """Generate tokens and return a self-contained HTML slideshow file.

    Runs a full step-by-step generation, then renders a 1920x1080 slide per
    step (plus a prompt-only slide 0) into a standalone HTML file with
    arrow-key navigation. Returns the temp-file path, or None when no model
    is loaded or generation produced nothing.
    """
    if not manager.is_ready():
        return None
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=int(seed),
        show_steps=True,  # per-step probability tables are needed for slides
    )
    if not results:
        return None
    # Build slides HTML
    slides_html = ""
    bar_max = 900  # widest probability bar in px (prob == 1.0)
    # Slide 0: prompt-only text + first step's probability table
    slide0_rows = _build_slide_rows(results[0], top_k, bar_max)
    slide0_text = f'<span class="prompt">{_esc(prompt)}</span>'
    slides_html += f'''
<div class="slide active" id="slide-0">
{slide0_rows}
<div class="text-box">{slide0_text}</div>
</div>'''
    # Slides 1..N: each step with highlighted token
    for i, r in enumerate(results):
        # Text produced by all *prior* steps — only the newest token is highlighted.
        prev_generated = results[i - 1]["text"][len(prompt):] if i > 0 else ""
        highlighted_token = _highlight_token(r["token"])
        text_html = (
            f'<span class="prompt">{_esc(prompt)}</span>'
            f'<span class="generated">{_esc(prev_generated)}</span>'
            f'{highlighted_token}'
        )
        rows_html = _build_slide_rows(r, top_k, bar_max)
        slides_html += f'''
<div class="slide" id="slide-{i+1}">
{rows_html}
<div class="text-box">{text_html}</div>
</div>'''
    total = len(results) + 1  # +1 for the prompt-only slide 0
    # NOTE: doubled braces {{ }} below are literal braces in the emitted CSS/JS.
    html = f'''<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>LLM Explorer — Slides</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=JetBrains+Mono:wght@400;700&family=Merriweather:wght@400;700&display=swap');
* {{ box-sizing: border-box; margin: 0; padding: 0; }}
body {{
background: #ffffff;
display: flex; align-items: center; justify-content: center;
height: 100vh; overflow: hidden;
font-family: 'Inter', sans-serif;
}}
:root {{
--llm-text-bg: #f3f0f7;
--llm-prompt-color: #475569;
--llm-generated-color: #1e293b;
--llm-highlight-bg: #ded9f4;
--llm-highlight-color: #63348d;
--llm-prob-label-color: #1e293b;
--llm-prob-value-color: #334155;
--llm-bar-selected: #63348d;
--llm-bar-default: #c4b5d4;
}}
.slide-container {{
width: 1920px; height: 1080px;
position: relative; background: #ffffff;
}}
.slide {{
display: none; width: 100%; height: 100%;
padding: 72px 96px; flex-direction: column;
}}
.slide.active {{ display: flex; }}
.text-box {{
font-family: 'JetBrains Mono', monospace; font-size: 24px;
padding: 24px 36px; background: var(--llm-text-bg);
border-radius: 8px; margin-top: 32px;
line-height: 1.6; word-wrap: break-word;
}}
.text-box .prompt {{ color: var(--llm-prompt-color); }}
.text-box .generated {{ color: var(--llm-generated-color); }}
.text-box span[style*="highlight"] {{ }}
.prob-row {{
display: flex; align-items: center; gap: 12px;
margin: 4px 0; font-family: 'JetBrains Mono', monospace; font-size: 20px;
}}
.prob-row .token-label {{
width: 240px; text-align: right; color: var(--llm-prob-label-color);
}}
.prob-row .bar {{ height: 28px; border-radius: 4px; min-width: 2px; }}
.prob-row .prob-value {{ color: var(--llm-prob-value-color); }}
.prob-row.selected .token-label,
.prob-row.selected .prob-value {{ font-weight: 700; }}
.nav-hint {{
position: absolute; bottom: 16px; right: 24px;
font-size: 16px; color: #999;
}}
.slide-counter {{
position: absolute; bottom: 16px; left: 24px;
font-size: 16px; color: #999;
}}
</style>
</head>
<body>
<div class="slide-container">
{slides_html}
<div class="slide-counter" id="counter">1 / {total}</div>
<div class="nav-hint">\u2190 \u2192 arrow keys to navigate</div>
</div>
<script>
var slides = document.querySelectorAll('.slide');
var counter = document.getElementById('counter');
var current = 0;
function show(idx) {{
slides.forEach(function(s) {{ s.classList.remove('active'); }});
slides[idx].classList.add('active');
counter.textContent = (idx + 1) + ' / ' + slides.length;
}}
document.addEventListener('keydown', function(e) {{
if (e.key === 'ArrowRight' || e.key === ' ') {{
current = Math.min(current + 1, slides.length - 1);
show(current);
}} else if (e.key === 'ArrowLeft') {{
current = Math.max(current - 1, 0);
show(current);
}}
}});
</script>
</body>
</html>'''
    # Write to temp file (delete=False so the file survives for download)
    import tempfile
    tmp = tempfile.NamedTemporaryFile(
        suffix=".html", prefix="llm-slides-", delete=False, mode="w"
    )
    tmp.write(html)
    tmp.close()
    return tmp.name
def on_show_steps_change(show_steps):
    """Adjust max steps slider when show_steps toggles.

    When show_steps is turned ON, also reset to default steps value
    (since high step counts are only useful for bulk generation).
    """
    cfg = manager.get_config()
    if not show_steps:
        return gr.update(maximum=100)
    return gr.update(maximum=20, value=cfg.get("default_steps", 8))
# ---------------------------------------------------------------------------
# Tab 2: Tokenizer
# ---------------------------------------------------------------------------
def tokenize_text(text):
    """Tokenize input and return formatted HTML.

    Uses GPT-2's tokenizer (not the generation model's tokenizer) because
    GPT-2's smaller vocabulary produces more interesting subword splits.
    """
    return _render_tokens_html(demo_tokenizer.tokenize(text))
# ---------------------------------------------------------------------------
# Tab 3: System Prompt Explorer
# ---------------------------------------------------------------------------
# Hard cap on conversation length; enforced in send_chat_message.
MAX_CHAT_TURNS = 10  # max user messages before forcing reset
def _get_presets() -> dict:
    """Get current system prompt presets from config."""
    cfg = manager.get_config()
    return cfg.get("system_prompt_presets", {})
def _esc_terminal(text: str) -> str:
    """Escape HTML special characters for terminal display.

    '&' is replaced first to avoid double-escaping the entities produced by
    the later replacements. As previously written the method replaced each
    character with itself (the entity references were lost), making it a
    no-op — raw chat-template text could inject markup into the terminal div.
    """
    return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
def _format_terminal(raw_text: str) -> str:
    """Parse a chat template string and produce color-coded HTML for the green terminal.

    Works with both Llama (<|start_header_id|>) and Qwen (<|im_start|>) templates.

    Implemented as a small two-state scan: after a "start" special token the
    next text chunk is treated as a role label; subsequent plain text is
    colored by the most recently seen role.
    """
    # Split on special tokens, keeping them (the capture group preserves delimiters)
    parts = re.split(r'(<\|[^|]*\|>)', raw_text)
    html_parts = []
    current_role = None  # role whose CSS class colors plain text chunks
    expect_role = False  # true right after a token that precedes a role label
    role_css = {
        "system": "sp-system",
        "user": "sp-user",
        "assistant": "sp-assistant",
    }
    for part in parts:
        if re.match(r'<\|[^|]*\|>', part):
            # Special token — render in gray
            html_parts.append(f"<span class='sp-special'>{_esc_terminal(part)}</span>")
            # After im_start or start_header_id, next text chunk is a role label
            expect_role = ("im_start" in part or "start_header_id" in part)
        elif expect_role and part.strip() in role_css:
            # Role label (system / user / assistant). Surrounding whitespace
            # is kept outside the label span so layout is preserved.
            role = part.strip()
            current_role = role
            before = _esc_terminal(part[: len(part) - len(part.lstrip())])
            after = _esc_terminal(part[len(part.rstrip()) :])
            html_parts.append(f"{before}<span class='sp-label'>{role}</span>{after}")
            expect_role = False
        else:
            expect_role = False
            # Message bodies get the current role's color; whitespace-only
            # chunks (or text before any role) pass through unwrapped.
            css = role_css.get(current_role, "")
            if css and part.strip():
                html_parts.append(f"<span class='{css}'>{_esc_terminal(part)}</span>")
            else:
                html_parts.append(_esc_terminal(part))
    return "<div class='green-terminal'>" + "".join(html_parts) + "</div>"
def _initial_terminal() -> str:
    """Placeholder terminal shown before any chat message has been sent."""
    hint = "<span class='sp-special'>Send a message to see what the model receives.</span>"
    return f"<div class='green-terminal'>{hint}</div>"
def _as_chatbot(history):
    """Project clean history dicts into fresh dicts for the Chatbot component."""
    return [{"role": m["role"], "content": m["content"]} for m in history]

def send_chat_message(user_message, history, system_prompt, max_tokens, temperature, seed):
    """Handle a user message: generate response, update state + chatbot + terminal.

    `history` is the gr.State list of clean {"role": ..., "content": ...} dicts.
    The Chatbot is derived from this — never read back from Chatbot (Gradio
    mangles the dicts on round-trip). The previously inline projection
    comprehension (repeated four times) now lives in _as_chatbot.

    Returns (textbox_value, history, chatbot_messages, terminal_html).
    """
    # Ignore empty/whitespace-only submissions; just re-render current state.
    if not user_message or not user_message.strip():
        return "", history, _as_chatbot(history), _format_terminal_from_history(history, system_prompt)
    # No chat model loaded: record the attempt with an explanatory reply,
    # without calling the generator.
    if not manager.chat_ready():
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": "No chat model loaded. Load one from the Admin tab."},
        ]
        return "", history, _as_chatbot(history), _initial_terminal()
    # Enforce the turn cap so a single session can't grow without bound.
    user_turns = sum(1 for m in history if m["role"] == "user")
    if user_turns >= MAX_CHAT_TURNS:
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": f"Conversation limit reached ({MAX_CHAT_TURNS} exchanges). Click Reset to start a new conversation."},
        ]
        return "", history, _as_chatbot(history), _format_terminal_from_history(history, system_prompt)
    # Build full messages for the model: optional system prompt + history.
    history = history + [{"role": "user", "content": user_message}]
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history)
    result = manager.generate_chat(
        messages=messages,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        seed=int(seed),
    )
    if "error" in result:
        history = history + [
            {"role": "assistant", "content": f"Error: {result['error']}"},
        ]
        return "", history, _as_chatbot(history), _format_terminal_from_history(history, system_prompt)
    history = history + [{"role": "assistant", "content": result["response"]}]
    terminal_html = _format_terminal(result["formatted_display"])
    return "", history, _as_chatbot(history), terminal_html
def _format_terminal_from_history(chat_history, system_prompt):
    """Build terminal display from chat history (without generating)."""
    if not chat_history:
        return _initial_terminal()
    # Optional system message first, then the conversation so far.
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages = messages + list(chat_history)
    formatted = manager.format_chat_template(messages)
    return _format_terminal(formatted) if formatted else _initial_terminal()
def reset_chat(system_prompt):
    """Clear chat history, keep system prompt. Show initial terminal with just system prompt.

    Returns (state, chatbot, terminal).
    """
    has_prompt = bool(system_prompt and system_prompt.strip())
    if has_prompt and manager.chat_ready():
        formatted = manager.format_chat_template(
            [{"role": "system", "content": system_prompt}]
        )
        return [], [], _format_terminal(formatted)
    return [], [], _initial_terminal()
def on_preset_change(preset_name):
    """Return the prompt text for *preset_name* ("" when unknown).

    Only fills the system-prompt textbox; the conversation is left intact.
    """
    presets = _get_presets()
    return presets.get(preset_name, "")
| # --------------------------------------------------------------------------- | |
| # Admin panel | |
| # --------------------------------------------------------------------------- | |
def admin_login(password):
    """Validate the admin password.

    Returns updates for (admin controls, login form, error message):
    success reveals the controls and hides the form; failure leaves the
    form visible and shows an error.
    """
    if password != ADMIN_PASSWORD:
        return gr.update(visible=False), gr.update(visible=True), "Incorrect password."
    return gr.update(visible=True), gr.update(visible=False), ""
def admin_load_model(model_name):
    """Swap in a new base model and refresh the admin/config displays.

    Returns (load status text, pretty-printed config JSON, header status).
    """
    load_status = manager.load_model(model_name)
    config_json = json.dumps(manager.get_config(), indent=2)
    return load_status, config_json, f"**{manager.status_message()}**"
def admin_load_chat_model(model_name):
    """Swap in a new chat model and refresh the admin/config displays.

    Returns (load status text, pretty-printed config JSON, header status).
    """
    load_status = manager.load_chat_model(model_name)
    config_json = json.dumps(manager.get_config(), indent=2)
    return load_status, config_json, f"**{manager.status_message()}**"
def admin_save_defaults(prompt, tokenizer_text, temperature, top_k, steps, seed):
    """Persist default settings and echo them back to the UI controls.

    Returns (status message, config JSON, then the saved values in the
    order expected by the Probability Explorer and Tokenizer controls).
    """
    # Normalize the integer-valued settings once, up front.
    top_k_i, steps_i, seed_i = int(top_k), int(steps), int(seed)
    manager.update_config(
        default_prompt=prompt,
        default_tokenizer_text=tokenizer_text,
        default_temperature=temperature,
        default_top_k=top_k_i,
        default_steps=steps_i,
        default_seed=seed_i,
    )
    config_json = json.dumps(manager.get_config(), indent=2)
    return (
        "Defaults saved.",
        config_json,
        prompt,
        temperature,
        top_k_i,
        steps_i,
        seed_i,
        tokenizer_text,
    )
def admin_save_presets(presets_json):
    """Validate and store the system-prompt presets JSON.

    Returns (status_msg, config_json, dropdown_update, presets_json_display).
    On invalid input nothing is saved and the dropdown/editor are untouched.
    """
    def _reject(message):
        # Shared failure path: report the error, refresh the config view,
        # and leave the dropdown and editor contents as they are.
        return message, json.dumps(manager.get_config(), indent=2), gr.update(), gr.update()

    try:
        presets = json.loads(presets_json)
    except (json.JSONDecodeError, TypeError) as e:
        return _reject(f"Invalid JSON: {e}")
    if not isinstance(presets, dict):
        return _reject("Presets must be a JSON object `{\"Name\": \"prompt\", ...}`")

    manager.update_config(system_prompt_presets=presets)
    return (
        f"Presets saved ({len(presets)} presets).",
        json.dumps(manager.get_config(), indent=2),
        gr.update(choices=list(presets.keys())),
        json.dumps(presets, indent=2),
    )
def admin_save_attention(attn_model, examples_json):
    """Validate and store the Attention Explorer settings.

    Returns (status_msg, config_json). On invalid examples JSON nothing
    is saved.
    """
    def _cfg_json():
        # Always render the freshest config for the admin display.
        return json.dumps(manager.get_config(), indent=2)

    try:
        examples = json.loads(examples_json)
    except (json.JSONDecodeError, TypeError) as e:
        return f"Invalid JSON: {e}", _cfg_json()
    if not isinstance(examples, list):
        return "Examples must be a JSON array", _cfg_json()

    model_name = attn_model.strip()
    manager.update_config(
        attention_model=model_name,
        attention_examples=examples,
    )
    return (
        f"Attention settings saved. Model: {model_name}. "
        f"Note: model change takes effect on next Explore click. "
        f"Example changes require app restart.",
        _cfg_json(),
    )
| # --------------------------------------------------------------------------- | |
| # Build the Gradio app | |
| # --------------------------------------------------------------------------- | |
def create_app():
    """Build the full Gradio Blocks app: four tabs, admin panel, and wiring.

    Returns the assembled ``gr.Blocks`` instance (not yet launched).

    Fix: ``THEME`` and ``CUSTOM_CSS`` are applied here on the ``gr.Blocks``
    constructor — ``Blocks.launch()`` does not accept ``theme``/``css``
    keyword arguments, so passing them at launch time either raises a
    ``TypeError`` or silently leaves the app unstyled.
    """
    cfg = manager.get_config()
    with gr.Blocks(title="LLM Explorer", theme=THEME, css=CUSTOM_CSS) as demo:
        # Header with dark mode toggle
        with gr.Row():
            gr.Markdown("# LLM Explorer\n*Interactive tools for understanding how LLMs work*")
            dark_btn = gr.Button(
                "Dark mode", elem_id="dark-mode-btn",
                size="sm", scale=0, min_width=100, variant="secondary",
            )
        dark_btn.click(fn=None, js=DARK_MODE_JS)
        # Status bar
        status_display = gr.Markdown(value=f"**{manager.status_message()}**")
        # Hidden state for share-link params (avoids race conditions with component updates)
        share_params = gr.State(value={})
        # ==================================================================
        # Tabs
        # ==================================================================
        with gr.Tabs() as tabs:
            # ==================================================================
            # Tab 1: Probability Explorer
            # ==================================================================
            with gr.Tab("Probability Explorer", id="prob"):
                gr.Markdown("### Step-by-Step Next-Token Prediction")
                gr.Markdown(
                    "Enter a prompt and watch the model predict one token at a time. "
                    "Each step shows the probability distribution over the vocabulary."
                )
                t1_prompt = gr.Textbox(
                    label="Prompt",
                    value=cfg.get("default_prompt", "The best thing about Huston-Tillotson University is"),
                    lines=2,
                )
                # --- Generation settings ---
                gr.Markdown("#### Settings")
                t1_temperature = gr.Slider(
                    label="Temperature",
                    minimum=0, maximum=2.5, step=0.1,
                    value=cfg.get("default_temperature", 0.8),
                )
                gr.Markdown(
                    "Controls randomness. At 0 the model always picks the most probable word; "
                    "higher values make surprising choices more likely. "
                    "Default 0.8 gives coherent but varied output.",
                    elem_classes=["param-help"],
                )
                t1_top_k = gr.Slider(
                    label="Top-K",
                    minimum=5, maximum=100, step=1,
                    value=cfg.get("default_top_k", 10),
                )
                gr.Markdown(
                    "Limits which tokens the model considers and how many "
                    "appear in the probability table.",
                    elem_classes=["param-help"],
                )
                t1_steps = gr.Slider(
                    label="Steps",
                    minimum=1, maximum=20, step=1,
                    value=cfg.get("default_steps", 8),
                )
                gr.Markdown(
                    "How many tokens to generate.",
                    elem_classes=["param-help"],
                )
                # Show steps — own section with explanation
                t1_show_steps = gr.Checkbox(
                    label="Show steps",
                    value=True,
                )
                gr.Markdown(
                    "When on, each step shows the full probability table and which token "
                    "was selected (max 20 steps). When off, just generates the final text "
                    "(up to 100 steps).",
                    elem_classes=["param-help"],
                )
                with gr.Accordion("Random Seed", open=False):
                    t1_seed = gr.Number(
                        label="Random seed",
                        value=cfg.get("default_seed", 42),
                        precision=0,
                    )
                    gr.Markdown(
                        "Makes output reproducible -- same seed and settings always "
                        "produce the same text. Change it to get different results.",
                        elem_classes=["param-help"],
                    )
                with gr.Row():
                    t1_generate_btn = gr.Button("Generate", variant="primary")
                    t1_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80)
                t1_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"])
                t1_output = gr.HTML(label="Output")
                # Show steps toggle adjusts max steps
                t1_show_steps.change(
                    fn=on_show_steps_change,
                    inputs=[t1_show_steps],
                    outputs=[t1_steps],
                )
                t1_generate_btn.click(
                    fn=explore_probabilities,
                    inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed],
                    outputs=[t1_output],
                )
                # Share button
                t1_share_btn.click(
                    fn=share_probability_explorer,
                    inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed],
                    outputs=[t1_share_url],
                ).then(
                    fn=lambda: gr.update(visible=True),
                    outputs=[t1_share_url],
                )
            # ==================================================================
            # Tab 2: Tokenizer
            # ==================================================================
            with gr.Tab("Tokenizer", id="tok"):
                gr.Markdown("### Token Visualization")
                gr.Markdown(
                    "See how text is split into tokens before the model processes it. "
                    "Hover over each token to see its numeric ID. "
                    "Uses GPT-2's tokenizer, which splits words into interesting subword pieces."
                )
                t3_input = gr.Textbox(
                    label="Text",
                    value=cfg.get("default_tokenizer_text", "Huston-Tillotson University is an HBCU in Austin, Texas."),
                    lines=3,
                )
                with gr.Row():
                    t3_btn = gr.Button("Tokenize", variant="primary")
                    t3_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80)
                t3_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"])
                t3_output = gr.HTML(label="Tokens")
                t3_btn.click(
                    fn=tokenize_text,
                    inputs=[t3_input],
                    outputs=[t3_output],
                )
                # Share button
                t3_share_btn.click(
                    fn=share_tokenizer,
                    inputs=[t3_input],
                    outputs=[t3_share_url],
                ).then(
                    fn=lambda: gr.update(visible=True),
                    outputs=[t3_share_url],
                )
            # ==================================================================
            # Tab 3: System Prompt Explorer
            # ==================================================================
            with gr.Tab("System Prompt Explorer", id="sys"):
                gr.Markdown("### System Prompt Explorer")
                gr.Markdown(
                    "See how **system prompts** change an LLM's behavior. "
                    "Pick a preset or write your own, then chat with the model. "
                    "The green terminal shows exactly what the model receives — "
                    "every special token, every role label, every turn."
                )
                presets = _get_presets()
                preset_names = list(presets.keys())
                default_preset = "Helpful Assistant" if "Helpful Assistant" in presets else preset_names[0] if preset_names else ""
                sp_preset = gr.Dropdown(
                    label="Preset",
                    choices=preset_names,
                    value=default_preset,
                    interactive=True,
                )
                sp_system = gr.Textbox(
                    label="System Prompt",
                    value=presets.get(default_preset, ""),
                    lines=3,
                    placeholder="Enter a system prompt, or select a preset above...",
                )
                with gr.Accordion("Settings", open=False):
                    sp_max_tokens = gr.Slider(
                        label="Max tokens",
                        minimum=32, maximum=1024, step=16,
                        value=512,
                    )
                    gr.Markdown(
                        "Maximum number of tokens per response.",
                        elem_classes=["param-help"],
                    )
                    sp_temperature = gr.Slider(
                        label="Temperature",
                        minimum=0, maximum=2.0, step=0.1,
                        value=0.7,
                    )
                    sp_seed = gr.Number(
                        label="Random seed",
                        value=42,
                        precision=0,
                    )
                with gr.Accordion("What the model sees", open=False):
                    gr.Markdown(
                        "The full text sent to the model on every turn — system prompt, "
                        "all previous messages, and special tokens. Watch it grow with each exchange.",
                        elem_classes=["param-help"],
                    )
                    sp_terminal = gr.HTML(value=_initial_terminal())
                gr.Markdown("#### Chat")
                gr.Markdown(
                    "**No hidden system prompt.** This model's helpful behavior comes from "
                    "fine-tuning (RLHF), not a secret prompt. When you add a system prompt above, "
                    "it's the *only* instruction the model receives. Commercial APIs like ChatGPT "
                    "and Claude prepend their own system prompts before yours — you can't see or "
                    "remove them.",
                    elem_classes=["param-help"],
                )
                sp_chat_state = gr.State([])
                sp_chatbot = gr.Chatbot(height=700, feedback_options=None)
                with gr.Row():
                    sp_user_input = gr.Textbox(
                        label="Message",
                        placeholder="Type a message...",
                        lines=1,
                        scale=4,
                        show_label=False,
                    )
                    sp_send_btn = gr.Button("Send", variant="primary", scale=0, min_width=80)
                    sp_reset_btn = gr.Button("Reset", variant="secondary", scale=0, min_width=80)
                # --- Wiring ---
                # Preset dropdown → just fill in the textbox (no chat reset)
                sp_preset.change(
                    fn=on_preset_change,
                    inputs=[sp_preset],
                    outputs=[sp_system],
                )
                # System prompt textbox edits take effect on the next message sent.
                # No auto-reset — avoids losing conversation on accidental edits.
                # Use Reset button or pick a new preset to start fresh.
                # Send message (button or enter)
                send_inputs = [sp_user_input, sp_chat_state, sp_system, sp_max_tokens, sp_temperature, sp_seed]
                send_outputs = [sp_user_input, sp_chat_state, sp_chatbot, sp_terminal]
                sp_send_btn.click(
                    fn=send_chat_message,
                    inputs=send_inputs,
                    outputs=send_outputs,
                )
                sp_user_input.submit(
                    fn=send_chat_message,
                    inputs=send_inputs,
                    outputs=send_outputs,
                )
                # Reset button
                sp_reset_btn.click(
                    fn=reset_chat,
                    inputs=[sp_system],
                    outputs=[sp_chat_state, sp_chatbot, sp_terminal],
                )
            # ==================================================================
            # Tab 4: Attention Explorer
            # ==================================================================
            with gr.Tab("Attention Explorer", id="attn"):
                gr.Markdown("### Attention Explorer")
                _attn_model_name = cfg.get("attention_model", "gpt2-medium")
                gr.Markdown(
                    "See which words the model pays attention to when processing a sentence. "
                    f"Uses `{_attn_model_name}`. "
                    "Click a word to see curved lines connecting it to the words it attended to — "
                    "thicker lines mean stronger attention."
                )
                # Example sentence pairs — read from config, fall back to defaults
                _default_attn_examples = [
                    ["bass", "He tuned his bass and plugged into the", "On the lake she caught a bass and pulled it onto the"],
                    ["spring", "She wound the metal spring and the clock began to", "After the long winter the warm spring rain made the flowers"],
                    ["light", "She flipped the switch and the light began to", "The bag was so light she carried it with"],
                ]
                attn_example_pairs = [
                    tuple(ex) for ex in cfg.get("attention_examples", _default_attn_examples)
                    if isinstance(ex, (list, tuple)) and len(ex) == 3
                ]
                attn_example_btns = []
                for word, sent_a, sent_b in attn_example_pairs:
                    with gr.Row(elem_classes=["attn-example-row"]):
                        gr.Markdown(f"**{word}:**", elem_classes=["attn-example-label"])
                        btn_a = gr.Button(sent_a, size="sm", variant="secondary", elem_classes=["attn-example-btn"])
                        btn_b = gr.Button(sent_b, size="sm", variant="secondary", elem_classes=["attn-example-btn"])
                        attn_example_btns.extend([btn_a, btn_b])
                gr.Markdown("*Try an example above, or enter your own text:*")
                attn_input = gr.Textbox(
                    label="Text",
                    value="",
                    lines=1,
                    placeholder="Enter text to explore...",
                )
                attn_btn = gr.Button("Explore", variant="primary")
                attn_output = gr.HTML(label="Visualization")

                def run_attention_explorer(text):
                    """Run attention extraction and build visualization HTML."""
                    if not text or not text.strip():
                        return "<p style='color:#94a3b8;font-style:italic;'>Enter text above and click Explore.</p>"
                    # Re-read the model name each click so admin changes apply
                    # without an app restart.
                    attn_model = manager.config.get("attention_model", "gpt2-medium")
                    data = get_attention_data(text.strip(), model_name=attn_model)
                    return build_attention_html(data)

                attn_btn.click(
                    fn=run_attention_explorer,
                    inputs=[attn_input],
                    outputs=[attn_output],
                )
                # Wire up example buttons — each sets input and auto-explores.
                # The clicked Button is passed as an input, so its label text
                # arrives as `text` — no late-binding closure issue here.
                for btn in attn_example_btns:
                    btn.click(
                        fn=lambda text: (text, run_attention_explorer(text)),
                        inputs=[btn],
                        outputs=[attn_input, attn_output],
                    )
            # ==================================================================
            # Admin Panel
            # ==================================================================
            with gr.Tab("Admin", id="admin"):
                gr.Markdown("### Admin Panel")
                # Login gate
                with gr.Group() as admin_login_group:
                    admin_pw = gr.Textbox(
                        label="Password",
                        type="password",
                        placeholder="Enter admin password",
                    )
                    admin_login_btn = gr.Button("Login")
                    admin_login_msg = gr.Markdown("")
                # Admin controls (hidden until login)
                with gr.Group(visible=False) as admin_controls:
                    gr.Markdown("#### Base Model (Probability Explorer)")
                    with gr.Row():
                        admin_model_dropdown = gr.Dropdown(
                            choices=list(AVAILABLE_MODELS.keys()),
                            value=manager.current_model_name or cfg.get("model", "Llama-3.2-3B"),
                            label="Select model",
                        )
                        admin_load_btn = gr.Button("Load", variant="primary")
                    admin_model_status = gr.Markdown("")
                    gr.Markdown("#### Chat Model (System Prompt Explorer)")
                    with gr.Row():
                        admin_chat_dropdown = gr.Dropdown(
                            choices=list(AVAILABLE_MODELS.keys()),
                            value=manager.chat_model_name or cfg.get("chat_model", "Llama-3.2-3B-Instruct"),
                            label="Select chat model",
                        )
                        admin_chat_load_btn = gr.Button("Load", variant="primary")
                    admin_chat_status = gr.Markdown("")
                    gr.Markdown("---")
                    gr.Markdown("#### Default Settings")
                    admin_prompt = gr.Textbox(
                        label="Default prompt (Probability Explorer)",
                        value=cfg.get("default_prompt", ""),
                    )
                    admin_tokenizer_text = gr.Textbox(
                        label="Default text (Tokenizer)",
                        value=cfg.get("default_tokenizer_text", ""),
                    )
                    with gr.Row():
                        admin_temp = gr.Number(
                            label="Default temperature",
                            value=cfg.get("default_temperature", 0.8),
                        )
                        admin_top_k_admin = gr.Number(
                            label="Default top-k",
                            value=cfg.get("default_top_k", 10),
                            precision=0,
                        )
                        admin_steps = gr.Number(
                            label="Default steps",
                            value=cfg.get("default_steps", 8),
                            precision=0,
                        )
                        admin_seed = gr.Number(
                            label="Default seed",
                            value=cfg.get("default_seed", 42),
                            precision=0,
                        )
                    admin_save_btn = gr.Button("Save Defaults")
                    admin_save_msg = gr.Markdown("")
                    gr.Markdown("---")
                    gr.Markdown("#### Attention Explorer")
                    admin_attn_model = gr.Textbox(
                        label="Attention model",
                        value=cfg.get("attention_model", "gpt2-medium"),
                        info="GPT-2 family: gpt2, gpt2-medium, gpt2-large. Changes take effect on next Explore click (reloads model).",
                    )
                    admin_attn_examples = gr.Code(
                        value=json.dumps(cfg.get("attention_examples", []), indent=2),
                        language="json",
                        interactive=True,
                        label="Example sentences (JSON: [[\"word\", \"sent_a\", \"sent_b\"], ...])",
                    )
                    admin_attn_save_btn = gr.Button("Save Attention Settings")
                    admin_attn_save_msg = gr.Markdown("")
                    gr.Markdown("---")
                    gr.Markdown("#### System Prompt Presets")
                    gr.Markdown(
                        "Edit the presets available in the System Prompt Explorer dropdown. "
                        "JSON object: `{\"Name\": \"prompt text\", ...}`",
                        elem_classes=["param-help"],
                    )
                    admin_presets = gr.Code(
                        value=json.dumps(cfg.get("system_prompt_presets", {}), indent=2),
                        language="json",
                        interactive=True,
                    )
                    admin_presets_save_btn = gr.Button("Save Presets")
                    admin_presets_msg = gr.Markdown("")
                    gr.Markdown("---")
                    with gr.Accordion("Environment Variables Reference", open=False):
                        _pw_status = "*(set)*" if os.environ.get("ADMIN_PASSWORD") else "*(default: admin)*"
                        _rb_status = "*(set)*" if REBRANDLY_API_KEY else "*(not set)*"
                        gr.Markdown(
                            "Override settings via "
                            "[HF Space Settings](https://huggingface.co/spaces/chyams/llm-explorer/settings). "
                            "Use **Secrets** for sensitive values (encrypted, hidden after saving) "
                            "and **Variables** for everything else (visible in settings).\n\n"
                            "**Precedence:** env var > config.json > code defaults\n\n"
                            "**Secrets** (sensitive — encrypted)\n\n"
                            "| Variable | Description | Format | Current |\n"
                            "|----------|-------------|--------|---------|\n"
                            f"| `ADMIN_PASSWORD` | Admin panel password | Plain text | {_pw_status} |\n"
                            f"| `REBRANDLY_API_KEY` | URL shortener API key | API key | {_rb_status} |\n"
                            "\n**Variables** (non-sensitive — visible)\n\n"
                            "| Variable | Description | Format | Current |\n"
                            "|----------|-------------|--------|---------|\n"
                            f"| `DEFAULT_MODEL` | Base model (Prob Explorer) | Model name | `{cfg.get('model', '')}` |\n"
                            f"| `DEFAULT_CHAT_MODEL` | Chat model (Sys Prompt Explorer) | Model name | `{cfg.get('chat_model', '')}` |\n"
                            f"| `DEFAULT_PROMPT` | Default prompt | Plain text | `{cfg.get('default_prompt', '')[:40]}...` |\n"
                            f"| `DEFAULT_TEMPERATURE` | Default temperature | Number (0–2.5) | `{cfg.get('default_temperature', 0.8)}` |\n"
                            f"| `DEFAULT_TOP_K` | Default top-k | Integer (5–100) | `{cfg.get('default_top_k', 10)}` |\n"
                            f"| `DEFAULT_STEPS` | Default steps | Integer (1–100) | `{cfg.get('default_steps', 8)}` |\n"
                            f"| `DEFAULT_SEED` | Default seed | Integer | `{cfg.get('default_seed', 42)}` |\n"
                            f"| `DEFAULT_TOKENIZER_TEXT` | Default tokenizer text | Plain text | `{cfg.get('default_tokenizer_text', '')[:40]}...` |\n"
                            f"| `SYSTEM_PROMPT_PRESETS` | System prompt presets | JSON object | *({len(cfg.get('system_prompt_presets', {}))} presets)* |"
                        )
                    gr.Markdown("---")
                    gr.Markdown("#### Current Config")
                    admin_config_display = gr.Code(
                        value=json.dumps(cfg, indent=2),
                        language="json",
                        interactive=False,
                    )
                    gr.Markdown("---")
                    gr.Markdown("#### Export Slides")
                    gr.Markdown(
                        "*Uses current settings from Probability Explorer tab.*",
                        elem_classes=["param-help"],
                    )
                    admin_export_btn = gr.Button("Export Slides", variant="secondary")
                    admin_slides_file = gr.File(label="Slideshow", visible=False)
                # Login wiring
                admin_login_btn.click(
                    fn=admin_login,
                    inputs=[admin_pw],
                    outputs=[admin_controls, admin_login_group, admin_login_msg],
                )
                # Model loading — base
                admin_load_btn.click(
                    fn=admin_load_model,
                    inputs=[admin_model_dropdown],
                    outputs=[admin_model_status, admin_config_display, status_display],
                )
                # Model loading — chat
                admin_chat_load_btn.click(
                    fn=admin_load_chat_model,
                    inputs=[admin_chat_dropdown],
                    outputs=[admin_chat_status, admin_config_display, status_display],
                )
                # Save defaults — updates config display + Probability Explorer + Tokenizer controls
                admin_save_btn.click(
                    fn=admin_save_defaults,
                    inputs=[admin_prompt, admin_tokenizer_text, admin_temp, admin_top_k_admin, admin_steps, admin_seed],
                    outputs=[
                        admin_save_msg,
                        admin_config_display,
                        t1_prompt,
                        t1_temperature,
                        t1_top_k,
                        t1_steps,
                        t1_seed,
                        t3_input,
                    ],
                )
                # Save presets — updates config, dropdown choices, and presets display
                admin_presets_save_btn.click(
                    fn=admin_save_presets,
                    inputs=[admin_presets],
                    outputs=[admin_presets_msg, admin_config_display, sp_preset, admin_presets],
                )
                # Save attention settings
                admin_attn_save_btn.click(
                    fn=admin_save_attention,
                    inputs=[admin_attn_model, admin_attn_examples],
                    outputs=[admin_attn_save_msg, admin_config_display],
                )
                # Export slides — uses current Probability Explorer settings
                admin_export_btn.click(
                    fn=generate_slideshow,
                    inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed],
                    outputs=[admin_slides_file],
                ).then(
                    fn=lambda f: gr.update(visible=f is not None),
                    inputs=[admin_slides_file],
                    outputs=[admin_slides_file],
                )
        # Footer with build timestamp
        gr.Markdown(
            f"<div style='text-align:center;color:#999;font-size:11px;padding:8px 0;'>"
            f"Updated {BUILD_TIMESTAMP} CT</div>"
        )

        # ==============================================================
        # Page load: populate config defaults, then apply share params
        # ==============================================================
        def load_config_values(request: gr.Request):
            """Step 1: Read config + parse share query params.
            Returns config defaults for all controls, the share_params
            State dict, and a gr.Tabs update to select the right tab.
            """
            c = manager.get_config()
            qp = dict(request.query_params) if request else {}
            # Determine tab from share params
            tab = qp.get("tab")
            tab_update = gr.update(selected=tab) if tab in ("prob", "tok") else gr.update()
            return (
                # Admin panel fields
                c.get("default_prompt", ""),
                c.get("default_tokenizer_text", ""),
                c.get("default_temperature", 0.8),
                c.get("default_top_k", 10),
                c.get("default_steps", 8),
                c.get("default_seed", 42),
                json.dumps(c, indent=2),
                # Probability Explorer fields (config defaults for now)
                c.get("default_prompt", ""),
                c.get("default_temperature", 0.8),
                c.get("default_top_k", 10),
                c.get("default_steps", 8),
                c.get("default_seed", 42),
                # Tokenizer field
                c.get("default_tokenizer_text", ""),
                # Share params State (for subsequent steps)
                qp,
                # Tab selection
                tab_update,
            )

        def apply_share_params(params):
            """Step 2: Override controls with share-link values."""
            if not params or not params.get("tab"):
                return (
                    gr.update(), gr.update(), gr.update(),
                    gr.update(), gr.update(), gr.update(),
                    gr.update(),
                )
            c = manager.get_config()
            if params.get("tab") == "prob":
                prompt = params.get("p", c.get("default_prompt", ""))
                temp = _safe_float(params.get("t"), c.get("default_temperature", 0.8))
                top_k = _safe_int(params.get("k"), c.get("default_top_k", 10))
                show_steps = params.get("ss", "1") == "1"
                steps = _safe_int(params.get("s"), c.get("default_steps", 8))
                seed = _safe_int(params.get("seed"), c.get("default_seed", 42))
                max_steps = 20 if show_steps else 100
                return (
                    gr.update(value=prompt),
                    gr.update(value=temp),
                    gr.update(value=top_k),
                    gr.update(value=steps, maximum=max_steps),
                    gr.update(value=show_steps),
                    gr.update(value=seed),
                    gr.update(),  # tokenizer unchanged
                )
            if params.get("tab") == "tok":
                text = params.get("text", c.get("default_tokenizer_text", ""))
                return (
                    gr.update(), gr.update(), gr.update(),
                    gr.update(), gr.update(), gr.update(),
                    gr.update(value=text),
                )
            return (
                gr.update(), gr.update(), gr.update(),
                gr.update(), gr.update(), gr.update(),
                gr.update(),
            )

        def auto_generate_prob(params):
            """Step 3: Auto-run Probability Explorer if share link is for prob tab.
            Reads from params State (not components) to avoid race conditions.
            """
            if not params or params.get("tab") != "prob":
                return gr.update()
            c = manager.get_config()
            prompt = params.get("p", c.get("default_prompt", ""))
            temp = _safe_float(params.get("t"), c.get("default_temperature", 0.8))
            top_k = _safe_int(params.get("k"), c.get("default_top_k", 10))
            show_steps = params.get("ss", "1") == "1"
            steps = _safe_int(params.get("s"), c.get("default_steps", 8))
            seed = _safe_int(params.get("seed"), c.get("default_seed", 42))
            return explore_probabilities(prompt, temp, top_k, steps, show_steps, seed)

        def auto_tokenize(params):
            """Step 4: Auto-run Tokenizer if share link is for tok tab."""
            if not params or params.get("tab") != "tok":
                return gr.update()
            c = manager.get_config()
            text = params.get("text", c.get("default_tokenizer_text", ""))
            return tokenize_text(text)

        # Chain: load config → apply share overrides → auto-generate
        demo.load(
            fn=load_config_values,
            js=FORCE_LIGHT_MODE_JS,
            outputs=[
                # Admin panel fields
                admin_prompt, admin_tokenizer_text, admin_temp, admin_top_k_admin, admin_steps, admin_seed,
                admin_config_display,
                # Probability Explorer fields
                t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed,
                # Tokenizer field
                t3_input,
                # Share state + tab selection
                share_params, tabs,
            ],
        ).then(
            fn=apply_share_params,
            inputs=[share_params],
            outputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed, t3_input],
        ).then(
            fn=auto_generate_prob,
            inputs=[share_params],
            outputs=[t1_output],
        ).then(
            fn=auto_tokenize,
            inputs=[share_params],
            outputs=[t3_output],
        )
    return demo
# ---------------------------------------------------------------------------
# Startup
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    cfg = manager.get_config()
    # Load base model (Probability Explorer)
    base_model = cfg.get("model", "Llama-3.2-3B")
    print(f"Loading base model: {base_model}")
    print(manager.load_model(base_model))
    # Load chat model (System Prompt Explorer)
    chat_model = cfg.get("chat_model", "Llama-3.2-3B-Instruct")
    print(f"Loading chat model: {chat_model}")
    print(manager.load_chat_model(chat_model))
    app = create_app()
    # THEME/CUSTOM_CSS are applied inside create_app() on the Blocks
    # constructor; Blocks.launch() does not accept theme/css kwargs.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )