"""LLM Explorer -- Interactive tools for understanding how LLMs work.
Gradio app with four tabs:
1. Probability Explorer (step-by-step or bulk generation)
2. Tokenizer
3. System Prompt Explorer (instruct models)
4. Attention Explorer (GPT-2 attention visualization)
Plus a password-protected Admin panel for runtime configuration.
"""
import json
import math
import os
import subprocess
import urllib.parse
from datetime import datetime, timezone, timedelta
import gradio as gr
import re
from models import AVAILABLE_MODELS, manager, demo_tokenizer
from attention import get_attention_data, build_attention_html
# ---------------------------------------------------------------------------
# Admin password -- set via env var on HF Spaces, or fall back to default
# ---------------------------------------------------------------------------
# NOTE(review): the "admin" fallback is insecure for a public deployment —
# set ADMIN_PASSWORD in the Space secrets.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "admin")
# Build timestamp (Central Time) — computed once at import/startup
# NOTE(review): fixed UTC-6 offset ignores daylight saving (CST vs CDT) —
# acceptable for a display-only value, confirm if precision matters.
_CT = timezone(timedelta(hours=-6))
BUILD_TIMESTAMP = datetime.now(_CT).strftime("%Y-%m-%d %H:%M:%S")
# ---------------------------------------------------------------------------
# Share / URL shortening
# ---------------------------------------------------------------------------
REBRANDLY_API_KEY = os.environ.get("REBRANDLY_API_KEY", "")
_SPACE_ID = os.environ.get("SPACE_ID", "")  # e.g. "owner/space-name" on HF Spaces
if _SPACE_ID:
    # HF Spaces serve the app at https://{owner}-{name}.hf.space/
    _owner, _name = _SPACE_ID.split("/")
    _BASE_URL = f"https://{_owner}-{_name}.hf.space/"
else:
    _BASE_URL = "http://localhost:7860/"
# ---------------------------------------------------------------------------
# Theme — use .set() for Gradio-controlled element colors
# ---------------------------------------------------------------------------
# NOTE(review): THEME is not visibly passed to gr.Blocks() in create_app —
# confirm it is applied somewhere outside this view.
THEME = gr.themes.Soft(
    # Custom purple palette (c50 lightest .. c950 darkest).
    primary_hue=gr.themes.Color(
        c50="#faf8fc",
        c100="#f3f0f7",
        c200="#ded9f4",
        c300="#c4b5e0",
        c400="#a78bcc",
        c500="#8b5fb8",
        c600="#7c3aad",
        c700="#63348d",
        c800="#52296f",
        c900="#421f59",
        c950="#2a1339",
    ),
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
).set(
    # Primary buttons: dark purple with white text
    button_primary_background_fill="#63348d",
    button_primary_background_fill_dark="#63348d",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    button_primary_border_color="#63348d",
    button_primary_border_color_dark="#63348d",
    button_primary_background_fill_hover="#4e2870",
    button_primary_background_fill_hover_dark="#7c3aad",
    # Secondary buttons: dark purple outline
    button_secondary_text_color="#63348d",
    button_secondary_text_color_dark="#ded9f4",
    button_secondary_border_color="#63348d",
    button_secondary_border_color_dark="#475569",
    # Block backgrounds: light purple in light, dark slate in dark
    block_background_fill="#f3f0f7",
    block_background_fill_dark="#1e293b",
    panel_background_fill="#f3f0f7",
    panel_background_fill_dark="#0f172a",
)
# ---------------------------------------------------------------------------
# Custom CSS — headings, tabs, dark mode, output HTML
# ---------------------------------------------------------------------------
# The --llm-* variables defined under :root / .dark are consumed by the HTML
# rendered by the probability/tokenizer helpers, so output honors dark mode.
# NOTE(review): CUSTOM_CSS is not visibly passed to gr.Blocks(css=...) in
# create_app — confirm it is applied somewhere outside this view.
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;700&family=Merriweather:wght@300;400;700;900&display=swap');
.gradio-container { max-width: 960px !important; }
/* Headings: Merriweather, dark purple / light purple */
h1, h2, h3,
.gradio-container h1, .gradio-container h2, .gradio-container h3 {
font-family: 'Merriweather', serif !important;
color: #63348d !important;
}
body.dark h1, body.dark h2, body.dark h3,
body.dark .gradio-container h1, body.dark .gradio-container h2, body.dark .gradio-container h3,
.dark h1, .dark h2, .dark h3,
.dark .gradio-container h1, .dark .gradio-container h2, .dark .gradio-container h3 {
color: #ded9f4 !important;
}
/* Tab labels: dark purple / light purple */
.tab-nav button, .tabs button.tab-nav-button {
color: #63348d !important;
}
body.dark .tab-nav button, .dark .tabs button.tab-nav-button {
color: #ded9f4 !important;
}
/* Dark mode toggle button */
#dark-mode-btn {
color: white !important;
border-color: #63348d !important;
background: #63348d !important;
}
body.dark #dark-mode-btn {
color: #ded9f4 !important;
border-color: #475569 !important;
background: #1e293b !important;
}
/* --- CSS variables for output HTML --- */
:root {
--llm-card-bg: #ffffff;
--llm-card-border: #e2e8f0;
--llm-text-bg: #f8fafc;
--llm-prompt-color: #475569;
--llm-generated-color: #1e293b;
--llm-highlight-bg: #ded9f4;
--llm-highlight-color: #63348d;
--llm-step-header-color: #63348d;
--llm-entropy-color: #64748b;
--llm-prob-label-color: #1e293b;
--llm-prob-value-color: #334155;
--llm-bar-selected: #63348d;
--llm-bar-default: #c4b5d4;
--llm-token-count-color: #64748b;
--llm-help-color: #64748b;
}
body.dark, .dark {
--llm-card-bg: #1e293b;
--llm-card-border: #334155;
--llm-text-bg: #0f172a;
--llm-prompt-color: #94a3b8;
--llm-generated-color: #e2e8f0;
--llm-highlight-bg: #63348d;
--llm-highlight-color: #ded9f4;
--llm-step-header-color: #ded9f4;
--llm-entropy-color: #94a3b8;
--llm-prob-label-color: #e2e8f0;
--llm-prob-value-color: #94a3b8;
--llm-bar-selected: #a78bcc;
--llm-bar-default: #4a3a64;
--llm-token-count-color: #94a3b8;
--llm-help-color: #94a3b8;
}
/* Parameter help text */
.param-help p {
font-size: 13px !important;
color: var(--llm-help-color) !important;
margin-top: -4px !important;
margin-bottom: 8px !important;
line-height: 1.4 !important;
}
/* --- System Prompt Explorer: green terminal --- */
.green-terminal {
font-family: 'IBM Plex Mono', 'JetBrains Mono', 'Courier New', monospace !important;
font-size: 13px !important;
line-height: 1.5 !important;
background: #0a0a0a !important;
color: #33ff33 !important;
border-radius: 0.5rem !important;
border: 2px solid #1a1a1a !important;
box-shadow: inset 0 0 20px rgba(51, 255, 51, 0.05) !important;
padding: 16px !important;
min-height: 200px;
max-height: 500px;
overflow-y: auto;
white-space: pre-wrap;
word-wrap: break-word;
}
.green-terminal::-webkit-scrollbar { width: 8px; }
.green-terminal::-webkit-scrollbar-track { background: #1a1a1a; border-radius: 4px; }
.green-terminal::-webkit-scrollbar-thumb { background: #33ff33; border-radius: 4px; opacity: 0.5; }
.green-terminal .sp-label { color: #22c55e; font-weight: bold; }
.green-terminal .sp-special { color: #666666; }
.green-terminal .sp-system { color: #facc15; }
.green-terminal .sp-user { color: #60a5fa; }
.green-terminal .sp-assistant { color: #33ff33; }
.green-terminal .sp-response { color: #f0f0f0; }
/* Response card */
.response-card {
border: 1px solid var(--llm-card-border);
border-radius: 8px;
padding: 16px;
background: var(--llm-card-bg);
font-size: 15px;
line-height: 1.6;
}
/* Attention Explorer example pairs */
.attn-example-row {
background: #f8f6fb !important;
border-radius: 6px !important;
padding: 6px 10px !important;
margin-bottom: 2px !important;
align-items: flex-start !important;
gap: 6px !important;
flex-wrap: wrap !important;
}
.attn-example-label {
min-width: 70px !important;
max-width: 70px !important;
flex-shrink: 0 !important;
padding-top: 4px !important;
}
.attn-example-label p {
font-family: 'Merriweather', serif !important;
color: #63348d !important;
font-size: 13px !important;
margin: 0 !important;
}
.attn-example-btn {
flex: 0 0 auto !important;
min-width: 0 !important;
max-width: fit-content !important;
}
.attn-example-btn button {
font-size: 12px !important;
padding: 4px 12px !important;
border: 1.5px solid #d4c8e8 !important;
border-radius: 14px !important;
background: #fff !important;
color: #4a3070 !important;
white-space: nowrap !important;
width: auto !important;
text-align: left !important;
}
.attn-example-btn button:hover {
background: #f3f0f7 !important;
border-color: #63348d !important;
}
"""
# Dark mode toggle JS — toggles class and swaps button text + heading colors
# Passed as the `js=` callback of the dark-mode button's click handler.
DARK_MODE_JS = """
() => {
document.body.classList.toggle('dark');
const isDark = document.body.classList.contains('dark');
const el = document.getElementById('dark-mode-btn');
if (el) el.innerText = isDark ? 'Light mode' : 'Dark mode';
document.querySelectorAll('h1, h2, h3').forEach(h => {
h.style.setProperty('color', isDark ? '#ded9f4' : '#63348d', 'important');
});
}
"""
# Force light mode on page load (override browser/system dark mode preference)
# NOTE(review): not visibly wired to a load event in create_app — confirm.
FORCE_LIGHT_MODE_JS = """
() => {
document.body.classList.remove('dark');
const el = document.getElementById('dark-mode-btn');
if (el) el.innerText = 'Dark mode';
document.querySelectorAll('h1, h2, h3').forEach(h => {
h.style.setProperty('color', '#63348d', 'important');
});
}
"""
# ---------------------------------------------------------------------------
# Share helpers
# ---------------------------------------------------------------------------
def _shorten_url(long_url: str) -> str:
    """Shorten a URL via Rebrandly API (using curl). Falls back to long URL on failure."""
    # Skip shortening when no API key is configured or when running locally.
    if not REBRANDLY_API_KEY or "localhost" in long_url:
        return long_url
    body = json.dumps({
        "destination": long_url,
        "domain": {"fullName": "go.ropavieja.org"},
    })
    cmd = [
        "curl", "-s", "-X", "POST",
        "https://api.rebrandly.com/v1/links",
        "-H", "Content-Type: application/json",
        "-H", f"apikey: {REBRANDLY_API_KEY}",
        "-d", body,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        # Any curl failure or empty response → keep the long URL.
        if proc.returncode != 0 or not proc.stdout.strip():
            return long_url
        parsed = json.loads(proc.stdout)
        return f"https://{parsed['shortUrl']}"
    except (subprocess.TimeoutExpired, KeyError, json.JSONDecodeError, OSError) as exc:
        # Best-effort: log and degrade gracefully to the unshortened URL.
        print(f"[share] Rebrandly error: {exc}")
        return long_url
def _safe_float(val, default: float) -> float:
try:
return float(val)
except (TypeError, ValueError):
return default
def _safe_int(val, default: int) -> int:
try:
return int(val)
except (TypeError, ValueError):
return default
def share_probability_explorer(prompt, temp, top_k, steps, show_steps, seed):
    """Build a share URL encoding current Probability Explorer settings."""
    # Short query keys keep the URL compact: p=prompt, t=temperature, etc.
    query = urllib.parse.urlencode({
        "tab": "prob",
        "p": prompt,
        "t": str(float(temp)),
        "k": str(int(top_k)),
        "s": str(int(steps)),
        "ss": "1" if show_steps else "0",
        "seed": str(int(seed)),
    })
    return _shorten_url(f"{_BASE_URL}?{query}")
def share_tokenizer(text):
    """Build a share URL encoding current Tokenizer input."""
    query = urllib.parse.urlencode({"tab": "tok", "text": text})
    return _shorten_url(f"{_BASE_URL}?{query}")
# ---------------------------------------------------------------------------
# HTML rendering helpers
# ---------------------------------------------------------------------------
# Alternating token chip colors — (background, foreground) hex pairs, cycled
# by index in _render_tokens_html so adjacent tokens are visually distinct.
TOKEN_COLORS = [
    ("#e0f2fe", "#0c4a6e"),  # light blue / dark blue
    ("#fef3c7", "#92400e"),  # light amber / dark amber
    ("#d1fae5", "#065f46"),  # light green / dark green
    ("#fce7f3", "#9d174d"),  # light pink / dark pink
    ("#e0e7ff", "#3730a3"),  # light indigo / dark indigo
    ("#fde68a", "#78350f"),  # yellow / brown
]
def _esc(text: str) -> str:
"""Escape HTML special characters."""
return text.replace("&", "&").replace("<", "<").replace(">", ">")
def _token_label(token_str: str) -> str:
    """Format a token for display in probability table (no quotes, trimmed)."""
    trimmed = _esc(token_str).strip()
    # Pure-whitespace tokens render as the visible open-box symbol.
    return trimmed if trimmed else "\u2423"
def _highlight_token(token_str: str) -> str:
    """Wrap the non-whitespace part of a token in a highlight span.

    Leading/trailing whitespace stays outside the highlight so the
    purple chip only covers the visible characters.
    """
    escaped = _esc(token_str)
    stripped = escaped.strip()
    if not stripped:
        # Whole token is whitespace: show the visible open-box symbol instead.
        return ('\u2423')
    leading_count = len(escaped) - len(escaped.lstrip())
    trailing_count = len(escaped) - len(escaped.rstrip())
    # NOTE(review): per the docstring this should wrap `stripped` in a
    # highlight <span>; as written it is just the bare text — the markup
    # appears to have been lost. Restore from version control.
    highlight = (f'{stripped}')
    parts = []
    if leading_count:
        parts.append(escaped[:leading_count])
    parts.append(highlight)
    if trailing_count:
        parts.append(escaped[len(escaped) - trailing_count:])
    return "".join(parts)
def _render_step_html(step_data: dict, prompt: str, prev_generated: str) -> str:
    """Render one generation step as styled HTML.

    prev_generated is the text generated in all prior steps (between prompt
    and the latest token), so only the newest token gets highlighted.
    """
    s = step_data
    # Build probability bar chart
    rows_html = ""
    # NOTE(review): bar_width / bar_color / label_style are computed but never
    # referenced in the template below — the row markup appears to have been
    # lost from this file. Restore the original template from version control.
    for token_str, prob, tid in s["top_tokens"]:
        # Bar width in px, proportional to probability (min 1px; NaN-safe).
        bar_width = max(1, int(prob * 300)) if not math.isnan(prob) else 1
        is_selected = tid == s["token_id"]
        bar_color = "var(--llm-bar-selected)" if is_selected else "var(--llm-bar-default)"
        label_style = "font-weight:700;" if is_selected else ""
        arrow = " \u2190" if is_selected else ""  # left arrow marks the chosen token
        token_display = _token_label(token_str)
        rows_html += f"""
{token_display}
{"0.0000" if math.isnan(prob) else f"{prob:.4f}"}{arrow}
"""
    highlighted = _highlight_token(s["token"])
    return f"""
Step {s['step']}
Entropy: {s['entropy']:.2f} bits
{_esc(prompt)}{_esc(prev_generated)}{highlighted}
{rows_html}
"""
def _render_final_text_html(prompt: str, generated_text: str) -> str:
    """Render final text with all generated text highlighted (show-steps OFF mode)."""
    # generated_text includes the prompt; slice it off to isolate the new text.
    generated = generated_text[len(prompt):]
    escaped = _esc(generated)
    stripped = escaped.lstrip()
    # Leading whitespace kept outside the highlight (same idea as _highlight_token).
    leading = escaped[:len(escaped) - len(stripped)]
    # NOTE(review): the template below appears to have lost its HTML tags —
    # restore the original markup from version control.
    return f"""
{_esc(prompt)}{leading}{stripped}
"""
def _render_tokens_html(tokens: list[tuple[str, int]]) -> str:
    """Render tokenized text as colored chips."""
    chips = ""
    # NOTE(review): bg / fg / tid are computed but unused in the chip template —
    # the HTML markup appears to have been lost; restore from version control.
    for i, (token_str, tid) in enumerate(tokens):
        # Cycle through the palette so adjacent chips differ.
        bg, fg = TOKEN_COLORS[i % len(TOKEN_COLORS)]
        display = _esc(token_str)
        # Show spaces explicitly
        if display.strip() == "":
            display = repr(token_str).strip("'")
        chips += f"""{display}"""
    return f"""
{len(tokens)} tokens — hover for token IDs
{chips}
"""
# ---------------------------------------------------------------------------
# Tab 1: Probability Explorer
# ---------------------------------------------------------------------------
def explore_probabilities(prompt, temperature, top_k, steps, show_steps, seed):
    """Generate tokens step by step and return formatted HTML."""
    if not manager.is_ready():
        # NOTE(review): the return strings below appear to have lost their
        # HTML wrappers (the literals are unterminated as written) — restore
        # the original markup from version control.
        return f"{manager.status_message()}
"
    seed = int(seed)
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=seed,
        show_steps=show_steps,
    )
    if not results:
        return "No results generated.
"
    if show_steps:
        html_parts = []
        for i, r in enumerate(results):
            # Text from all prior steps, so only the newest token is highlighted.
            prev_generated = results[i - 1]["text"][len(prompt):] if i > 0 else ""
            html_parts.append(_render_step_html(r, prompt, prev_generated))
        return "\n".join(html_parts)
    else:
        # Bulk mode: render only the final text, all of it highlighted.
        final_text = results[-1]["text"]
        return _render_final_text_html(prompt, final_text)
def _build_slide_rows(step_data, top_k, bar_max):
    """Build probability bar HTML rows for a slideshow slide."""
    rows = ""
    display_tokens = step_data["top_tokens"][:int(top_k)]
    # NOTE(review): bar_width / bar_color / sel_class are computed but unused
    # in the row template below — the markup appears to have been lost;
    # restore from version control.
    for token_str, prob, tid in display_tokens:
        is_selected = tid == step_data["token_id"]
        # Bar width in px (min 2px; NaN-safe).
        bar_width = max(2, int(prob * bar_max)) if not math.isnan(prob) else 2
        bar_color = "var(--llm-bar-selected)" if is_selected else "var(--llm-bar-default)"
        sel_class = " selected" if is_selected else ""
        arrow = " \u2190" if is_selected else ""  # marks the chosen token
        prob_str = "0.0000" if math.isnan(prob) else f"{prob:.4f}"
        token_display = _token_label(token_str)
        rows += f'''
{token_display}
{prob_str}{arrow}
'''
    return rows
def generate_slideshow(prompt, temperature, top_k, steps, seed):
    """Generate tokens and return a self-contained HTML slideshow file.

    Returns the path of a temp .html file, or None when no model is loaded
    or generation produced nothing.
    """
    if not manager.is_ready():
        return None
    results = manager.generate_step_by_step(
        prompt=prompt,
        steps=int(steps),
        temperature=temperature,
        top_k=int(top_k),
        seed=int(seed),
        show_steps=True,
    )
    if not results:
        return None
    # Build slides HTML
    slides_html = ""
    bar_max = 900  # max probability-bar width in px
    # Slide 0: prompt-only text + first step's probability table
    # NOTE(review): the slide templates below appear to have lost their HTML
    # markup (text_html / rows_html are unused in the per-step template) —
    # restore the originals from version control.
    slide0_rows = _build_slide_rows(results[0], top_k, bar_max)
    slide0_text = f'{_esc(prompt)}'
    slides_html += f'''
{slide0_rows}
{slide0_text}
'''
    # Slides 1..N: each step with highlighted token
    for i, r in enumerate(results):
        prev_generated = results[i - 1]["text"][len(prompt):] if i > 0 else ""
        highlighted_token = _highlight_token(r["token"])
        text_html = (
            f'{_esc(prompt)}'
            f'{_esc(prev_generated)}'
            f'{highlighted_token}'
        )
        rows_html = _build_slide_rows(r, top_k, bar_max)
        slides_html += f'''
'''
    total = len(results) + 1  # +1 for the prompt-only slide
    html = f'''
LLM Explorer — Slides
{slides_html}
1 / {total}
\u2190 \u2192 arrow keys to navigate
'''
    # Write to temp file (delete=False: Gradio serves the file after return)
    import tempfile
    tmp = tempfile.NamedTemporaryFile(
        suffix=".html", prefix="llm-slides-", delete=False, mode="w"
    )
    tmp.write(html)
    tmp.close()
    return tmp.name
def on_show_steps_change(show_steps):
    """Adjust max steps slider when show_steps toggles.

    When show_steps is turned ON, also reset to default steps value
    (since high step counts are only useful for bulk generation).
    """
    cfg = manager.get_config()
    if not show_steps:
        # Bulk mode: allow long generations; keep the current slider value.
        return gr.update(maximum=100)
    # Step-by-step mode: cap at 20 and snap back to the configured default.
    return gr.update(maximum=20, value=cfg.get("default_steps", 8))
# ---------------------------------------------------------------------------
# Tab 2: Tokenizer
# ---------------------------------------------------------------------------
def tokenize_text(text):
    """Tokenize input and return formatted HTML.

    Uses GPT-2's tokenizer (not the generation model's tokenizer) because
    GPT-2's smaller vocabulary produces more interesting subword splits.
    """
    return _render_tokens_html(demo_tokenizer.tokenize(text))
# ---------------------------------------------------------------------------
# Tab 3: System Prompt Explorer
# ---------------------------------------------------------------------------
MAX_CHAT_TURNS = 10  # max user messages before forcing reset
def _get_presets() -> dict:
    """Return the system-prompt presets mapping from the live config."""
    config = manager.get_config()
    return config.get("system_prompt_presets", {})
def _esc_terminal(text: str) -> str:
"""Escape HTML for terminal display."""
return text.replace("&", "&").replace("<", "<").replace(">", ">")
def _format_terminal(raw_text: str) -> str:
    """Parse a chat template string and produce color-coded HTML for the green terminal.

    Works with both Llama (<|start_header_id|>) and Qwen (<|im_start|>) templates.
    """
    # Split on special tokens, keeping them (the capturing group preserves
    # the delimiters in the result list).
    parts = re.split(r'(<\|[^|]*\|>)', raw_text)
    html_parts = []
    current_role = None
    expect_role = False  # true right after a token that precedes a role label
    role_css = {
        "system": "sp-system",
        "user": "sp-user",
        "assistant": "sp-assistant",
    }
    # NOTE(review): the f-strings below appear to have lost their HTML span
    # markup (the role_css class names are computed but never emitted, and
    # the final return string is unterminated) — restore from version control.
    for part in parts:
        if re.match(r'<\|[^|]*\|>', part):
            # Special token — render in gray
            html_parts.append(f"{_esc_terminal(part)}")
            # After im_start or start_header_id, next text chunk is a role label
            expect_role = ("im_start" in part or "start_header_id" in part)
        elif expect_role and part.strip() in role_css:
            # Role label (system / user / assistant)
            role = part.strip()
            current_role = role
            # Whitespace around the role label stays outside the styled span.
            before = _esc_terminal(part[: len(part) - len(part.lstrip())])
            after = _esc_terminal(part[len(part.rstrip()) :])
            html_parts.append(f"{before}{role}{after}")
            expect_role = False
        else:
            expect_role = False
            # Message body: colored by the most recent role, if any.
            css = role_css.get(current_role, "")
            if css and part.strip():
                html_parts.append(f"{_esc_terminal(part)}")
            else:
                html_parts.append(_esc_terminal(part))
    return "" + "".join(html_parts) + "
"
def _initial_terminal() -> str:
    """Placeholder terminal HTML shown before any message is sent."""
    # NOTE(review): this literal appears to have lost its HTML wrapper
    # (it is unterminated as written) — restore from version control.
    return "Send a message to see what the model receives.
"
def _chatbot_view(history):
    """Copy history into fresh role/content dicts for the Chatbot component."""
    return [{"role": m["role"], "content": m["content"]} for m in history]


def send_chat_message(user_message, history, system_prompt, max_tokens, temperature, seed):
    """Handle a user message: generate response, update state + chatbot + terminal.

    `history` is the gr.State list of clean {"role": ..., "content": ...} dicts.
    The Chatbot is derived from this — never read back from Chatbot (Gradio
    mangles the dicts on round-trip).

    Returns (cleared_input, new_history, chatbot_messages, terminal_html).
    """
    # Ignore empty/whitespace-only submissions; just refresh the display.
    if not user_message or not user_message.strip():
        return "", history, _chatbot_view(history), _format_terminal_from_history(history, system_prompt)
    if not manager.chat_ready():
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": "No chat model loaded. Load one from the Admin tab."},
        ]
        return "", history, _chatbot_view(history), _initial_terminal()
    # Check turn limit
    user_turns = sum(1 for m in history if m["role"] == "user")
    if user_turns >= MAX_CHAT_TURNS:
        history = history + [
            {"role": "user", "content": user_message},
            {"role": "assistant", "content": f"Conversation limit reached ({MAX_CHAT_TURNS} exchanges). Click Reset to start a new conversation."},
        ]
        return "", history, _chatbot_view(history), _format_terminal_from_history(history, system_prompt)
    # Build full messages for the model (system prompt first, when present).
    history = history + [{"role": "user", "content": user_message}]
    messages = []
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})
    messages.extend(history)
    result = manager.generate_chat(
        messages=messages,
        max_new_tokens=int(max_tokens),
        temperature=temperature,
        seed=int(seed),
    )
    if "error" in result:
        # Surface the error as an assistant turn; keep the conversation usable.
        history = history + [
            {"role": "assistant", "content": f"Error: {result['error']}"},
        ]
        return "", history, _chatbot_view(history), _format_terminal_from_history(history, system_prompt)
    history = history + [{"role": "assistant", "content": result["response"]}]
    # Show exactly what the model received, color-coded.
    terminal_html = _format_terminal(result["formatted_display"])
    return "", history, _chatbot_view(history), terminal_html
def _format_terminal_from_history(chat_history, system_prompt):
    """Build terminal display from chat history (without generating)."""
    if not chat_history:
        return _initial_terminal()
    # System prompt (when non-blank) leads the message list.
    messages = []
    if system_prompt and system_prompt.strip():
        messages = [{"role": "system", "content": system_prompt}]
    messages = messages + list(chat_history)
    formatted = manager.format_chat_template(messages)
    return _format_terminal(formatted) if formatted else _initial_terminal()
def reset_chat(system_prompt):
    """Clear chat history, keep system prompt. Show initial terminal with just system prompt.

    Returns (state, chatbot, terminal).
    """
    has_prompt = bool(system_prompt and system_prompt.strip())
    if has_prompt and manager.chat_ready():
        formatted = manager.format_chat_template(
            [{"role": "system", "content": system_prompt}]
        )
        return [], [], _format_terminal(formatted)
    return [], [], _initial_terminal()
def on_preset_change(preset_name):
    """Update system prompt textbox when a preset is selected. No chat reset."""
    presets = _get_presets()
    return presets.get(preset_name, "")
# ---------------------------------------------------------------------------
# Admin panel
# ---------------------------------------------------------------------------
def admin_login(password):
    """Check admin password and return visibility updates.

    Returns (controls_visibility_update, login_gate_visibility_update, message).
    Uses a constant-time comparison — a plain `==` on an attacker-supplied
    password leaks timing information.
    """
    import hmac  # local import: stdlib-only, used nowhere else in the file
    ok = isinstance(password, str) and hmac.compare_digest(password, ADMIN_PASSWORD)
    if ok:
        return gr.update(visible=True), gr.update(visible=False), ""
    return gr.update(visible=False), gr.update(visible=True), "Incorrect password."
def admin_load_model(model_name):
    """Load a new base model from admin panel."""
    load_status = manager.load_model(model_name)
    config_json = json.dumps(manager.get_config(), indent=2)
    # Also refresh the page-top status banner.
    return load_status, config_json, f"**{manager.status_message()}**"
def admin_load_chat_model(model_name):
    """Load a new chat model from admin panel."""
    load_status = manager.load_chat_model(model_name)
    config_json = json.dumps(manager.get_config(), indent=2)
    # Also refresh the page-top status banner.
    return load_status, config_json, f"**{manager.status_message()}**"
def admin_save_defaults(prompt, tokenizer_text, temperature, top_k, steps, seed):
    """Save default settings and return updated values for all outputs."""
    # Normalize numeric fields once, up front.
    top_k, steps, seed = int(top_k), int(steps), int(seed)
    manager.update_config(
        default_prompt=prompt,
        default_tokenizer_text=tokenizer_text,
        default_temperature=temperature,
        default_top_k=top_k,
        default_steps=steps,
        default_seed=seed,
    )
    cfg = manager.get_config()
    # Echo the saved values back so the UI controls reflect them immediately.
    return (
        "Defaults saved.",
        json.dumps(cfg, indent=2),
        prompt,
        temperature,
        top_k,
        steps,
        seed,
        tokenizer_text,
    )
def admin_save_presets(presets_json):
    """Save system prompt presets from admin panel.

    Returns (status_msg, config_json, dropdown_update, presets_json_display).
    """
    def _error(msg):
        # Failure path: report the error, leave dropdown/display untouched.
        cfg = manager.get_config()
        return msg, json.dumps(cfg, indent=2), gr.update(), gr.update()

    try:
        presets = json.loads(presets_json)
    except (json.JSONDecodeError, TypeError) as e:
        return _error(f"Invalid JSON: {e}")
    if not isinstance(presets, dict):
        return _error("Presets must be a JSON object `{\"Name\": \"prompt\", ...}`")
    manager.update_config(system_prompt_presets=presets)
    cfg = manager.get_config()
    return (
        f"Presets saved ({len(presets)} presets).",
        json.dumps(cfg, indent=2),
        # Refresh dropdown choices and re-pretty-print the saved presets.
        gr.update(choices=list(presets.keys())),
        json.dumps(presets, indent=2),
    )
def admin_save_attention(attn_model, examples_json):
    """Save attention explorer settings from admin panel.

    Returns (status_msg, config_json).
    """
    try:
        examples = json.loads(examples_json)
    except (json.JSONDecodeError, TypeError) as e:
        return f"Invalid JSON: {e}", json.dumps(manager.get_config(), indent=2)
    if not isinstance(examples, list):
        return "Examples must be a JSON array", json.dumps(manager.get_config(), indent=2)
    model_name = attn_model.strip()
    manager.update_config(
        attention_model=model_name,
        attention_examples=examples,
    )
    status = (
        f"Attention settings saved. Model: {model_name}. "
        f"Note: model change takes effect on next Explore click. "
        f"Example changes require app restart."
    )
    return status, json.dumps(manager.get_config(), indent=2)
# ---------------------------------------------------------------------------
# Build the Gradio app
# ---------------------------------------------------------------------------
def create_app():
cfg = manager.get_config()
with gr.Blocks(title="LLM Explorer") as demo:
# Header with dark mode toggle
with gr.Row():
gr.Markdown("# LLM Explorer\n*Interactive tools for understanding how LLMs work*")
dark_btn = gr.Button(
"Dark mode", elem_id="dark-mode-btn",
size="sm", scale=0, min_width=100, variant="secondary",
)
dark_btn.click(fn=None, js=DARK_MODE_JS)
# Status bar
status_display = gr.Markdown(value=f"**{manager.status_message()}**")
# Hidden state for share-link params (avoids race conditions with component updates)
share_params = gr.State(value={})
# ==================================================================
# Tabs
# ==================================================================
with gr.Tabs() as tabs:
# ==================================================================
# Tab 1: Probability Explorer
# ==================================================================
with gr.Tab("Probability Explorer", id="prob"):
gr.Markdown("### Step-by-Step Next-Token Prediction")
gr.Markdown(
"Enter a prompt and watch the model predict one token at a time. "
"Each step shows the probability distribution over the vocabulary."
)
t1_prompt = gr.Textbox(
label="Prompt",
value=cfg.get("default_prompt", "The best thing about Huston-Tillotson University is"),
lines=2,
)
# --- Generation settings ---
gr.Markdown("#### Settings")
t1_temperature = gr.Slider(
label="Temperature",
minimum=0, maximum=2.5, step=0.1,
value=cfg.get("default_temperature", 0.8),
)
gr.Markdown(
"Controls randomness. At 0 the model always picks the most probable word; "
"higher values make surprising choices more likely. "
"Default 0.8 gives coherent but varied output.",
elem_classes=["param-help"],
)
t1_top_k = gr.Slider(
label="Top-K",
minimum=5, maximum=100, step=1,
value=cfg.get("default_top_k", 10),
)
gr.Markdown(
"Limits which tokens the model considers and how many "
"appear in the probability table.",
elem_classes=["param-help"],
)
t1_steps = gr.Slider(
label="Steps",
minimum=1, maximum=20, step=1,
value=cfg.get("default_steps", 8),
)
gr.Markdown(
"How many tokens to generate.",
elem_classes=["param-help"],
)
# Show steps — own section with explanation
t1_show_steps = gr.Checkbox(
label="Show steps",
value=True,
)
gr.Markdown(
"When on, each step shows the full probability table and which token "
"was selected (max 20 steps). When off, just generates the final text "
"(up to 100 steps).",
elem_classes=["param-help"],
)
with gr.Accordion("Random Seed", open=False):
t1_seed = gr.Number(
label="Random seed",
value=cfg.get("default_seed", 42),
precision=0,
)
gr.Markdown(
"Makes output reproducible -- same seed and settings always "
"produce the same text. Change it to get different results.",
elem_classes=["param-help"],
)
with gr.Row():
t1_generate_btn = gr.Button("Generate", variant="primary")
t1_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80)
t1_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"])
t1_output = gr.HTML(label="Output")
# Show steps toggle adjusts max steps
t1_show_steps.change(
fn=on_show_steps_change,
inputs=[t1_show_steps],
outputs=[t1_steps],
)
t1_generate_btn.click(
fn=explore_probabilities,
inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed],
outputs=[t1_output],
)
# Share button
t1_share_btn.click(
fn=share_probability_explorer,
inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed],
outputs=[t1_share_url],
).then(
fn=lambda: gr.update(visible=True),
outputs=[t1_share_url],
)
# ==================================================================
# Tab 2: Tokenizer
# ==================================================================
with gr.Tab("Tokenizer", id="tok"):
gr.Markdown("### Token Visualization")
gr.Markdown(
"See how text is split into tokens before the model processes it. "
"Hover over each token to see its numeric ID. "
"Uses GPT-2's tokenizer, which splits words into interesting subword pieces."
)
t3_input = gr.Textbox(
label="Text",
value=cfg.get("default_tokenizer_text", "Huston-Tillotson University is an HBCU in Austin, Texas."),
lines=3,
)
with gr.Row():
t3_btn = gr.Button("Tokenize", variant="primary")
t3_share_btn = gr.Button("Share", variant="secondary", scale=0, min_width=80)
t3_share_url = gr.Textbox(label="Share URL", visible=False, interactive=False, buttons=["copy"])
t3_output = gr.HTML(label="Tokens")
t3_btn.click(
fn=tokenize_text,
inputs=[t3_input],
outputs=[t3_output],
)
# Share button
t3_share_btn.click(
fn=share_tokenizer,
inputs=[t3_input],
outputs=[t3_share_url],
).then(
fn=lambda: gr.update(visible=True),
outputs=[t3_share_url],
)
# ==================================================================
# Tab 3: System Prompt Explorer
# ==================================================================
with gr.Tab("System Prompt Explorer", id="sys"):
gr.Markdown("### System Prompt Explorer")
gr.Markdown(
"See how **system prompts** change an LLM's behavior. "
"Pick a preset or write your own, then chat with the model. "
"The green terminal shows exactly what the model receives — "
"every special token, every role label, every turn."
)
presets = _get_presets()
preset_names = list(presets.keys())
default_preset = "Helpful Assistant" if "Helpful Assistant" in presets else preset_names[0] if preset_names else ""
sp_preset = gr.Dropdown(
label="Preset",
choices=preset_names,
value=default_preset,
interactive=True,
)
sp_system = gr.Textbox(
label="System Prompt",
value=presets.get(default_preset, ""),
lines=3,
placeholder="Enter a system prompt, or select a preset above...",
)
with gr.Accordion("Settings", open=False):
sp_max_tokens = gr.Slider(
label="Max tokens",
minimum=32, maximum=1024, step=16,
value=512,
)
gr.Markdown(
"Maximum number of tokens per response.",
elem_classes=["param-help"],
)
sp_temperature = gr.Slider(
label="Temperature",
minimum=0, maximum=2.0, step=0.1,
value=0.7,
)
sp_seed = gr.Number(
label="Random seed",
value=42,
precision=0,
)
with gr.Accordion("What the model sees", open=False):
gr.Markdown(
"The full text sent to the model on every turn — system prompt, "
"all previous messages, and special tokens. Watch it grow with each exchange.",
elem_classes=["param-help"],
)
sp_terminal = gr.HTML(value=_initial_terminal())
gr.Markdown("#### Chat")
gr.Markdown(
"**No hidden system prompt.** This model's helpful behavior comes from "
"fine-tuning (RLHF), not a secret prompt. When you add a system prompt above, "
"it's the *only* instruction the model receives. Commercial APIs like ChatGPT "
"and Claude prepend their own system prompts before yours — you can't see or "
"remove them.",
elem_classes=["param-help"],
)
sp_chat_state = gr.State([])
sp_chatbot = gr.Chatbot(height=700, feedback_options=None)
with gr.Row():
sp_user_input = gr.Textbox(
label="Message",
placeholder="Type a message...",
lines=1,
scale=4,
show_label=False,
)
sp_send_btn = gr.Button("Send", variant="primary", scale=0, min_width=80)
sp_reset_btn = gr.Button("Reset", variant="secondary", scale=0, min_width=80)
# --- Wiring ---
# Preset dropdown → just fill in the textbox (no chat reset)
sp_preset.change(
fn=on_preset_change,
inputs=[sp_preset],
outputs=[sp_system],
)
# System prompt textbox edits take effect on the next message sent.
# No auto-reset — avoids losing conversation on accidental edits.
# Use Reset button or pick a new preset to start fresh.
# Send message (button or enter)
send_inputs = [sp_user_input, sp_chat_state, sp_system, sp_max_tokens, sp_temperature, sp_seed]
send_outputs = [sp_user_input, sp_chat_state, sp_chatbot, sp_terminal]
sp_send_btn.click(
fn=send_chat_message,
inputs=send_inputs,
outputs=send_outputs,
)
sp_user_input.submit(
fn=send_chat_message,
inputs=send_inputs,
outputs=send_outputs,
)
# Reset button
sp_reset_btn.click(
fn=reset_chat,
inputs=[sp_system],
outputs=[sp_chat_state, sp_chatbot, sp_terminal],
)
# ==================================================================
# Tab 4: Attention Explorer
# ==================================================================
with gr.Tab("Attention Explorer", id="attn"):
gr.Markdown("### Attention Explorer")
_attn_model_name = cfg.get("attention_model", "gpt2-medium")
gr.Markdown(
"See which words the model pays attention to when processing a sentence. "
f"Uses `{_attn_model_name}`. "
"Click a word to see curved lines connecting it to the words it attended to — "
"thicker lines mean stronger attention."
)
# Example sentence pairs — read from config, fall back to defaults
_default_attn_examples = [
["bass", "He tuned his bass and plugged into the", "On the lake she caught a bass and pulled it onto the"],
["spring", "She wound the metal spring and the clock began to", "After the long winter the warm spring rain made the flowers"],
["light", "She flipped the switch and the light began to", "The bag was so light she carried it with"],
]
attn_example_pairs = [
tuple(ex) for ex in cfg.get("attention_examples", _default_attn_examples)
if isinstance(ex, (list, tuple)) and len(ex) == 3
]
attn_example_btns = []
for word, sent_a, sent_b in attn_example_pairs:
with gr.Row(elem_classes=["attn-example-row"]):
gr.Markdown(f"**{word}:**", elem_classes=["attn-example-label"])
btn_a = gr.Button(sent_a, size="sm", variant="secondary", elem_classes=["attn-example-btn"])
btn_b = gr.Button(sent_b, size="sm", variant="secondary", elem_classes=["attn-example-btn"])
attn_example_btns.extend([btn_a, btn_b])
gr.Markdown("*Try an example above, or enter your own text:*")
attn_input = gr.Textbox(
label="Text",
value="",
lines=1,
placeholder="Enter text to explore...",
)
attn_btn = gr.Button("Explore", variant="primary")
attn_output = gr.HTML(label="Visualization")
def run_attention_explorer(text):
    """Run attention extraction on *text* and build the visualization HTML.

    Args:
        text: Raw user input from the textbox; may be None, empty, or
            whitespace-only.

    Returns:
        An HTML string: a prompt to enter text when the input is blank,
        otherwise the rendered attention visualization.
    """
    # Guard against blank input before touching the model.
    if not text or not text.strip():
        # Fix: this literal was split across two source lines (invalid
        # syntax); restored as a single one-line string.
        return "Enter text above and click Explore."
    # Re-read the model name on every click so an admin change to
    # `attention_model` takes effect without restarting the app.
    attn_model = manager.config.get("attention_model", "gpt2-medium")
    data = get_attention_data(text.strip(), model_name=attn_model)
    return build_attention_html(data)
attn_btn.click(
fn=run_attention_explorer,
inputs=[attn_input],
outputs=[attn_output],
)
# Wire up example buttons — each sets input and auto-explores
for btn in attn_example_btns:
btn.click(
fn=lambda text: (text, run_attention_explorer(text)),
inputs=[btn],
outputs=[attn_input, attn_output],
)
# ==================================================================
# Admin Panel
# ==================================================================
with gr.Tab("Admin", id="admin"):
gr.Markdown("### Admin Panel")
# Login gate
with gr.Group() as admin_login_group:
admin_pw = gr.Textbox(
label="Password",
type="password",
placeholder="Enter admin password",
)
admin_login_btn = gr.Button("Login")
admin_login_msg = gr.Markdown("")
# Admin controls (hidden until login)
with gr.Group(visible=False) as admin_controls:
gr.Markdown("#### Base Model (Probability Explorer)")
with gr.Row():
admin_model_dropdown = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value=manager.current_model_name or cfg.get("model", "Llama-3.2-3B"),
label="Select model",
)
admin_load_btn = gr.Button("Load", variant="primary")
admin_model_status = gr.Markdown("")
gr.Markdown("#### Chat Model (System Prompt Explorer)")
with gr.Row():
admin_chat_dropdown = gr.Dropdown(
choices=list(AVAILABLE_MODELS.keys()),
value=manager.chat_model_name or cfg.get("chat_model", "Llama-3.2-3B-Instruct"),
label="Select chat model",
)
admin_chat_load_btn = gr.Button("Load", variant="primary")
admin_chat_status = gr.Markdown("")
gr.Markdown("---")
gr.Markdown("#### Default Settings")
admin_prompt = gr.Textbox(
label="Default prompt (Probability Explorer)",
value=cfg.get("default_prompt", ""),
)
admin_tokenizer_text = gr.Textbox(
label="Default text (Tokenizer)",
value=cfg.get("default_tokenizer_text", ""),
)
with gr.Row():
admin_temp = gr.Number(
label="Default temperature",
value=cfg.get("default_temperature", 0.8),
)
admin_top_k_admin = gr.Number(
label="Default top-k",
value=cfg.get("default_top_k", 10),
precision=0,
)
admin_steps = gr.Number(
label="Default steps",
value=cfg.get("default_steps", 8),
precision=0,
)
admin_seed = gr.Number(
label="Default seed",
value=cfg.get("default_seed", 42),
precision=0,
)
admin_save_btn = gr.Button("Save Defaults")
admin_save_msg = gr.Markdown("")
gr.Markdown("---")
gr.Markdown("#### Attention Explorer")
admin_attn_model = gr.Textbox(
label="Attention model",
value=cfg.get("attention_model", "gpt2-medium"),
info="GPT-2 family: gpt2, gpt2-medium, gpt2-large. Changes take effect on next Explore click (reloads model).",
)
admin_attn_examples = gr.Code(
value=json.dumps(cfg.get("attention_examples", []), indent=2),
language="json",
interactive=True,
label="Example sentences (JSON: [[\"word\", \"sent_a\", \"sent_b\"], ...])",
)
admin_attn_save_btn = gr.Button("Save Attention Settings")
admin_attn_save_msg = gr.Markdown("")
gr.Markdown("---")
gr.Markdown("#### System Prompt Presets")
gr.Markdown(
"Edit the presets available in the System Prompt Explorer dropdown. "
"JSON object: `{\"Name\": \"prompt text\", ...}`",
elem_classes=["param-help"],
)
admin_presets = gr.Code(
value=json.dumps(cfg.get("system_prompt_presets", {}), indent=2),
language="json",
interactive=True,
)
admin_presets_save_btn = gr.Button("Save Presets")
admin_presets_msg = gr.Markdown("")
gr.Markdown("---")
with gr.Accordion("Environment Variables Reference", open=False):
_pw_status = "*(set)*" if os.environ.get("ADMIN_PASSWORD") else "*(default: admin)*"
_rb_status = "*(set)*" if REBRANDLY_API_KEY else "*(not set)*"
gr.Markdown(
"Override settings via "
"[HF Space Settings](https://huggingface.co/spaces/chyams/llm-explorer/settings). "
"Use **Secrets** for sensitive values (encrypted, hidden after saving) "
"and **Variables** for everything else (visible in settings).\n\n"
"**Precedence:** env var > config.json > code defaults\n\n"
"**Secrets** (sensitive — encrypted)\n\n"
"| Variable | Description | Format | Current |\n"
"|----------|-------------|--------|---------|\n"
f"| `ADMIN_PASSWORD` | Admin panel password | Plain text | {_pw_status} |\n"
f"| `REBRANDLY_API_KEY` | URL shortener API key | API key | {_rb_status} |\n"
"\n**Variables** (non-sensitive — visible)\n\n"
"| Variable | Description | Format | Current |\n"
"|----------|-------------|--------|---------|\n"
f"| `DEFAULT_MODEL` | Base model (Prob Explorer) | Model name | `{cfg.get('model', '')}` |\n"
f"| `DEFAULT_CHAT_MODEL` | Chat model (Sys Prompt Explorer) | Model name | `{cfg.get('chat_model', '')}` |\n"
f"| `DEFAULT_PROMPT` | Default prompt | Plain text | `{cfg.get('default_prompt', '')[:40]}...` |\n"
f"| `DEFAULT_TEMPERATURE` | Default temperature | Number (0–2.5) | `{cfg.get('default_temperature', 0.8)}` |\n"
f"| `DEFAULT_TOP_K` | Default top-k | Integer (5–100) | `{cfg.get('default_top_k', 10)}` |\n"
f"| `DEFAULT_STEPS` | Default steps | Integer (1–100) | `{cfg.get('default_steps', 8)}` |\n"
f"| `DEFAULT_SEED` | Default seed | Integer | `{cfg.get('default_seed', 42)}` |\n"
f"| `DEFAULT_TOKENIZER_TEXT` | Default tokenizer text | Plain text | `{cfg.get('default_tokenizer_text', '')[:40]}...` |\n"
f"| `SYSTEM_PROMPT_PRESETS` | System prompt presets | JSON object | *({len(cfg.get('system_prompt_presets', {}))} presets)* |"
)
gr.Markdown("---")
gr.Markdown("#### Current Config")
admin_config_display = gr.Code(
value=json.dumps(cfg, indent=2),
language="json",
interactive=False,
)
gr.Markdown("---")
gr.Markdown("#### Export Slides")
gr.Markdown(
"*Uses current settings from Probability Explorer tab.*",
elem_classes=["param-help"],
)
admin_export_btn = gr.Button("Export Slides", variant="secondary")
admin_slides_file = gr.File(label="Slideshow", visible=False)
# Login wiring
admin_login_btn.click(
fn=admin_login,
inputs=[admin_pw],
outputs=[admin_controls, admin_login_group, admin_login_msg],
)
# Model loading — base
admin_load_btn.click(
fn=admin_load_model,
inputs=[admin_model_dropdown],
outputs=[admin_model_status, admin_config_display, status_display],
)
# Model loading — chat
admin_chat_load_btn.click(
fn=admin_load_chat_model,
inputs=[admin_chat_dropdown],
outputs=[admin_chat_status, admin_config_display, status_display],
)
# Save defaults — updates config display + Probability Explorer + Tokenizer controls
admin_save_btn.click(
fn=admin_save_defaults,
inputs=[admin_prompt, admin_tokenizer_text, admin_temp, admin_top_k_admin, admin_steps, admin_seed],
outputs=[
admin_save_msg,
admin_config_display,
t1_prompt,
t1_temperature,
t1_top_k,
t1_steps,
t1_seed,
t3_input,
],
)
# Save presets — updates config, dropdown choices, and presets display
admin_presets_save_btn.click(
fn=admin_save_presets,
inputs=[admin_presets],
outputs=[admin_presets_msg, admin_config_display, sp_preset, admin_presets],
)
# Save attention settings
admin_attn_save_btn.click(
fn=admin_save_attention,
inputs=[admin_attn_model, admin_attn_examples],
outputs=[admin_attn_save_msg, admin_config_display],
)
# Export slides — uses current Probability Explorer settings
admin_export_btn.click(
fn=generate_slideshow,
inputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed],
outputs=[admin_slides_file],
).then(
fn=lambda f: gr.update(visible=f is not None),
inputs=[admin_slides_file],
outputs=[admin_slides_file],
)
# Footer with build timestamp
gr.Markdown(
f""
f"Updated {BUILD_TIMESTAMP} CT
"
)
# ==============================================================
# Page load: populate config defaults, then apply share params
# ==============================================================
def load_config_values(request: gr.Request):
    """Step 1: Read config + parse share query params.

    Returns config defaults for all controls, the share_params
    State dict, and a gr.Tabs update to select the right tab.
    """
    config = manager.get_config()
    query = dict(request.query_params) if request else {}
    # Only switch tabs for share links naming a known tab id.
    requested = query.get("tab")
    if requested in ("prob", "tok"):
        tab_update = gr.update(selected=requested)
    else:
        tab_update = gr.update()
    # Pull each default once; several controls share the same value.
    prompt = config.get("default_prompt", "")
    tokenizer_text = config.get("default_tokenizer_text", "")
    temperature = config.get("default_temperature", 0.8)
    top_k = config.get("default_top_k", 10)
    steps = config.get("default_steps", 8)
    seed = config.get("default_seed", 42)
    return (
        # Admin panel fields
        prompt,
        tokenizer_text,
        temperature,
        top_k,
        steps,
        seed,
        json.dumps(config, indent=2),
        # Probability Explorer fields (config defaults for now)
        prompt,
        temperature,
        top_k,
        steps,
        seed,
        # Tokenizer field
        tokenizer_text,
        # Share params State (for subsequent steps)
        query,
        # Tab selection
        tab_update,
    )
def apply_share_params(params):
    """Step 2: Override controls with share-link values.

    Returns a 7-tuple of updates for (prompt, temperature, top_k,
    steps, show_steps, seed, tokenizer_text); positions not targeted
    by the share link are no-op updates.
    """
    def untouched():
        # Fresh 7-tuple of no-op updates.
        return tuple(gr.update() for _ in range(7))

    if not params or not params.get("tab"):
        return untouched()
    config = manager.get_config()
    target = params.get("tab")
    if target == "prob":
        show_steps = params.get("ss", "1") == "1"
        # Step-by-step mode caps the slider at 20; bulk mode allows 100.
        step_cap = 20 if show_steps else 100
        return (
            gr.update(value=params.get("p", config.get("default_prompt", ""))),
            gr.update(value=_safe_float(params.get("t"), config.get("default_temperature", 0.8))),
            gr.update(value=_safe_int(params.get("k"), config.get("default_top_k", 10))),
            gr.update(value=_safe_int(params.get("s"), config.get("default_steps", 8)), maximum=step_cap),
            gr.update(value=show_steps),
            gr.update(value=_safe_int(params.get("seed"), config.get("default_seed", 42))),
            gr.update(),  # tokenizer unchanged
        )
    if target == "tok":
        text = params.get("text", config.get("default_tokenizer_text", ""))
        return untouched()[:6] + (gr.update(value=text),)
    return untouched()
def auto_generate_prob(params):
    """Step 3: Auto-run Probability Explorer if share link is for prob tab.

    Reads from params State (not components) to avoid race conditions.
    """
    if not params or params.get("tab") != "prob":
        return gr.update()
    config = manager.get_config()
    show_steps = params.get("ss", "1") == "1"
    seed = _safe_int(params.get("seed"), config.get("default_seed", 42))
    steps = _safe_int(params.get("s"), config.get("default_steps", 8))
    top_k = _safe_int(params.get("k"), config.get("default_top_k", 10))
    temperature = _safe_float(params.get("t"), config.get("default_temperature", 0.8))
    prompt = params.get("p", config.get("default_prompt", ""))
    return explore_probabilities(prompt, temperature, top_k, steps, show_steps, seed)
def auto_tokenize(params):
    """Step 4: Auto-run Tokenizer if share link is for tok tab."""
    if params and params.get("tab") == "tok":
        fallback = manager.get_config().get("default_tokenizer_text", "")
        return tokenize_text(params.get("text", fallback))
    return gr.update()
# Chain: load config → apply share overrides → auto-generate
demo.load(
fn=load_config_values,
js=FORCE_LIGHT_MODE_JS,
outputs=[
# Admin panel fields
admin_prompt, admin_tokenizer_text, admin_temp, admin_top_k_admin, admin_steps, admin_seed,
admin_config_display,
# Probability Explorer fields
t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_seed,
# Tokenizer field
t3_input,
# Share state + tab selection
share_params, tabs,
],
).then(
fn=apply_share_params,
inputs=[share_params],
outputs=[t1_prompt, t1_temperature, t1_top_k, t1_steps, t1_show_steps, t1_seed, t3_input],
).then(
fn=auto_generate_prob,
inputs=[share_params],
outputs=[t1_output],
).then(
fn=auto_tokenize,
inputs=[share_params],
outputs=[t3_output],
)
return demo
# ---------------------------------------------------------------------------
# Startup
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    cfg = manager.get_config()
    # Load base model (Probability Explorer)
    base_model = cfg.get("model", "Llama-3.2-3B")
    print(f"Loading base model: {base_model}")
    print(manager.load_model(base_model))
    # Load chat model (System Prompt Explorer)
    chat_model = cfg.get("chat_model", "Llama-3.2-3B-Instruct")
    print(f"Loading chat model: {chat_model}")
    print(manager.load_chat_model(chat_model))
    app = create_app()
    # Fix: `theme` and `css` are gr.Blocks() constructor arguments, not
    # Blocks.launch() parameters — passing them to launch() raises a
    # TypeError at startup. They belong inside create_app()'s
    # gr.Blocks(theme=THEME, css=CUSTOM_CSS, ...) call.
    app.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )