# Commit 1785d90: Thicker borders (2px) on all boxes
"""
GPUburnout Models — Unified Demo
Compare models trained from scratch: GPUburnout-3M → GPUburnout-134M → GPUburnout-1B
"""
import gc
import json
import os
import sys
import gradio as gr
import torch
import torch.nn.functional as F
# Add models directory to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "models"))
# ── Model Registry ──────────────────────────────────────────────────────────
# Each entry keys a dropdown display name to:
#   path:        checkpoint directory expected to hold config.json (+ weights)
#   arch:        "s1" (GPT-2 style) or "s2" (Llama style) — selects the loader
#   description: one-liner rendered in the status panel
#   examples:    prompts offered in the Examples table for this model
MODELS = {
    "GPUburnout-3M (3.2M)": {
        "path": "checkpoints/tiny",
        "arch": "s1",
        "description": "Character-level model trained on Shakespeare. The very first step.",
        "examples": ["ROMEO:", "JULIET:", "To be, or not to be", "First Citizen:"],
    },
    "GPUburnout-134M (134M)": {
        "path": "checkpoints/gpt2_small",
        "arch": "s1",
        "description": "Season 1 final model. BPE tokenizer, 2.8B tokens, 12 layers.",
        "examples": [
            "The capital of France is",
            "Explain machine learning in simple terms.",
            "def fibonacci(n):",
            "The meaning of life is",
        ],
    },
    "GPUburnout-1B (1.04B)": {
        "path": "checkpoints/llama_1b",
        "arch": "s2",
        "description": "Season 2. Llama architecture, 11.8B tokens, $175 total. Final loss 2.494.",
        "examples": [
            "The capital of France is",
            "In a shocking discovery, scientists found that",
            "def fibonacci(n):",
            "Once upon a time, in a land far away,",
        ],
    },
}
# ── Current model state (one at a time) ─────────────────────────────────────
# Module-level mutable cache: only one model is kept resident at a time to
# bound memory use; the loaded model travels with its tokenizer and config.
current = {"name": None, "model": None, "tokenizer": None, "config": None}
def unload_current():
    """Release the resident model (if any) and reclaim RAM / CUDA cache."""
    if current["model"] is not None:
        del current["model"]
    # Reset every slot so status reporting sees a clean state.
    current.update(model=None, tokenizer=None, config=None, name=None)
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def load_model(model_name):
    """Return (model, tokenizer, config) for `model_name`, loading lazily.

    A different previously-loaded model is evicted first so that only one
    model occupies memory at a time. Raises FileNotFoundError when the
    checkpoint directory has no config.json.
    """
    # Cache hit: the requested model is already resident.
    if current["model"] is not None and current["name"] == model_name:
        return current["model"], current["tokenizer"], current["config"]

    unload_current()

    info = MODELS[model_name]
    model_dir = info["path"]
    config_path = os.path.join(model_dir, "config.json")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Model not found: {model_dir}")

    with open(config_path) as f:
        config = json.load(f)

    # Dispatch to the architecture-specific loader.
    loader = _load_s1 if info["arch"] == "s1" else _load_s2
    model, tokenizer = loader(model_dir, config)

    current.update(name=model_name, model=model, tokenizer=tokenizer, config=config)
    return model, tokenizer, config
def _load_s1(model_dir, config):
    """Build a Season 1 (GPT-2 style) model and its tokenizer from `model_dir`."""
    from s1_model import TransformerLanguageModel

    model = TransformerLanguageModel(
        vocab_size=config["vocab_size"],
        embed_dim=config["embed_dim"],
        num_heads=config["num_heads"],
        num_layers=config["num_layers"],
        ff_dim=config["ff_dim"],
        max_seq_len=config["max_seq_len"],
        dropout=0.0,  # inference only — no dropout
    )
    state = torch.load(
        os.path.join(model_dir, "pytorch_model.bin"),
        map_location="cpu",
        weights_only=True,
    )
    model.load_state_dict(state)
    model.eval()

    # The checkpoint config records which tokenizer flavour was trained with.
    tokenizer_path = os.path.join(model_dir, "tokenizer.json")
    if config.get("tokenizer_type", "character") == "bpe":
        from s1_tokenizer_bpe import BPETokenizer
        tokenizer = BPETokenizer()
    else:
        from s1_tokenizer_char import CharacterTokenizer
        tokenizer = CharacterTokenizer()
    tokenizer.load(tokenizer_path)

    return model, tokenizer
def _load_s2(model_dir, config):
    """Build the Season 2 (Llama style) model and its HF tokenizer."""
    from s2_model import LlamaModel, ModelConfig

    model_config = ModelConfig(
        vocab_size=config.get("vocab_size", 32005),
        d_model=config.get("d_model", 2048),
        n_layers=config.get("n_layers", 16),
        n_heads=config.get("n_heads", 32),
        n_kv_heads=config.get("n_kv_heads", 8),
        d_ff=config.get("d_ff", 8192),
        max_seq_len=config.get("max_seq_len", 2048),
    )
    model = LlamaModel(model_config).to("cpu")

    weights_path = os.path.join(model_dir, "pytorch_model.bin")
    if not os.path.exists(weights_path):
        # Weights live in a separate HF model repo (Space LFS limit
        # workaround); fetch them into the checkpoint dir on first use.
        from huggingface_hub import hf_hub_download
        print("Downloading Llama 1B weights from GPUburnout/gpuburnout-1b...")
        weights_path = hf_hub_download(
            repo_id="GPUburnout/gpuburnout-1b",
            filename="pytorch_model.bin",
            local_dir=model_dir,
        )

    model.load_state_dict(torch.load(weights_path, map_location="cpu", weights_only=True))
    model.eval()

    # S2 ships a `tokenizers`-library tokenizer rather than the custom S1 ones.
    from tokenizers import Tokenizer
    return model, Tokenizer.from_file("tokenizer/bpe_tokenizer.json")
# ── Generation ──────────────────────────────────────────────────────────────
def generate_s1(model, tokenizer, config, prompt, max_tokens, temperature, top_k):
    """Autoregressively sample `max_tokens` tokens from an S1 (GPT-2) model.

    Applies temperature scaling and optional top-k filtering (top_k <= 0
    disables filtering); returns the decoded prompt + continuation.
    """
    ids = tokenizer.encode(prompt)
    if not ids:
        return "Could not encode prompt."

    seq = torch.tensor(ids, dtype=torch.long).unsqueeze(0)
    window = config.get("max_seq_len", 256)

    with torch.no_grad():
        for _ in range(max_tokens):
            # Crop the context to the model's maximum sequence length.
            context = seq if seq.size(1) <= window else seq[:, -window:]
            logits = model(context)[:, -1, :] / temperature
            if top_k > 0:
                # Everything below the k-th largest logit is ruled out.
                kth = torch.topk(logits, min(top_k, logits.size(-1))).values[:, [-1]]
                logits = logits.masked_fill(logits < kth, float("-inf"))
            next_id = torch.multinomial(F.softmax(logits, dim=-1), num_samples=1)
            seq = torch.cat([seq, next_id], dim=1)

    return tokenizer.decode(seq[0].tolist())
def generate_s2(model, tokenizer, prompt, max_tokens, temperature, top_k):
    """Generate a continuation with an S2 (Llama) model via its own generate().

    top_k <= 0 is forwarded as None, which the model treats as "no filtering".
    """
    token_ids = tokenizer.encode(prompt).ids
    prompt_tensor = torch.tensor([token_ids], dtype=torch.long)
    with torch.no_grad():
        generated = model.generate(
            prompt_tensor,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_k=None if top_k <= 0 else top_k,
        )
    return tokenizer.decode(generated[0].tolist())
def generate_text(model_name, prompt, max_tokens, temperature, top_k):
    """Gradio callback: load the selected model and generate from `prompt`.

    Returns generated text, or a human-readable message on empty input /
    missing checkpoint (errors are surfaced as strings, not raised, so the
    UI never crashes).
    """
    if not prompt.strip():
        return "Please enter a prompt."

    try:
        model, tokenizer, config = load_model(model_name)
    except FileNotFoundError as e:
        return f"Error: {e}"

    # Sliders deliver floats; token and k counts must be integers.
    n_tokens, k = int(max_tokens), int(top_k)
    if MODELS[model_name]["arch"] == "s1":
        return generate_s1(model, tokenizer, config, prompt, n_tokens, temperature, k)
    return generate_s2(model, tokenizer, prompt, n_tokens, temperature, k)
def get_status(model_name):
    """Return the markdown status blurb shown under the model dropdown.

    Shows the model's one-line description plus whether its weights are
    currently resident in memory.
    """
    info = MODELS[model_name]
    loaded = "Loaded" if current["name"] == model_name else "Not loaded (will load on generate)"
    # Fix: the separator was mojibake ("β€”", a UTF-8 em dash read through the
    # wrong codepage); render a real em dash in the user-visible string.
    return f"**{model_name}** — {info['description']}\n\nStatus: {loaded}"
def update_examples(model_name):
    """Swap the Examples table to the prompts curated for `model_name`."""
    samples = [[prompt] for prompt in MODELS[model_name]["examples"]]
    return gr.update(samples=samples)
# ── Custom CSS ──────────────────────────────────────────────────────────────
CUSTOM_CSS = """
.gradio-container {
max-width: 900px !important;
margin: auto;
}
.header-text {
text-align: center;
margin-bottom: 0.5em;
}
.header-text h1 {
color: #22d3ee;
font-family: 'Courier New', monospace;
}
.header-text a {
color: #f59e0b;
}
.model-info {
font-family: 'Courier New', monospace;
font-size: 0.85em;
padding: 10px;
border-radius: 8px;
}
"""
# ── Theme ────────────────────────────────────────────────────────────────────
# Dark, terminal-flavoured theme with a cyan accent. Every property is set
# for both the default and `_dark` variants with identical values, so the app
# renders the same regardless of the viewer's colour-scheme preference.
# Block/panel borders are 2px for visibility against the dark background.
dark_theme = gr.themes.Base(
    primary_hue="cyan",
    neutral_hue="gray",
    font=gr.themes.GoogleFont("JetBrains Mono"),
).set(
    body_background_fill="#08080d",
    body_background_fill_dark="#08080d",
    background_fill_primary="#0e0e15",
    background_fill_primary_dark="#0e0e15",
    background_fill_secondary="#12121a",
    background_fill_secondary_dark="#12121a",
    block_background_fill="#0e0e15",
    block_background_fill_dark="#0e0e15",
    block_border_color="#2a3a4a",
    block_border_color_dark="#2a3a4a",
    block_border_width="2px",
    block_border_width_dark="2px",
    block_label_background_fill="#12121a",
    block_label_background_fill_dark="#12121a",
    block_label_text_color="#9ca3af",
    block_label_text_color_dark="#9ca3af",
    block_title_text_color="#9ca3af",
    block_title_text_color_dark="#9ca3af",
    body_text_color="#e0e0e5",
    body_text_color_dark="#e0e0e5",
    body_text_color_subdued="#6b7280",
    body_text_color_subdued_dark="#6b7280",
    border_color_primary="#2a3a4a",
    border_color_primary_dark="#2a3a4a",
    input_background_fill="#12121a",
    input_background_fill_dark="#12121a",
    input_border_color="#2a3a4a",
    input_border_color_dark="#2a3a4a",
    input_placeholder_color="#6b7280",
    input_placeholder_color_dark="#6b7280",
    button_primary_background_fill="#22d3ee",
    button_primary_background_fill_dark="#22d3ee",
    button_primary_text_color="#08080d",
    button_primary_text_color_dark="#08080d",
    button_primary_background_fill_hover="#67e8f9",
    button_primary_background_fill_hover_dark="#67e8f9",
    panel_background_fill="#0e0e15",
    panel_background_fill_dark="#0e0e15",
    panel_border_color="#2a3a4a",
    panel_border_color_dark="#2a3a4a",
    panel_border_width="2px",
    panel_border_width_dark="2px",
    slider_color="#22d3ee",
    slider_color_dark="#22d3ee",
)
# ── Gradio UI ───────────────────────────────────────────────────────────────
# Layout: left column = model selection + prompt + sampling controls,
# right column = output and example prompts.
with gr.Blocks(
    title="GPUburnout Models",
    theme=dark_theme,
    css=CUSTOM_CSS,
) as demo:
    # Header: title, tagline, outbound links. Fix: the em dash and the
    # link separators were mojibake ("β€”", "Β·") in the user-facing HTML.
    gr.HTML("""
    <div class="header-text">
        <h1>GPUburnout Models</h1>
        <p>Compare language models I trained from scratch — from 3.2M to 1 billion parameters.</p>
        <p>
            <a href="https://gpuburnout.com" target="_blank">Read the blog</a> ·
            <a href="https://github.com/GPUburnout" target="_blank">GitHub</a> ·
            <a href="https://gpuburnout.com/about/" target="_blank">About</a>
        </p>
    </div>
    """)

    with gr.Row():
        with gr.Column(scale=1):
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                value="GPUburnout-134M (134M)",
                label="Select Model",
            )
            model_status = gr.Markdown(elem_classes=["model-info"])
            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Type something...",
                lines=2,
                value="The capital of France is",
            )
            with gr.Row():
                max_tokens = gr.Slider(50, 300, value=50, step=25, label="Max tokens")
                temperature = gr.Slider(0.1, 1.5, value=0.8, step=0.1, label="Temperature")
                top_k = gr.Slider(1, 100, value=50, step=1, label="Top-K")
            generate_btn = gr.Button("Generate", variant="primary", size="lg")
        with gr.Column(scale=1):
            output = gr.Textbox(label="Output", lines=15, show_copy_button=True)
            # Fix: start the examples table in sync with the default dropdown
            # selection instead of a hard-coded two-prompt list.
            examples = gr.Examples(
                examples=[[ex] for ex in MODELS["GPUburnout-134M (134M)"]["examples"]],
                inputs=prompt,
                label="Example prompts",
            )

    # Events
    demo.load(get_status, inputs=model_selector, outputs=model_status)
    model_selector.change(get_status, inputs=model_selector, outputs=model_status)
    model_selector.change(update_examples, inputs=model_selector, outputs=examples.dataset)
    generate_btn.click(
        generate_text,
        inputs=[model_selector, prompt, max_tokens, temperature, top_k],
        outputs=output,
    )
    prompt.submit(
        generate_text,
        inputs=[model_selector, prompt, max_tokens, temperature, top_k],
        outputs=output,
    )

if __name__ == "__main__":
    demo.launch()