Spaces:

jonmabe
/

tiny-llm-demo

Sleeping

App Files Files Community

tiny-llm-demo / app.py

jonmabe

Initial Gradio demo upload

3aa6f7b verified 11 days ago

raw

history blame contribute delete

7.7 kB

	"""
	Tiny-LLM Demo - Text Generation with a 54M Parameter Model

	This model was trained from scratch on Wikipedia data.
	"""

	import gradio as gr
	import torch
	from huggingface_hub import hf_hub_download
	from model import TinyLLM, MODEL_CONFIG

	# Model configuration: Hugging Face Hub repo id and the checkpoint file to fetch.
	MODEL_ID = "jonmabe/tiny-llm-54m"
	MODEL_FILENAME = "final_model.pt"

	# Tokenizer setup.
	# Preference order: tokenizer published in the model repo, then the stock
	# "gpt2" tokenizer. If transformers cannot be imported, or both loads fail,
	# the app still starts with USE_HF_TOKENIZER = False and tokenizer = None.
	try:
	    from transformers import AutoTokenizer

	    try:
	        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	    except Exception as repo_err:
	        # Catch Exception rather than using a bare `except:` so that
	        # KeyboardInterrupt / SystemExit still propagate, and report why the
	        # fallback was taken instead of hiding the failure.
	        print(f"Could not load tokenizer from {MODEL_ID}: {repo_err}; falling back to gpt2")
	        tokenizer = AutoTokenizer.from_pretrained("gpt2")
	    USE_HF_TOKENIZER = True
	except Exception as e:
	    # Top-level boundary: any failure here (missing package, no network, ...)
	    # degrades to "no tokenizer" rather than crashing at import time.
	    print(f"Could not load HuggingFace tokenizer: {e}")
	    USE_HF_TOKENIZER = False
	    tokenizer = None

	# Fetch the checkpoint file from the Hub.
	print("Downloading model...")
	model_path = hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILENAME)
	print(f"Model downloaded to {model_path}")

	# Deserialize onto the CPU first; the model is moved to its final device below.
	# NOTE(review): weights_only=False makes torch.load unpickle arbitrary Python
	# objects from a downloaded file. Confirm the checkpoint source is trusted, or
	# check whether the file also loads with weights_only=True.
	print("Loading model...")
	checkpoint = torch.load(model_path, map_location="cpu", weights_only=False)

	# Use the config stored in the checkpoint when it is a dict (unwrapping a
	# nested "model" section if present); otherwise use the packaged default.
	config = MODEL_CONFIG
	if "config" in checkpoint and isinstance(checkpoint["config"], dict):
	    config = checkpoint["config"]
	    config = config.get("model", config)

	# Build the network from the chosen config.
	model = TinyLLM(config)

	# The checkpoint is either a wrapper holding "model_state_dict" or the
	# state dict itself.
	state_dict = (
	    checkpoint["model_state_dict"]
	    if "model_state_dict" in checkpoint
	    else checkpoint
	)

	# strict=False: key mismatches are reported (first five only) instead of raising.
	missing, unexpected = model.load_state_dict(state_dict, strict=False)
	if missing:
	    print(f"Warning: Missing keys: {missing[:5]}...")
	if unexpected:
	    print(f"Warning: Unexpected keys: {unexpected[:5]}...")

	# Run on the GPU when one is available, in inference mode.
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	model = model.to(device)
	model.eval()

	total_params = sum(weight.numel() for weight in model.parameters())
	print(f"Model loaded on {device} with {total_params:,} parameters")


	def generate_text(
	    prompt: str,
	    max_tokens: int = 100,
	    temperature: float = 0.8,
	    top_p: float = 0.9,
	    top_k: int = 50,
	    repetition_penalty: float = 1.1,
	) -> str:
	    """Generate a text continuation for ``prompt`` with the module-level model.

	    Args:
	        prompt: Text to continue. ``None``, empty, or whitespace-only input
	            yields a help message instead of calling the model.
	        max_tokens: Forwarded to ``model.generate`` as ``max_new_tokens``.
	        temperature: Forwarded to ``model.generate`` unchanged.
	        top_p: Forwarded to ``model.generate`` unchanged.
	        top_k: Forwarded to ``model.generate`` (coerced to ``int``).
	        repetition_penalty: Forwarded to ``model.generate`` unchanged.

	    Returns:
	        The decoded first sequence returned by ``model.generate`` (special
	        tokens skipped), or a human-readable message when the prompt is
	        empty or no tokenizer was loaded.
	    """
	    # Guard against None as well as blank text so a cleared input cannot
	    # raise AttributeError on .strip().
	    if not prompt or not prompt.strip():
	        return "Please enter a prompt to generate text."

	    # Without a tokenizer there is nothing useful to do; bail out early.
	    # Every later step can then rely on the tokenizer being present, which
	    # removes the previously unreachable "Decoding not available." branch.
	    if not USE_HF_TOKENIZER or tokenizer is None:
	        return "Tokenizer not available. Please ensure transformers is installed."

	    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

	    # Inference only: no gradients needed.
	    with torch.no_grad():
	        output_ids = model.generate(
	            input_ids,
	            # UI sliders may deliver numeric values as floats; the count-like
	            # arguments are coerced so they are always integers.
	            max_new_tokens=int(max_tokens),
	            temperature=temperature,
	            top_p=top_p,
	            top_k=int(top_k),
	            repetition_penalty=repetition_penalty,
	            eos_token_id=tokenizer.eos_token_id,
	        )

	    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


	# Starter prompts for the UI, stored as one single-element row per prompt.
	EXAMPLES = [
	    [starter]
	    for starter in (
	        "The history of artificial intelligence began",
	        "In the year 2050, humanity",
	        "The most important scientific discovery was",
	        "Once upon a time, in a kingdom far away",
	        "The universe is vast and",
	        "Climate change affects",
	        "The theory of relativity states that",
	        "In ancient Rome,",
	    )
	]


	# Create Gradio interface
	# Builds the Blocks app bound to `demo`: an intro Markdown panel, the prompt
	# box, the sampling sliders, a generate button, a read-only output box, the
	# example prompts, and a closing "About" Markdown panel.
	# NOTE(review): leading indentation is missing in this copy of the file, so
	# the nesting of the `with` containers below cannot be read off the text.
	# Restore it from the original file before running.
	with gr.Blocks(title="Tiny-LLM Text Generator") as demo:
	gr.Markdown("""
	# 🤖 Tiny-LLM Text Generator

	A 54 million parameter language model trained from scratch on Wikipedia.

	This demonstrates that meaningful language models can be trained on consumer hardware!

	### Architecture
	- Parameters: 54.93M
	- Layers: 12
	- Hidden Size: 512
	- Attention Heads: 8
	- Position Encoding: RoPE
	- Normalization: RMSNorm
	- Activation: SwiGLU
	""")

	# Input widgets: prompt box pre-filled with the first example prompt.
	with gr.Row():
	with gr.Column(scale=2):
	prompt_input = gr.Textbox(
	label="Prompt",
	placeholder="Enter your prompt here...",
	lines=3,
	value="The history of artificial intelligence began"
	)

	# Sampling controls. Slider defaults mirror generate_text's parameter
	# defaults (100, 0.8, 0.9, 50, 1.1).
	with gr.Row():
	with gr.Column():
	max_tokens = gr.Slider(
	minimum=10,
	maximum=256,
	value=100,
	step=10,
	label="Max New Tokens",
	)
	temperature = gr.Slider(
	minimum=0.1,
	maximum=2.0,
	value=0.8,
	step=0.1,
	label="Temperature",
	info="Higher = more random"
	)

	with gr.Column():
	top_p = gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=0.9,
	step=0.05,
	label="Top-p (Nucleus Sampling)",
	)
	top_k = gr.Slider(
	minimum=1,
	maximum=100,
	value=50,
	step=5,
	label="Top-k",
	)

	repetition_penalty = gr.Slider(
	minimum=1.0,
	maximum=2.0,
	value=1.1,
	step=0.05,
	label="Repetition Penalty",
	info="Higher = less repetition"
	)

	generate_btn = gr.Button("✨ Generate", variant="primary", size="lg")

	# Output widget: non-editable box that receives the generated text.
	with gr.Column(scale=2):
	output_text = gr.Textbox(
	label="Generated Text",
	lines=15,
	interactive=False,
	)

	# Example prompts; selecting one targets the prompt box only.
	gr.Markdown("### 📝 Example Prompts")
	gr.Examples(
	examples=EXAMPLES,
	inputs=prompt_input,
	)

	# Event handlers
	# The button's click event and the prompt box's submit event both call
	# generate_text with the same inputs, listed in the order of its parameters,
	# and write the returned string to output_text.
	generate_btn.click(
	fn=generate_text,
	inputs=[prompt_input, max_tokens, temperature, top_p, top_k, repetition_penalty],
	outputs=output_text,
	)

	prompt_input.submit(
	fn=generate_text,
	inputs=[prompt_input, max_tokens, temperature, top_p, top_k, repetition_penalty],
	outputs=output_text,
	)

	# Closing "About" panel: model link, training details, limitations, intended use.
	gr.Markdown("""
	---
	### About This Model

	Model: [jonmabe/tiny-llm-54m](https://huggingface.co/jonmabe/tiny-llm-54m)

	This is a decoder-only transformer trained from scratch on Wikipedia text.
	It demonstrates that meaningful language models can be trained on consumer hardware
	with modest compute budgets (~3 hours on an RTX 5090).

	#### Training Details
	- Training Steps: 50,000
	- Tokens: ~100M
	- Hardware: NVIDIA RTX 5090 (32GB)
	- Training Time: ~3 hours

	#### Limitations
	- Small model size limits knowledge and capabilities
	- Trained only on Wikipedia - limited domain coverage
	- May generate factually incorrect information
	- Not instruction-tuned

	#### Intended Use
	- Educational: Understanding transformer training
	- Experimental: Testing fine-tuning approaches
	- Research: Lightweight model for NLP experiments
	""")


	# Launch the demo only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()