"""
Gradio Interface for GPT from Scratch
=====================================
Interactive web interface for text generation using the trained GPT models.
Supports both GPTv1 and GPTv2 with real-time text generation and parameter control.
Author: Saumitra Gupta
Date: September 2025
"""
import gradio as gr
import torch
import torch.nn as nn
from torch.nn import functional as F
import pickle
import os
import random
# Device configuration
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")
# Model hyperparameters (should match training configuration)
block_size = 8
n_embd = 384
n_head = 32
n_layer = 32
dropout = 0.2
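# Derived sizes: head_size = n_embd // n_head = 384 // 32 = 12 per attention
# head. These globals are read by the model classes below (e.g. block_size
# inside generate()), so they must match the configuration the checkpoint
# was trained with.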
# Load vocabulary
def load_vocabulary():
    """Load the character vocabulary from vocab.txt."""
    try:
        with open('artifacts/vocab.txt', 'r', encoding='utf-8') as f:
            # Strip only the trailing newline so whitespace characters
            # (e.g. a literal space) survive as vocabulary entries.
            chars = [line.rstrip('\n') for line in f]
        vocab_size = len(chars)
        # Create mappings
        string_to_int = {ch: i for i, ch in enumerate(chars)}
        int_to_string = {i: ch for i, ch in enumerate(chars)}
        return chars, vocab_size, string_to_int, int_to_string
    except FileNotFoundError:
        # Fall back to rebuilding the vocabulary from the Wizard of Oz corpus
        with open('wizard-of-oz.txt', 'r', encoding='utf-8') as f:
            text = f.read()
        chars = sorted(set(text))
        vocab_size = len(chars)
        string_to_int = {ch: i for i, ch in enumerate(chars)}
        int_to_string = {i: ch for i, ch in enumerate(chars)}
        return chars, vocab_size, string_to_int, int_to_string
# Load vocabulary
chars, vocab_size, string_to_int, int_to_string = load_vocabulary()
# Encoding and decoding functions
def encode(s):
"""Convert string to list of integers"""
return [string_to_int.get(c, 0) for c in s] # Use 0 for unknown characters
def decode(l):
"""Convert list of integers to string"""
return ''.join([int_to_string.get(i, '') for i in l])
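# Illustrative round-trip (assuming every character is in the vocabulary):
#   decode(encode("hello")) == "hello"
# Out-of-vocabulary characters are encoded as index 0, so the round-trip is
# lossy for unseen input.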
# Model Architecture Classes
class Head(nn.Module):
"""Single attention head"""
def __init__(self, head_size):
super().__init__()
self.key = nn.Linear(n_embd, head_size, bias=False)
self.query = nn.Linear(n_embd, head_size, bias=False)
self.value = nn.Linear(n_embd, head_size, bias=False)
self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
self.dropout = nn.Dropout(dropout)
def forward(self, x):
B, T, C = x.shape
k = self.key(x)
q = self.query(x)
# Attention weights
wei = q @ k.transpose(-2, -1) * k.shape[-1]**-0.5
wei = wei.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
wei = F.softmax(wei, dim=-1)
wei = self.dropout(wei)
# Apply attention to values
v = self.value(x)
out = wei @ v
return out
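# The explicit mask-and-softmax above is standard causal self-attention.
# On PyTorch >= 2.0 the same step could use the fused kernel (a sketch,
# not a tested drop-in for this checkpoint):
#   out = F.scaled_dot_product_attention(
#       q, k, v, is_causal=True,
#       dropout_p=dropout if self.training else 0.0)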
class MultiHeadAttention(nn.Module):
"""Multiple heads of self-attention in parallel"""
def __init__(self, num_heads, head_size):
super().__init__()
self.heads = nn.ModuleList([Head(head_size) for _ in range(num_heads)])
self.proj = nn.Linear(head_size * num_heads, n_embd)
self.dropout = nn.Dropout(dropout)
def forward(self, x):
out = torch.cat([h(x) for h in self.heads], dim=-1)
out = self.dropout(self.proj(out))
return out
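# With 32 heads of size 12, the concatenated head outputs give
# 32 * 12 = 384 = n_embd, so self.proj is a square 384 -> 384 map back
# into the residual stream.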
class FeedForward(nn.Module):
"""Position-wise feed-forward network"""
def __init__(self, n_embd):
super().__init__()
self.net = nn.Sequential(
nn.Linear(n_embd, 4 * n_embd),
nn.ReLU(),
nn.Linear(4 * n_embd, n_embd),
nn.Dropout(dropout),
)
def forward(self, x):
return self.net(x)
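# The 4x inner expansion (n_embd -> 4*n_embd -> n_embd) follows the
# feed-forward sizing of the original Transformer paper.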
class Block(nn.Module):
"""Transformer block: communication followed by computation"""
def __init__(self, n_embd, n_head):
super().__init__()
head_size = n_embd // n_head
self.sa = MultiHeadAttention(n_head, head_size)
self.ffwd = FeedForward(n_embd)
self.ln1 = nn.LayerNorm(n_embd)
self.ln2 = nn.LayerNorm(n_embd)
def forward(self, x):
y = self.sa(x)
x = self.ln1(x + y)
y = self.ffwd(x)
x = self.ln2(x + y)
return x
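# Note: this block normalizes *after* each residual addition (post-norm,
# as in the original Transformer). Many GPT implementations use pre-norm
# instead, e.g. x = x + self.sa(self.ln1(x)); the post-norm ordering is
# kept here because the pickled checkpoint was presumably trained with it.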
class GPTLanguageModel(nn.Module):
"""Complete GPT Language Model"""
def __init__(self, vocab_size):
super().__init__()
self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
self.position_embedding_table = nn.Embedding(block_size, n_embd)
self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
self.ln_f = nn.LayerNorm(n_embd)
self.lm_head = nn.Linear(n_embd, vocab_size)
self.apply(self._init_weights)
def _init_weights(self, module):
if isinstance(module, nn.Linear):
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
if module.bias is not None:
torch.nn.init.zeros_(module.bias)
elif isinstance(module, nn.Embedding):
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
def forward(self, idx, targets=None):
B, T = idx.shape
tok_emb = self.token_embedding_table(idx)
        pos_emb = self.position_embedding_table(torch.arange(T, device=idx.device))
x = tok_emb + pos_emb
x = self.blocks(x)
x = self.ln_f(x)
logits = self.lm_head(x)
if targets is None:
loss = None
else:
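            # F.cross_entropy expects (N, C) logits against (N,) targets,
            # so flatten the batch and time dimensions before the loss.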
B, T, C = logits.shape
logits = logits.view(B*T, C)
targets = targets.view(B*T)
loss = F.cross_entropy(logits, targets)
return logits, loss
def generate(self, idx, max_new_tokens, temperature=1.0):
"""Generate text with temperature control"""
self.eval()
with torch.no_grad():
for _ in range(max_new_tokens):
# Crop to last block_size tokens
idx_cond = idx[:, -block_size:]
# Get predictions
logits, _ = self.forward(idx_cond)
# Focus on last time step and apply temperature
logits = logits[:, -1, :] / temperature
# Apply softmax to get probabilities
probs = F.softmax(logits, dim=-1)
# Sample from distribution
idx_next = torch.multinomial(probs, num_samples=1)
# Append to sequence
idx = torch.cat((idx, idx_next), dim=1)
return idx
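# Illustrative usage (a sketch): sample 100 tokens from a minimal context.
#   context = torch.zeros((1, 1), dtype=torch.long, device=device)
#   print(decode(model.generate(context, max_new_tokens=100)[0].tolist()))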
# Load the trained model
def load_model():
    """Load the trained model from a pickle file, falling back to random init."""
    model = GPTLanguageModel(vocab_size).to(device)
# Try to load saved model
model_paths = ['artifacts/model-01.pkl', 'model-01.pkl']
model_loaded = False
for path in model_paths:
if os.path.exists(path):
try:
with open(path, 'rb') as f:
model = pickle.load(f)
model = model.to(device)
model.eval()
print(f"βœ… Model loaded successfully from {path}")
model_loaded = True
break
except Exception as e:
print(f"❌ Error loading model from {path}: {e}")
    if not model_loaded:
        print("⚠️ No pre-trained model found. Using a randomly initialized model.")
        print("Note: output will be near-random until a trained checkpoint is available at artifacts/model-01.pkl.")
return model
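# Security note: pickle.load executes arbitrary code from the file, so only
# unpickle checkpoints you trust. If the checkpoint were instead saved with
# torch.save(model.state_dict(), path), loading would look like (a sketch):
#   model = GPTLanguageModel(vocab_size).to(device)
#   model.load_state_dict(torch.load(path, map_location=device))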
# Initialize model
model = load_model()
# Gradio Interface Functions
def generate_text(prompt, max_tokens, temperature, seed):
"""Generate text based on user input"""
try:
# Set random seed for reproducibility
if seed > 0:
torch.manual_seed(seed)
random.seed(seed)
# Handle empty prompt
if not prompt.strip():
prompt = " " # Start with a space if no prompt
# Encode the prompt
encoded_prompt = encode(prompt)
# Convert to tensor
context = torch.tensor([encoded_prompt], dtype=torch.long, device=device)
# Ensure we don't exceed block size for context
if context.size(1) > block_size:
context = context[:, -block_size:]
# Generate text
generated = model.generate(
context,
max_new_tokens=max_tokens,
temperature=max(0.1, temperature) # Prevent temperature from being too low
)
# Decode and return
generated_text = decode(generated[0].tolist())
return generated_text
    except Exception as e:
        return f"❌ Error generating text: {e}\n\nPlease check that the model is properly loaded."
def get_model_info():
"""Return information about the loaded model"""
try:
param_count = sum(p.numel() for p in model.parameters())
trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
info = f"""
πŸ€– **Model Information**
- **Architecture**: GPT from Scratch
- **Parameters**: {param_count:,} total ({trainable_params:,} trainable)
- **Vocabulary Size**: {vocab_size}
- **Embedding Dimension**: {n_embd}
- **Attention Heads**: {n_head}
- **Transformer Layers**: {n_layer}
- **Context Window**: {block_size} tokens
- **Device**: {device}
"""
return info
except Exception as e:
return f"❌ Error getting model info: {str(e)}"
# Create Gradio Interface
def create_interface():
"""Create and return the Gradio interface"""
with gr.Blocks(
title="GPT from Scratch - Interactive Text Generator",
theme=gr.themes.Soft(),
css=".gradio-container {background: linear-gradient(45deg, #1e3c72, #2a5298);}"
) as interface:
# Header
gr.Markdown("""
# πŸš€ GPT from Scratch - Interactive Text Generator
Generate text using transformer models built from scratch with PyTorch!
**Features:**
- 🎯 Real-time text generation
- 🌑️ Temperature control for creativity
- 🎲 Seed control for reproducibility
- πŸ“Š Model architecture information
""")
with gr.Row():
with gr.Column(scale=2):
# Input Section
gr.Markdown("## πŸ“ Text Generation")
prompt_input = gr.Textbox(
label="Enter your prompt",
placeholder="Once upon a time...",
lines=3,
value="The wizard"
)
with gr.Row():
max_tokens = gr.Slider(
minimum=1,
maximum=1000,
value=200,
step=1,
label="Max Tokens to Generate"
)
temperature = gr.Slider(
minimum=0.1,
maximum=2.0,
value=0.8,
step=0.1,
label="Temperature (Creativity)"
)
with gr.Row():
seed = gr.Number(
label="Random Seed (0 = random)",
value=0,
precision=0
)
generate_btn = gr.Button(
"🎭 Generate Text",
variant="primary",
size="lg"
)
# Output Section
output_text = gr.Textbox(
label="Generated Text",
lines=15,
max_lines=20,
show_copy_button=True
)
with gr.Column(scale=1):
# Model Information
gr.Markdown("## πŸ€– Model Information")
model_info = gr.Textbox(
label="Model Details",
value=get_model_info(),
lines=12,
interactive=False
)
# Quick Examples
gr.Markdown("## πŸ’‘ Example Prompts")
examples = gr.Examples(
examples=[
["The brave knight", 150, 0.7, 42],
["In a world where", 200, 0.9, 123],
["The mysterious forest", 100, 0.6, 456],
["Once upon a time", 250, 0.8, 789],
["The ancient wizard", 180, 0.75, 321]
],
inputs=[prompt_input, max_tokens, temperature, seed],
label="Click to try these examples"
)
# Event Handlers
generate_btn.click(
fn=generate_text,
inputs=[prompt_input, max_tokens, temperature, seed],
outputs=output_text,
show_progress=True
)
# Additional Information
gr.Markdown("""
---
## πŸ“š How to Use
1. **Enter a prompt**: Start with any text you'd like the model to continue
2. **Adjust settings**:
- **Max Tokens**: How much text to generate
- **Temperature**: Lower values (0.1-0.7) = more focused, Higher values (0.8-2.0) = more creative
- **Seed**: Use the same seed for reproducible results
3. **Generate**: Click the button and watch the magic happen!
## πŸ”§ Technical Details
This interface uses a GPT model trained from scratch with:
- Character-level tokenization
- Multi-head self-attention mechanisms
- Transformer architecture with residual connections
- Layer normalization and dropout for regularization
**Note**: The model works best with prompts that match its training data style.
""")
return interface
# Launch the interface
if __name__ == "__main__":
print("πŸš€ Starting GPT from Scratch Gradio Interface...")
print(f"πŸ“Š Model Info: {vocab_size} vocab size, {n_embd} embedding dim, {n_layer} layers")
print(f"πŸ’» Device: {device}")
# Create and launch interface
demo = create_interface()
demo.launch(
server_name="0.0.0.0", # Allow external access
server_port=7860, # Default Gradio port
show_api=True, # Show API documentation
share=False, # Set to True for public sharing
inbrowser=True # Open in browser automatically
)