| """
|
| Gradio Interface for GPT from Scratch
|
| =====================================
|
|
|
| Interactive web interface for text generation using the trained GPT models.
|
| Supports both GPTv1 and GPTv2 with real-time text generation and parameter control.
|
|
|
| Author: Saumitra Gupta
|
| Date: September 2025
|
| """
|
|
|
| import gradio as gr
|
| import torch
|
| import torch.nn as nn
|
| from torch.nn import functional as F
|
| import pickle
|
| import os
|
| import random
|
|
|
|
|
# Select GPU when available; used for the model and all tensors below.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f"Using device: {device}")

# Model hyperparameters.
# NOTE(review): these must match the architecture of the trained checkpoint;
# since load_model() unpickles a complete model object, they only shape the
# randomly initialized fallback — confirm they mirror the training script.
block_size = 8   # context window: maximum number of tokens attended to
n_embd = 384     # embedding / hidden dimension
n_head = 32      # attention heads per block (head_size = 384 // 32 = 12)
n_layer = 32     # number of stacked transformer blocks
dropout = 0.2    # dropout probability used throughout the network
|
|
|
|
|
def _char_mappings(chars):
    """Build (vocab_size, char->id, id->char) lookup tables for a char list."""
    string_to_int = {ch: i for i, ch in enumerate(chars)}
    int_to_string = {i: ch for i, ch in enumerate(chars)}
    return len(chars), string_to_int, int_to_string


def load_vocabulary():
    """Load the character-level vocabulary.

    Prefers `artifacts/vocab.txt` (one character per line); when that file
    is missing, falls back to deriving the vocabulary from the raw training
    corpus `wizard-of-oz.txt` (sorted set of its characters).

    Returns:
        tuple: (chars, vocab_size, string_to_int, int_to_string)
    """
    try:
        with open('artifacts/vocab.txt', 'r', encoding='utf-8') as f:
            # NOTE(review): strip() erases whitespace entries (space/newline
            # characters become '') — confirm vocab.txt encodes them some
            # other way, or those tokens are silently corrupted.
            chars = [line.strip() for line in f]
    except FileNotFoundError:
        # Rebuild the vocabulary from the training corpus instead.
        with open('wizard-of-oz.txt', 'r', encoding='utf-8') as f:
            chars = sorted(set(f.read()))

    # Single construction path for the lookup tables (previously duplicated
    # in both branches).
    vocab_size, string_to_int, int_to_string = _char_mappings(chars)
    return chars, vocab_size, string_to_int, int_to_string
|
|
|
|
|
| chars, vocab_size, string_to_int, int_to_string = load_vocabulary()
|
|
|
|
|
def encode(s):
    """Map a string to its list of token ids; unknown characters map to 0."""
    lookup = string_to_int.get
    return [lookup(ch, 0) for ch in s]
|
|
|
def decode(l):
    """Map a list of token ids back to a string; unknown ids are skipped."""
    lookup = int_to_string.get
    return ''.join(lookup(i, '') for i in l)
|
|
|
|
|
class Head(nn.Module):
    """One head of causal self-attention.

    Projects the input into key/query/value spaces, computes scaled
    dot-product attention with a lower-triangular mask so each position
    attends only to itself and earlier positions, and returns the
    attention-weighted values.
    """

    def __init__(self, head_size):
        super().__init__()
        # Bias-free projections, as in the reference GPT implementation.
        self.key = nn.Linear(n_embd, head_size, bias=False)
        self.query = nn.Linear(n_embd, head_size, bias=False)
        self.value = nn.Linear(n_embd, head_size, bias=False)
        # Causal mask stored as a buffer so it follows the module's device.
        self.register_buffer('tril', torch.tril(torch.ones(block_size, block_size)))
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        _, T, _ = x.shape
        keys = self.key(x)
        queries = self.query(x)
        # (B, T, T) attention scores, scaled by 1/sqrt(head_size).
        scores = queries @ keys.transpose(-2, -1) * keys.shape[-1] ** -0.5
        # Forbid attending to future positions.
        scores = scores.masked_fill(self.tril[:T, :T] == 0, float('-inf'))
        attn = self.dropout(F.softmax(scores, dim=-1))
        # Weighted aggregation of the values.
        return attn @ self.value(x)
|
|
|
class MultiHeadAttention(nn.Module):
    """Runs several attention heads side by side and mixes their outputs."""

    def __init__(self, num_heads, head_size):
        super().__init__()
        self.heads = nn.ModuleList(Head(head_size) for _ in range(num_heads))
        self.proj = nn.Linear(head_size * num_heads, n_embd)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # Each head attends independently; concatenate along channels.
        per_head = [head(x) for head in self.heads]
        combined = torch.cat(per_head, dim=-1)
        # Project back to the embedding width, then regularize.
        return self.dropout(self.proj(combined))
|
|
|
class FeedForward(nn.Module):
    """Two-layer MLP applied independently at every sequence position."""

    def __init__(self, n_embd):
        super().__init__()
        hidden = 4 * n_embd  # conventional 4x expansion
        self.net = nn.Sequential(
            nn.Linear(n_embd, hidden),
            nn.ReLU(),
            nn.Linear(hidden, n_embd),
            nn.Dropout(dropout),
        )

    def forward(self, x):
        return self.net(x)
|
|
|
class Block(nn.Module):
    """Transformer block: self-attention ("communication") then MLP
    ("computation").

    Note: uses post-layer-norm residuals — the norm is applied *after*
    each residual sum, matching the original implementation.
    """

    def __init__(self, n_embd, n_head):
        super().__init__()
        self.sa = MultiHeadAttention(n_head, n_embd // n_head)
        self.ffwd = FeedForward(n_embd)
        self.ln1 = nn.LayerNorm(n_embd)
        self.ln2 = nn.LayerNorm(n_embd)

    def forward(self, x):
        x = self.ln1(x + self.sa(x))
        return self.ln2(x + self.ffwd(x))
|
|
|
class GPTLanguageModel(nn.Module):
    """Decoder-only (GPT-style) character-level language model.

    Token embeddings plus learned positional embeddings feed a stack of
    transformer Blocks, a final LayerNorm, and a linear head producing
    per-position vocabulary logits.
    """

    def __init__(self, vocab_size):
        super().__init__()
        self.token_embedding_table = nn.Embedding(vocab_size, n_embd)
        self.position_embedding_table = nn.Embedding(block_size, n_embd)
        self.blocks = nn.Sequential(*[Block(n_embd, n_head=n_head) for _ in range(n_layer)])
        self.ln_f = nn.LayerNorm(n_embd)
        self.lm_head = nn.Linear(n_embd, vocab_size)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """GPT-2-style initialization: N(0, 0.02) weights, zero biases."""
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        """Compute next-token logits, and a loss when targets are given.

        Args:
            idx: (B, T) int64 token ids, T <= block_size.
            targets: optional (B, T) int64 next-token ids.

        Returns:
            (logits, loss): logits is (B, T, vocab_size) when targets is
            None, otherwise flattened to (B*T, vocab_size); loss is None
            without targets, else the mean cross-entropy.
        """
        B, T = idx.shape
        tok_emb = self.token_embedding_table(idx)
        # Fix: derive the device from the input tensor rather than the
        # module-level `device` global, so the model keeps working if it
        # is moved to a device other than the global default.
        pos_emb = self.position_embedding_table(torch.arange(T, device=idx.device))
        x = tok_emb + pos_emb
        x = self.blocks(x)
        x = self.ln_f(x)
        logits = self.lm_head(x)

        if targets is None:
            loss = None
        else:
            # Flatten (B, T) positions so F.cross_entropy sees one big batch.
            B, T, C = logits.shape
            logits = logits.view(B * T, C)
            targets = targets.view(B * T)
            loss = F.cross_entropy(logits, targets)

        return logits, loss

    def generate(self, idx, max_new_tokens, temperature=1.0):
        """Autoregressively sample `max_new_tokens` tokens after `idx`.

        Args:
            idx: (B, T) int64 context token ids.
            max_new_tokens: number of tokens to append.
            temperature: softmax temperature (> 0); lower is greedier.

        Returns:
            (B, T + max_new_tokens) tensor of token ids.

        Note: switches the module to eval mode and leaves it there, which
        is fine for this inference-only app.
        """
        self.eval()
        with torch.no_grad():
            for _ in range(max_new_tokens):
                # Condition only on the most recent block_size tokens.
                idx_cond = idx[:, -block_size:]
                logits, _ = self.forward(idx_cond)
                # Temperature-scaled distribution over the next token.
                logits = logits[:, -1, :] / temperature
                probs = F.softmax(logits, dim=-1)
                idx_next = torch.multinomial(probs, num_samples=1)
                idx = torch.cat((idx, idx_next), dim=1)
        return idx
|
|
|
|
|
def load_model():
    """Load the trained model from a pickle checkpoint.

    Tries each known checkpoint path in order; on success the unpickled
    model replaces the randomly initialized fallback. When no checkpoint
    loads, the fresh (untrained) model is kept so the UI still works.

    Returns:
        GPTLanguageModel: the model, moved to `device` and in eval mode
        when a checkpoint was loaded.
    """
    global model
    # Fresh model as a fallback so the app runs without a checkpoint.
    model = GPTLanguageModel(vocab_size).to(device)

    model_paths = ['artifacts/model-01.pkl', 'model-01.pkl']
    model_loaded = False

    for path in model_paths:
        if not os.path.exists(path):
            continue
        try:
            # SECURITY: pickle.load executes arbitrary code from the file —
            # only load checkpoints from trusted sources.
            with open(path, 'rb') as f:
                model = pickle.load(f)
            model = model.to(device)
            model.eval()
            print(f"✅ Model loaded successfully from {path}")
            model_loaded = True
            break
        except Exception as e:
            # Best-effort: report and try the next candidate path.
            print(f"❌ Error loading model from {path}: {e}")

    if not model_loaded:
        print("⚠️ No pre-trained model found. Using randomly initialized model.")
        print("Note: Generate some text first to train the model or load a trained checkpoint.")

    return model
|
|
|
|
|
| model = load_model()
|
|
|
|
|
def generate_text(prompt, max_tokens, temperature, seed):
    """Generate a continuation of `prompt` using the global model.

    Args:
        prompt: seed text; a single space is substituted when blank.
        max_tokens: number of new tokens to sample.
        temperature: sampling temperature (clamped to >= 0.1).
        seed: RNG seed; 0 (or negative) means non-deterministic.

    Returns:
        str: the prompt plus the generated continuation, or a
        human-readable error message (this is a UI boundary, so all
        exceptions are caught and reported instead of raised).
    """
    try:
        if seed > 0:
            # Gradio may deliver the number as a float; manual_seed needs int.
            torch.manual_seed(int(seed))
            random.seed(int(seed))

        # An empty prompt would yield a (1, 0) context tensor; use a space.
        if not prompt.strip():
            prompt = " "

        encoded_prompt = encode(prompt)
        context = torch.tensor([encoded_prompt], dtype=torch.long, device=device)

        # The model conditions on at most block_size tokens.
        if context.size(1) > block_size:
            context = context[:, -block_size:]

        generated = model.generate(
            context,
            max_new_tokens=max_tokens,
            temperature=max(0.1, temperature)
        )

        return decode(generated[0].tolist())

    except Exception as e:
        # Fix: the original used escaped "\\n", so users saw literal
        # backslash-n instead of blank lines in the error message.
        return f"❌ Error generating text: {str(e)}\n\nPlease check if the model is properly loaded."
|
|
|
def get_model_info():
    """Return a markdown-formatted summary of the loaded model.

    Reads the module-level `model` and hyperparameter globals; returns an
    error string instead of raising so the UI always has something to show.
    """
    try:
        # Total vs trainable parameter counts (these match unless some
        # parameters were frozen with requires_grad=False).
        param_count = sum(p.numel() for p in model.parameters())
        trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
        info = f"""
π€ **Model Information**
- **Architecture**: GPT from Scratch
- **Parameters**: {param_count:,} total ({trainable_params:,} trainable)
- **Vocabulary Size**: {vocab_size}
- **Embedding Dimension**: {n_embd}
- **Attention Heads**: {n_head}
- **Transformer Layers**: {n_layer}
- **Context Window**: {block_size} tokens
- **Device**: {device}
"""
        return info
    except Exception as e:
        return f"β Error getting model info: {str(e)}"
|
|
|
|
|
def create_interface():
    """Build and return the Gradio Blocks UI.

    Layout: header markdown, then a two-column row — generation controls
    and output on the left, model info and clickable example prompts on
    the right — followed by usage/technical notes. Wires the Generate
    button to `generate_text`.
    """
    with gr.Blocks(
        title="GPT from Scratch - Interactive Text Generator",
        theme=gr.themes.Soft(),
        css=".gradio-container {background: linear-gradient(45deg, #1e3c72, #2a5298);}"
    ) as interface:
        # Page header / feature overview.
        gr.Markdown("""
# π GPT from Scratch - Interactive Text Generator

Generate text using transformer models built from scratch with PyTorch!

**Features:**
- π― Real-time text generation
- π‘οΈ Temperature control for creativity
- π² Seed control for reproducibility
- π Model architecture information
""")

        with gr.Row():
            # Left column: prompt, sampling controls, and generated output.
            with gr.Column(scale=2):
                gr.Markdown("## π Text Generation")

                prompt_input = gr.Textbox(
                    label="Enter your prompt",
                    placeholder="Once upon a time...",
                    lines=3,
                    value="The wizard"
                )

                with gr.Row():
                    max_tokens = gr.Slider(
                        minimum=1,
                        maximum=1000,
                        value=200,
                        step=1,
                        label="Max Tokens to Generate"
                    )
                    # Minimum 0.1 matches the clamp applied in generate_text.
                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.8,
                        step=0.1,
                        label="Temperature (Creativity)"
                    )

                with gr.Row():
                    # precision=0 makes Gradio return an integer seed.
                    seed = gr.Number(
                        label="Random Seed (0 = random)",
                        value=0,
                        precision=0
                    )

                generate_btn = gr.Button(
                    "π Generate Text",
                    variant="primary",
                    size="lg"
                )

                output_text = gr.Textbox(
                    label="Generated Text",
                    lines=15,
                    max_lines=20,
                    show_copy_button=True
                )

            # Right column: static model details and example prompts.
            with gr.Column(scale=1):
                gr.Markdown("## π€ Model Information")
                # Rendered once at build time from the loaded model.
                model_info = gr.Textbox(
                    label="Model Details",
                    value=get_model_info(),
                    lines=12,
                    interactive=False
                )

                gr.Markdown("## π‘ Example Prompts")
                # Each row fills [prompt, max_tokens, temperature, seed].
                examples = gr.Examples(
                    examples=[
                        ["The brave knight", 150, 0.7, 42],
                        ["In a world where", 200, 0.9, 123],
                        ["The mysterious forest", 100, 0.6, 456],
                        ["Once upon a time", 250, 0.8, 789],
                        ["The ancient wizard", 180, 0.75, 321]
                    ],
                    inputs=[prompt_input, max_tokens, temperature, seed],
                    label="Click to try these examples"
                )

        # Button click -> generate_text(prompt, max_tokens, temperature, seed).
        generate_btn.click(
            fn=generate_text,
            inputs=[prompt_input, max_tokens, temperature, seed],
            outputs=output_text,
            show_progress=True
        )

        # Usage instructions and technical notes at the bottom of the page.
        gr.Markdown("""
---

## π How to Use

1. **Enter a prompt**: Start with any text you'd like the model to continue
2. **Adjust settings**:
- **Max Tokens**: How much text to generate
- **Temperature**: Lower values (0.1-0.7) = more focused, Higher values (0.8-2.0) = more creative
- **Seed**: Use the same seed for reproducible results
3. **Generate**: Click the button and watch the magic happen!

## π§ Technical Details

This interface uses a GPT model trained from scratch with:
- Character-level tokenization
- Multi-head self-attention mechanisms
- Transformer architecture with residual connections
- Layer normalization and dropout for regularization

**Note**: The model works best with prompts that match its training data style.
""")

    return interface
|
|
|
|
|
if __name__ == "__main__":
    # Startup banner with the key configuration for a quick sanity check.
    print("π Starting GPT from Scratch Gradio Interface...")
    print(f"π Model Info: {vocab_size} vocab size, {n_embd} embedding dim, {n_layer} layers")
    print(f"π» Device: {device}")

    demo = create_interface()
    # Listen on all interfaces on the default Gradio port; no public
    # share link; open a local browser tab on launch.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        show_api=True,
        share=False,
        inbrowser=True
    )