"""
Gradio Demo for GPT-2 From Scratch
Multi-model demo: Tiny → Medium → GPT-2 Small
Deploy to HuggingFace Spaces for interactive blog demo
"""

import gradio as gr
import torch
import torch.nn.functional as F
from model import TransformerLanguageModel
import json
import os
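
# Each checkpoint directory referenced in MODELS is expected to contain
# three files (see load_model below):
#   config.json        - model hyperparameters and tokenizer type
#   tokenizer.json     - serialized character-level or BPE tokenizer
#   pytorch_model.bin  - model state dict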

# Available models
MODELS = {
    "Tiny Shakespeare (3.2M params)": {
        "path": "checkpoint_tiny",
        "description": "Phase 1: Character-level model trained on Shakespeare"
    },
    "Medium Character (3.3M params)": {
        "path": "checkpoint_medium",
        "description": "Phase 2: Character-level model trained on 250MB foundational dataset"
    },
    "GPT-2 Small (134M params)": {
        "path": "checkpoint_gpt2_small",
        "description": "Phase 3: BPE tokenizer, 12GB data (checkpoint 7 of 10)"
    }
}

# Cache for loaded models
loaded_models = {}


def load_model(model_name):
    """Load a model by name, with caching"""
    if model_name in loaded_models:
        return loaded_models[model_name]

    model_info = MODELS.get(model_name)
    if not model_info:
        return None, None, f"Unknown model: {model_name}"

    model_dir = model_info["path"]
    config_path = os.path.join(model_dir, "config.json")

    if not os.path.exists(config_path):
        return None, None, f"Model not found: {model_dir}"

    # Load config
    with open(config_path, "r") as f:
        config = json.load(f)

    # Load tokenizer based on type
    tokenizer_type = config.get("tokenizer_type", "character")
    tokenizer_path = os.path.join(model_dir, "tokenizer.json")

    if tokenizer_type == "bpe":
        from tokenizer_bpe import BPETokenizer
        tokenizer = BPETokenizer()
        tokenizer.load(tokenizer_path)
    else:
        from tokenizer import CharacterTokenizer
        tokenizer = CharacterTokenizer()
        tokenizer.load(tokenizer_path)

    # Create model
    model = TransformerLanguageModel(
        vocab_size=config["vocab_size"],
        embed_dim=config["embed_dim"],
        num_heads=config["num_heads"],
        num_layers=config["num_layers"],
        ff_dim=config["ff_dim"],
        max_seq_len=config["max_seq_len"],
        dropout=0.0
    )

    # Load weights
    model_path = os.path.join(model_dir, "pytorch_model.bin")
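    # weights_only=True (available since PyTorch 1.13) restricts unpickling
    # to tensors, which is safer when loading downloaded checkpoints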
    model.load_state_dict(torch.load(model_path, map_location="cpu", weights_only=True))
    model.eval()

    # Cache it
    loaded_models[model_name] = (model, tokenizer, config)
    return model, tokenizer, config


def generate(model, tokenizer, config, prompt, max_tokens=100, temperature=0.8, top_k=40):
    """Generate text from prompt"""
    if model is None:
        return "Model not loaded."

    if not prompt.strip():
        return "Please enter a prompt."

    # Encode prompt
    tokens = tokenizer.encode(prompt)
    if len(tokens) == 0:
        return "Could not encode prompt. Try different characters."

    tokens = torch.tensor(tokens, dtype=torch.long).unsqueeze(0)
    max_seq_len = config.get("max_seq_len", 256)

    with torch.no_grad():
        for _ in range(max_tokens):
            # Truncate if too long
            if tokens.size(1) > max_seq_len:
                input_tokens = tokens[:, -max_seq_len:]
            else:
                input_tokens = tokens

            # Forward pass
            logits = model(input_tokens)
            logits = logits[:, -1, :] / temperature

            # Top-k filtering: keep the k largest logits, mask the rest to -inf
            if top_k is not None and top_k > 0:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = float("-inf")

            # Sample
            probs = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probs, num_samples=1)
            tokens = torch.cat([tokens, next_token], dim=1)

    return tokenizer.decode(tokens[0].tolist())
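
# Minimal usage sketch (assumes the checkpoint_tiny directory exists):
#   model, tokenizer, config = load_model("Tiny Shakespeare (3.2M params)")
#   print(generate(model, tokenizer, config, "ROMEO:", max_tokens=50))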


def generate_wrapper(model_name, prompt, max_tokens, temperature, top_k):
    """Wrapper that loads the selected model and generates"""
    model, tokenizer, config = load_model(model_name)

    if isinstance(config, str):  # Error message
        return config

    if model is None:
        return f"Model '{model_name}' not available. Check if checkpoint exists."

    return generate(model, tokenizer, config, prompt, int(max_tokens), temperature, int(top_k))


def get_model_info(model_name):
    """Get info string for selected model"""
    model, tokenizer, config = load_model(model_name)

    if model is None:
        return f"⚠️ {model_name} - Not loaded (checkpoint missing)"

    params = config.get("total_parameters", 0)
    tok_type = config.get("tokenizer_type", "character")
    return f"✅ {model_name} | {params:,} parameters | {tok_type} tokenizer"


def update_examples(model_name):
    """Update example prompts based on model"""
    if "Shakespeare" in model_name or "Tiny" in model_name:
        return gr.update(samples=[
            ["ROMEO:"],
            ["JULIET:"],
            ["To be, or not to be"],
            ["First Citizen:"],
        ])
    else:
        return gr.update(samples=[
            ["What is the capital of France?"],
            ["Explain machine learning in simple terms."],
            ["Write a poem about coffee."],
            ["The meaning of life is"],
        ])


# Check which models are available
available_models = []
for name, info in MODELS.items():
    config_path = os.path.join(info["path"], "config.json")
    if os.path.exists(config_path):
        available_models.append(name)

if not available_models:
    available_models = list(MODELS.keys())  # Show all, will error on use


# Gradio interface
with gr.Blocks(title="GPT From Scratch Demo", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # GPT From Scratch Demo

        Compare models from my training journey — from tiny Shakespeare to GPT-2 Small.

        [Read the blog](https://gpuburnout.github.io/llm-journey/) |
        [View the code](https://github.com/GPUburnout/gpt2-from-scratch)
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            model_selector = gr.Dropdown(
                choices=list(MODELS.keys()),
                value=available_models[0] if available_models else list(MODELS.keys())[0],
                label="Select Model",
                info="Choose which model to use for generation"
            )

            model_status = gr.Markdown(value="")

            prompt = gr.Textbox(
                label="Enter your prompt",
                placeholder="Type something...",
                lines=2,
                value="ROMEO:" if "Tiny" in available_models[0] else "What is the capital of France?"
            )

            with gr.Row():
                max_tokens = gr.Slider(
                    minimum=50, maximum=500, value=200, step=50,
                    label="Max tokens"
                )
                temperature = gr.Slider(
                    minimum=0.1, maximum=1.5, value=0.8, step=0.1,
                    label="Temperature"
                )

            top_k = gr.Slider(
                minimum=1, maximum=100, value=40, step=1,
                label="Top-K sampling"
            )

            generate_btn = gr.Button("Generate", variant="primary")

        with gr.Column(scale=1):
            output = gr.Textbox(label="Generated text", lines=15)

    # Example prompts
    examples = gr.Examples(
        examples=[["ROMEO:"], ["JULIET:"], ["To be, or not to be"]],
        inputs=prompt,
        label="Example prompts"
    )

    # Update model status on load and selection change
    demo.load(
        fn=get_model_info,
        inputs=model_selector,
        outputs=model_status
    )

    model_selector.change(
        fn=get_model_info,
        inputs=model_selector,
        outputs=model_status
    )

    # Swap the example prompts to match the selected model
    model_selector.change(
        fn=update_examples,
        inputs=model_selector,
        outputs=examples.dataset
    )

    # Generate on button click or enter
    generate_btn.click(
        generate_wrapper,
        inputs=[model_selector, prompt, max_tokens, temperature, top_k],
        outputs=output
    )

    prompt.submit(
        generate_wrapper,
        inputs=[model_selector, prompt, max_tokens, temperature, top_k],
        outputs=output
    )

if __name__ == "__main__":
    demo.launch()
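    # Tip: demo.launch(share=True) serves a temporary public URL when
    # testing outside of HuggingFace Spaces.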