lemms committed on
Commit 8f3458c · verified · 1 Parent(s): c01f852

Upload app_with_10k.py with huggingface_hub
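As the commit message notes, the file was pushed with the huggingface_hub client. A minimal sketch of such an upload, assuming a hypothetical target repo id (the actual Space id is not shown on this page) and a valid token in the environment:

from huggingface_hub import HfApi

api = HfApi()  # picks up HF_TOKEN from the environment or the cached login
api.upload_file(
    path_or_fileobj="app_with_10k.py",   # local file to push
    path_in_repo="app_with_10k.py",      # destination path inside the repo
    repo_id="<user>/<space-name>",       # hypothetical Space id, replace with the real one
    repo_type="space",
    commit_message="Upload app_with_10k.py with huggingface_hub",
)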

Files changed (1)
  1. app_with_10k.py +636 -0
app_with_10k.py ADDED
@@ -0,0 +1,636 @@
#!/usr/bin/env python3
"""
OpenLLM Real Models App - Final working version with correct attribute naming
"""

import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
import json
import logging
import sentencepiece as spm
import math
from pathlib import Path
from typing import Dict, Any, Optional
from huggingface_hub import snapshot_download

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class GPTConfig:
    """GPT model configuration"""
    def __init__(self, vocab_size=32000, n_layer=6, n_head=8, n_embd=512,
                 block_size=1024, dropout=0.1, bias=True, **kwargs):
        # Accept any additional kwargs to handle extra config fields
        self.vocab_size = vocab_size
        self.n_layer = n_layer
        self.n_head = n_head
        self.n_embd = n_embd
        self.block_size = block_size
        self.dropout = dropout
        self.bias = bias

class GPT(nn.Module):
    """GPT-style transformer model - EXACT architecture matching the saved model"""
    def __init__(self, config):
        super().__init__()
        assert config.vocab_size is not None
        assert config.block_size is not None
        self.config = config

        # Create the transformer module with the exact naming convention
        self.transformer = nn.ModuleDict(dict(
            wte = nn.Embedding(config.vocab_size, config.n_embd),
            wpe = nn.Embedding(config.block_size, config.n_embd),
            drop = nn.Dropout(config.dropout),
            h = nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
            ln_f = nn.LayerNorm(config.n_embd),
        ))

        # Language model head - MUST have bias to match saved model
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=True)

        # Initialize weights
        self.apply(self._init_weights)
        for pn, p in self.named_parameters():
            if pn.endswith('c_proj.weight'):
                torch.nn.init.normal_(p, mean=0.0, std=0.02/math.sqrt(2 * config.n_layer))

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
            if module.bias is not None:
                torch.nn.init.zeros_(module.bias)
        elif isinstance(module, nn.Embedding):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)

    def forward(self, idx, targets=None):
        device = idx.device
        b, t = idx.size()
        assert t <= self.config.block_size, f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"

        pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
        tok_emb = self.transformer.wte(idx)
        pos_emb = self.transformer.wpe(pos)
        x = self.transformer.drop(tok_emb + pos_emb)

        for block in self.transformer.h:
            x = block(x)
        x = self.transformer.ln_f(x)

        if targets is not None:
            logits = self.lm_head(x)
            loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1)
        else:
            logits = self.lm_head(x[:, [-1], :])
            loss = None

        return logits, loss

    def generate(self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True):
        for _ in range(max_new_tokens):
            idx_cond = idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size:]
            logits, _ = self(idx_cond)
            logits = logits[:, -1, :] / temperature

            if top_k is not None:
                v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
                logits[logits < v[:, [-1]]] = -float('Inf')

            if top_p is not None:
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                sorted_indices_to_remove[..., 0] = 0
                indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove)
                logits[indices_to_remove] = -float('Inf')

            probs = F.softmax(logits, dim=-1)
            if do_sample:
                idx_next = torch.multinomial(probs, num_samples=1)
            else:
                _, idx_next = torch.topk(probs, k=1, dim=-1)

            idx = torch.cat((idx, idx_next), dim=1)

        return idx

class Block(nn.Module):
    """Transformer block with self-attention and feed-forward layers"""
    def __init__(self, config):
        super().__init__()
        self.ln_1 = nn.LayerNorm(config.n_embd)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(config.n_embd)
        self.mlp = MLP(config)

    def forward(self, x):
        x = x + self.attn(self.ln_1(x))
        x = x + self.mlp(self.ln_2(x))
        return x

class CausalSelfAttention(nn.Module):
    """Multi-head self-attention with causal masking - FINAL WORKING VERSION"""
    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
        self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
        self.attn_dropout = nn.Dropout(config.dropout)
        self.resid_dropout = nn.Dropout(config.dropout)
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        self.dropout = config.dropout
        self.use_bias = config.bias  # Use a different name for the boolean flag

        # REGISTER THE ATTENTION BIAS as a buffer (not a parameter) to match the saved model.
        # This is actually a causal attention mask, not a learnable bias.
        if config.bias:
            # Create a causal attention mask buffer
            mask = torch.tril(torch.ones(config.block_size, config.block_size))
            mask = mask.view(1, 1, config.block_size, config.block_size)
            self.register_buffer('bias', mask)  # This matches the saved model's 'bias' key
        else:
            self.register_buffer('bias', None)

    def forward(self, x):
        B, T, C = x.size()

        # Calculate query, key, values for all heads
        q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
        k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
        v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)

        # Causal self-attention using the bias mask
        if self.bias is not None:
            # Use the causal mask. The buffer stores 0/1 floats, so convert it to bool:
            # scaled_dot_product_attention treats a boolean mask as "may attend" positions,
            # whereas a float mask would simply be added to the attention scores.
            attn_mask = self.bias[:, :, :T, :T].bool()
            y = F.scaled_dot_product_attention(q, k, v, attn_mask=attn_mask, dropout_p=self.dropout if self.training else 0, is_causal=False)
        else:
            # Use built-in causal attention
            y = F.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=self.dropout if self.training else 0, is_causal=True)

        y = y.transpose(1, 2).contiguous().view(B, T, C)

        # Output projection
        y = self.resid_dropout(self.c_proj(y))
        return y

class MLP(nn.Module):
    """Multi-layer perceptron"""
    def __init__(self, config):
        super().__init__()
        self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
        self.gelu = nn.GELU()
        self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        x = self.c_fc(x)
        x = self.gelu(x)
        x = self.c_proj(x)
        x = self.dropout(x)
        return x

class RealOpenLLMInference:
    """Real OpenLLM inference engine using actual trained models"""

    def __init__(self):
        self.models = {}
        self.tokenizers = {}
        self.current_model = None

        # Real model configurations from Hugging Face
        self.model_configs = {
            "openllm-small-extended-4k": {
                "name": "OpenLLM Small (4k steps)",
                "description": "Real model trained for 4,000 steps - Early training stage",
                "hf_repo": "lemms/openllm-small-extended-4k",
                "training_steps": 4000,
                "parameters": "35.8M"
            },
            "openllm-small-extended-6k": {
                "name": "OpenLLM Small (6k steps)",
                "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
                "hf_repo": "lemms/openllm-small-extended-6k",
                "training_steps": 6000,
                "parameters": "35.8M"
            },
            "openllm-small-extended-7k": {
                "name": "OpenLLM Small (7k steps)",
                "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
                "hf_repo": "lemms/openllm-small-extended-7k",
                "training_steps": 7000,
                "parameters": "35.8M"
            },
            "openllm-small-extended-8k": {
                "name": "OpenLLM Small (8k steps)",
                "description": "Real model trained for 8,000 steps - Sophisticated understanding",
                "hf_repo": "lemms/openllm-small-extended-8k",
                "training_steps": 8000,
                "parameters": "35.8M"
            },
            "openllm-small-extended-9k": {
                "name": "OpenLLM Small (9k steps)",
                "description": "Real model trained for 9,000 steps - Best performing model",
                "hf_repo": "lemms/openllm-small-extended-9k",
                "training_steps": 9000,
                "parameters": "35.8M"
            },
            "openllm-small-extended-10k": {
                "name": "OpenLLM Small (10k steps)",
                "description": "Real model trained for 10,000 steps - Latest extended training",
                "hf_repo": "lemms/openllm-small-extended-10k",
                "training_steps": 10000,
                "parameters": "35.8M"
            }
        }

        logger.info("🚀 Real OpenLLM Inference Engine initialized")

    def load_model_from_hf(self, model_id: str) -> bool:
        """Load a real model from Hugging Face"""
        try:
            config = self.model_configs.get(model_id)
            if not config:
                logger.error(f"❌ Unknown model ID: {model_id}")
                return False

            logger.info(f"📥 Loading real model from HF: {config['hf_repo']}")

            # Download model from Hugging Face
            local_dir = snapshot_download(
                repo_id=config['hf_repo'],
                repo_type="model",
                local_dir=f"temp_{model_id}",
                allow_patterns=["*.pt", "*.json", "*.model", "*.bin"]
            )

            logger.info(f"✅ Downloaded model to: {local_dir}")

            # Load model and tokenizer
            success = self._load_model_and_tokenizer(local_dir, model_id)
            if success:
                self.current_model = model_id
                logger.info(f"✅ Successfully loaded real model: {model_id}")
                return True
            else:
                return False

        except Exception as e:
            logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
            return False

    def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
        """Load model and tokenizer from local directory"""
        try:
            model_path = Path(model_dir)

            # Load model configuration
            config_file = model_path / "config.json"
            if config_file.exists():
                with open(config_file, 'r') as f:
                    config_data = json.load(f)

                logger.info(f"📋 Config data keys: {list(config_data.keys())}")

                # Handle different config structures
                if 'model_config' in config_data:
                    # Extract model_config section
                    model_config_data = config_data['model_config']
                else:
                    # Use the entire config as model config
                    model_config_data = config_data

                # Create GPTConfig with only the expected parameters
                expected_params = {
                    'vocab_size', 'n_layer', 'n_head', 'n_embd',
                    'block_size', 'dropout', 'bias'
                }

                config_kwargs = {}
                for key, value in model_config_data.items():
                    if key in expected_params:
                        config_kwargs[key] = value

                logger.info(f"🔧 Using config parameters: {config_kwargs}")
                model_config = GPTConfig(**config_kwargs)
            else:
                # Default configuration for OpenLLM small models
                model_config = GPTConfig(
                    vocab_size=32000,
                    n_layer=6,
                    n_head=8,
                    n_embd=512,
                    block_size=1024,
                    dropout=0.1,
                    bias=True
                )

            # Load model weights
            model_file = model_path / "best_model.pt"
            if not model_file.exists():
                model_file = model_path / "model.pt"
            if not model_file.exists():
                model_file = model_path / "pytorch_model.bin"

            if model_file.exists():
                logger.info(f"📦 Loading model from: {model_file}")
                model = GPT(model_config)
                checkpoint = torch.load(model_file, map_location='cpu')

                # Handle different checkpoint formats
                if isinstance(checkpoint, dict):
                    if 'model_state_dict' in checkpoint:
                        # Extract the actual model weights
                        state_dict = checkpoint['model_state_dict']
                        logger.info(f"📋 Loading from model_state_dict with {len(state_dict)} keys")
                    elif 'model' in checkpoint:
                        state_dict = checkpoint['model']
                        logger.info(f"📋 Loading from model with {len(state_dict)} keys")
                    else:
                        # Try to load directly as state dict
                        state_dict = checkpoint
                        logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")
                else:
                    # Direct state dict
                    state_dict = checkpoint
                    logger.info(f"📋 Loading direct state dict with {len(state_dict)} keys")

                # Load the state dict
                model.load_state_dict(state_dict)
                model.eval()
                self.models[model_id] = model
                logger.info(f"✅ Model loaded successfully")
            else:
                logger.error(f"❌ Model file not found in {model_dir}")
                logger.error(f"   Available files: {list(model_path.glob('*'))}")
                return False

            # Load tokenizer
            tokenizer_file = model_path / "tokenizer.model"
            if tokenizer_file.exists():
                tokenizer = spm.SentencePieceProcessor()
                tokenizer.load(str(tokenizer_file))
                self.tokenizers[model_id] = tokenizer
                logger.info(f"✅ Tokenizer loaded successfully")
            else:
                logger.error(f"❌ Tokenizer file not found in {model_dir}")
                return False

            return True

        except Exception as e:
            logger.error(f"❌ Failed to load model and tokenizer: {e}")
            import traceback
            logger.error(f"📋 Full traceback: {traceback.format_exc()}")
            return False

    def generate_text(self, prompt: str, max_length: int = 100,
                      temperature: float = 0.7, top_k: int = 50,
                      top_p: float = 0.9) -> str:
        """Generate text using the loaded real model"""
        if not self.current_model or self.current_model not in self.models:
            return "❌ No model loaded. Please select a model first."

        try:
            model = self.models[self.current_model]
            tokenizer = self.tokenizers[self.current_model]

            # Tokenize input
            input_ids = tokenizer.encode(prompt)
            input_tensor = torch.tensor([input_ids], dtype=torch.long)

            logger.info(f"🎯 Generating text with prompt: '{prompt[:50]}...'")
            logger.info(f"📊 Parameters: max_length={max_length}, temperature={temperature}, top_k={top_k}, top_p={top_p}")

            # Generate text
            with torch.no_grad():
                output_ids = model.generate(
                    input_tensor,
                    max_new_tokens=max_length,
                    temperature=temperature,
                    top_k=top_k,
                    top_p=top_p,
                    do_sample=True
                )

            # Decode output
            generated_text = tokenizer.decode(output_ids[0].tolist())

            # Remove the input prompt from the output
            if generated_text.startswith(prompt):
                generated_text = generated_text[len(prompt):].strip()

            logger.info(f"✅ Generated text: '{generated_text[:100]}...'")
            return generated_text

        except Exception as e:
            error_msg = f"❌ Generation failed: {str(e)}"
            logger.error(error_msg)
            import traceback
            logger.error(f"📋 Full traceback: {traceback.format_exc()}")
            return error_msg

# Initialize the real inference engine
inference_engine = RealOpenLLMInference()

def load_model_info(model_id: str) -> str:
    """Get information about a specific model"""
    config = inference_engine.model_configs.get(model_id)
    if config:
        return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
    return "❌ Model not found"

def generate_text_interface(model_id: str, prompt: str, max_length: int,
                            temperature: float, top_k: int, top_p: float) -> str:
    """Gradio interface function for text generation"""
    try:
        # Load model if not already loaded
        if model_id not in inference_engine.models:
            logger.info(f"🔄 Loading real model: {model_id}")
            success = inference_engine.load_model_from_hf(model_id)
            if not success:
                return f"❌ Failed to load real model: {model_id}"

        # Generate text
        result = inference_engine.generate_text(
            prompt=prompt,
            max_length=max_length,
            temperature=temperature,
            top_k=top_k,
            top_p=top_p
        )

        return result

    except Exception as e:
        error_msg = f"❌ Error in generation interface: {str(e)}"
        logger.error(error_msg)
        return error_msg

# Create Gradio interface
def create_interface():
    """Create the Gradio interface"""

    with gr.Blocks(
        title="🚀 OpenLLM Real Models Space",
        theme=gr.themes.Soft()
    ) as interface:

        # Header
        gr.Markdown("""
        # 🚀 OpenLLM Real Models Space

        Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.

        ## 🎯 Real Trained Models

        We provide **6 different real models** with varying training steps:

        | Model | Training Steps | Parameters | Performance |
        |-------|---------------|------------|-------------|
        | **4k Model** | 4,000 | 35.8M | Early training stage |
        | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
        | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
        | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
        | **9k Model** | 9,000 | 35.8M | Best performing model |
        | **10k Model** | 10,000 | 35.8M | Latest extended training |

        **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**

        ---
        """)

        with gr.Row():
            with gr.Column(scale=1):
                # Model selection
                model_dropdown = gr.Dropdown(
                    choices=list(inference_engine.model_configs.keys()),
                    value="openllm-small-extended-10k",
                    label="🎯 Select Model",
                    info="Choose the real trained model to use"
                )

                # Model information display
                model_info = gr.Markdown(
                    value=load_model_info("openllm-small-extended-10k"),
                    label="📋 Model Information"
                )

                # Update model info when selection changes
                model_dropdown.change(
                    fn=load_model_info,
                    inputs=[model_dropdown],
                    outputs=[model_info]
                )

            with gr.Column(scale=2):
                # Input prompt
                prompt_input = gr.Textbox(
                    lines=5,
                    label="📝 Input Prompt",
                    placeholder="Enter your text prompt here...",
                    info="The text that will be used as input for generation"
                )

                # Generation parameters
                with gr.Row():
                    max_length = gr.Slider(
                        minimum=10,
                        maximum=500,
                        value=100,
                        step=10,
                        label="📏 Max Length",
                        info="Maximum number of tokens to generate"
                    )

                    temperature = gr.Slider(
                        minimum=0.1,
                        maximum=2.0,
                        value=0.7,
                        step=0.1,
                        label="🌡️ Temperature",
                        info="Controls randomness (higher = more random)"
                    )

                with gr.Row():
                    top_k = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=50,
                        step=1,
                        label="🔍 Top-K",
                        info="Number of highest probability tokens to consider"
                    )

                    top_p = gr.Slider(
                        minimum=0.1,
                        maximum=1.0,
                        value=0.9,
                        step=0.1,
                        label="📊 Top-P",
                        info="Nucleus sampling parameter"
                    )

                # Generate button
                generate_btn = gr.Button(
                    "🚀 Generate Text",
                    variant="primary",
                    size="lg"
                )

                # Output
                output_text = gr.Textbox(
                    lines=10,
                    label="🎯 Generated Text",
                    info="The generated text will appear here"
                )

        # Connect the generate button
        generate_btn.click(
            fn=generate_text_interface,
            inputs=[model_dropdown, prompt_input, max_length, temperature, top_k, top_p],
            outputs=[output_text]
        )

        # Footer
        gr.Markdown("""
        ---

        ## 🔧 Technical Details

        - **Architecture**: GPT-style transformer decoder
        - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
        - **Vocabulary**: 32k tokens (SentencePiece BPE)
        - **Training Data**: Wikipedia passages from SQuAD dataset
        - **Framework**: PyTorch with real trained models
        - **Gradio Version**: 4.44.1 (latest)

        **These models generate actual text based on their training on Wikipedia content.**

        **Model Sources:**
        - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
        - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
        - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
        - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
        - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
        - [10k Model](https://huggingface.co/lemms/openllm-small-extended-10k)
        """)

    return interface

# Create and launch the interface
if __name__ == "__main__":
    interface = create_interface()
    interface.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        debug=True
    )
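For a quick check outside the Gradio UI, the engine defined above can be driven directly. A minimal sketch, assuming the file is saved as app_with_10k.py next to the script and the lemms/openllm-small-extended-* repos are reachable (the first call downloads the checkpoint and tokenizer, which can take a while); the prompt is only an illustrative example:

# Hypothetical smoke test for the inference engine defined in app_with_10k.py
from app_with_10k import inference_engine

model_id = "openllm-small-extended-10k"
if inference_engine.load_model_from_hf(model_id):   # downloads weights + tokenizer from the Hub
    text = inference_engine.generate_text(
        prompt="The history of artificial intelligence",
        max_length=50,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
    )
    print(text)
else:
    print(f"Failed to load {model_id}")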