lemms committed on
Commit
aef4134
·
verified ·
1 Parent(s): ef6446c

Upload app_simplified.py with huggingface_hub

Files changed (1)
  1. app_simplified.py +465 -0
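
The commit message points at the `huggingface_hub` upload path. A minimal sketch of such an upload call (the Space repo id below is a placeholder; it is not shown in this commit):

    from huggingface_hub import HfApi

    HfApi().upload_file(
        path_or_fileobj="app_simplified.py",
        path_in_repo="app_simplified.py",
        repo_id="lemms/<space-id>",  # placeholder; actual Space id not shown here
        repo_type="space",
    )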
app_simplified.py ADDED
@@ -0,0 +1,465 @@
+ #!/usr/bin/env python3
+ """
+ OpenLLM Inference Space - Simplified Gradio Interface
+ Loads models from Hugging Face repositories to avoid storage limits
+ """
+
+ import gradio as gr
+ import torch
+ import json
+ import os
+ import math
+ from pathlib import Path
+ from typing import Dict, Any, Optional
+ import logging
+ from dataclasses import dataclass
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+ @dataclass
+ class GPTConfig:
+     """Configuration class for GPT model hyperparameters."""
+     vocab_size: int = 32000
+     n_layer: int = 6
+     n_head: int = 8
+     n_embd: int = 512
+     block_size: int = 1024
+     dropout: float = 0.1
+     bias: bool = True
+     model_name: str = "gpt-small"
+
+ class CausalSelfAttention(nn.Module):
+     """Multi-head causal self-attention mechanism."""
+
+     def __init__(self, config):
+         super().__init__()
+         assert config.n_embd % config.n_head == 0
+
+         self.config = config
+         self.n_head = config.n_head
+         self.n_embd = config.n_embd
+         self.head_dim = self.n_embd // self.n_head
+
+         # Single fused projection producing query, key, and value in one matmul
+         self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
+         self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
+         self.attn_dropout = nn.Dropout(config.dropout)
+         self.resid_dropout = nn.Dropout(config.dropout)
+
+         # Causal mask: lower-triangular, so position i attends only to j <= i
+         self.register_buffer(
+             "bias",
+             torch.tril(torch.ones(config.block_size, config.block_size)).view(
+                 1, 1, config.block_size, config.block_size
+             ),
+         )
+
+     def forward(self, x):
+         B, T, C = x.size()
+
+         # Split the fused QKV projection and reshape to (B, n_head, T, head_dim)
+         q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
+         q = q.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
+         k = k.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
+         v = v.view(B, T, self.n_head, self.head_dim).transpose(1, 2)
+
+         # Scaled dot-product attention with the causal mask applied before softmax
+         att = (q @ k.transpose(-2, -1)) * (1.0 / math.sqrt(self.head_dim))
+         att = att.masked_fill(self.bias[:, :, :T, :T] == 0, float("-inf"))
+         att = F.softmax(att, dim=-1)
+         att = self.attn_dropout(att)
+
+         # Weighted sum of values, then merge heads back to (B, T, C)
+         y = att @ v
+         y = y.transpose(1, 2).contiguous().view(B, T, C)
+         y = self.resid_dropout(self.c_proj(y))
+         return y
+
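+ # Note: on PyTorch >= 2.0 the attention above is functionally equivalent to
+ # F.scaled_dot_product_attention(q, k, v, dropout_p=config.dropout, is_causal=True),
+ # which fuses these steps; the explicit version is kept for readability.
+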
+ class MLP(nn.Module):
+     """Multi-Layer Perceptron for Transformer."""
+
+     def __init__(self, config):
+         super().__init__()
+         self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
+         self.gelu = nn.GELU()
+         self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
+         self.dropout = nn.Dropout(config.dropout)
+
+     def forward(self, x):
+         x = self.c_fc(x)
+         x = self.gelu(x)
+         x = self.c_proj(x)
+         x = self.dropout(x)
+         return x
+
+ class Block(nn.Module):
+     """Single Transformer block."""
+
+     def __init__(self, config):
+         super().__init__()
+         self.ln_1 = nn.LayerNorm(config.n_embd)
+         self.attn = CausalSelfAttention(config)
+         self.ln_2 = nn.LayerNorm(config.n_embd)
+         self.mlp = MLP(config)
+
+     def forward(self, x):
+         # Pre-norm residual connections around attention and MLP
+         x = x + self.attn(self.ln_1(x))
+         x = x + self.mlp(self.ln_2(x))
+         return x
+
+ class GPTModel(nn.Module):
+     """Complete GPT Language Model."""
+
+     def __init__(self, config):
+         super().__init__()
+
+         self.config = config
+
+         self.transformer = nn.ModuleDict(
+             dict(
+                 wte=nn.Embedding(config.vocab_size, config.n_embd),
+                 wpe=nn.Embedding(config.block_size, config.n_embd),
+                 drop=nn.Dropout(config.dropout),
+                 h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
+                 ln_f=nn.LayerNorm(config.n_embd),
+             )
+         )
+
+         self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
+         # Weight tying: token embedding and output projection share parameters
+         self.transformer.wte.weight = self.lm_head.weight
+
+         self.apply(self._init_weights)
+
+     def _init_weights(self, module):
+         """GPT-2 style initialization: normal(0, 0.02) weights, zero biases."""
+         if isinstance(module, nn.Linear):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+             if module.bias is not None:
+                 torch.nn.init.zeros_(module.bias)
+         elif isinstance(module, nn.Embedding):
+             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
+
+     def forward(self, input_ids, attention_mask=None, labels=None):
+         # attention_mask is accepted for API compatibility but unused here
+         device = input_ids.device
+         b, t = input_ids.size()
+         assert t <= self.config.block_size
+
+         # Token embeddings
+         tok_emb = self.transformer.wte(input_ids)
+
+         # Position embeddings
+         pos = torch.arange(0, t, dtype=torch.long, device=device)
+         pos_emb = self.transformer.wpe(pos)
+
+         # Combine embeddings
+         x = self.transformer.drop(tok_emb + pos_emb)
+
+         # Pass through transformer blocks
+         for block in self.transformer.h:
+             x = block(x)
+
+         # Final layer normalization
+         x = self.transformer.ln_f(x)
+
+         # Language modeling head
+         logits = self.lm_head(x)
+
+         loss = None
+         if labels is not None:
+             # Shift so that tokens < n predict n
+             shift_logits = logits[..., :-1, :].contiguous()
+             shift_labels = labels[..., 1:].contiguous()
+             loss = F.cross_entropy(
+                 shift_logits.view(-1, shift_logits.size(-1)),
+                 shift_labels.view(-1),
+                 ignore_index=-1,
+             )
+
+         return (loss, logits) if loss is not None else (logits,)
+
+     def generate(self, input_ids, max_length=100, temperature=1.0, **kwargs):
+         """Generate text using the model."""
+         self.eval()
+         with torch.no_grad():
+             for _ in range(max_length - input_ids.size(1)):
+                 # Crop sequence if it exceeds block size
+                 idx_cond = (
+                     input_ids
+                     if input_ids.size(1) <= self.config.block_size
+                     else input_ids[:, -self.config.block_size:]
+                 )
+
+                 # Forward pass
+                 logits = self(idx_cond)[0]
+
+                 # Get logits for the last token
+                 logits = logits[:, -1, :] / temperature
+
+                 # Apply softmax and sample
+                 probs = F.softmax(logits, dim=-1)
+                 idx_next = torch.multinomial(probs, num_samples=1)
+
+                 # Append to sequence
+                 input_ids = torch.cat((input_ids, idx_next), dim=1)
+
+         self.train()
+         return input_ids
+
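+ # Minimal usage sketch (illustrative values, not part of the app flow):
+ #   cfg = GPTConfig()
+ #   model = GPTModel(cfg)
+ #   ids = torch.randint(0, cfg.vocab_size, (1, 8))
+ #   out = model.generate(ids, max_length=32, temperature=0.8)  # (1, 32) token ids
+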
+ class OpenLLMInferenceEngine:
+     """Simplified inference engine that loads models from Hugging Face repositories."""
+
+     def __init__(self):
+         self.models = {}
+         self.tokenizers = {}
+         self.current_model = None
+         self.current_tokenizer = None
+
+         # Model configurations with Hugging Face repository IDs
+         self.model_configs = {
+             "openllm-small-extended-4k": {
+                 "name": "OpenLLM Small (4k steps)",
+                 "description": "Small model trained for 4,000 steps - Early training stage",
+                 "hf_repo": "lemms/openllm-small-extended-4k",
+                 "local_path": "models/small-extended-4k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             },
+             "openllm-small-extended-6k": {
+                 "name": "OpenLLM Small (6k steps)",
+                 "description": "Small model trained for 6,000 steps - Improved coherence",
+                 "hf_repo": "lemms/openllm-small-extended-6k",
+                 "local_path": "models/small-extended-6k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             },
+             "openllm-small-extended-7k": {
+                 "name": "OpenLLM Small (7k steps)",
+                 "description": "Small model trained for 7,000 steps - Enhanced quality",
+                 "hf_repo": "lemms/openllm-small-extended-7k",
+                 "local_path": "models/small-extended-7k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             },
+             "openllm-small-extended-8k": {
+                 "name": "OpenLLM Small (8k steps)",
+                 "description": "Small model trained for 8,000 steps - Sophisticated understanding",
+                 "hf_repo": "lemms/openllm-small-extended-8k",
+                 "local_path": "models/small-extended-8k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             },
+             "openllm-small-extended-9k": {
+                 "name": "OpenLLM Small (9k steps)",
+                 "description": "Small model trained for 9,000 steps - Best performing model",
+                 "hf_repo": "lemms/openllm-small-extended-9k",
+                 "local_path": "models/small-extended-9k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             },
+             "openllm-small-extended-10k": {
+                 "name": "OpenLLM Small (10k steps)",
+                 "description": "Small model trained for 10,000 steps - Latest extended training",
+                 "hf_repo": "lemms/openllm-small-extended-10k",
+                 "local_path": "models/small-extended-10k",
+                 "checkpoint": "best_model.pt",
+                 "config": "config.json"
+             }
+         }
+
+         logger.info("🚀 OpenLLM Inference Engine initialized")
+         logger.info(f"📋 Available models: {list(self.model_configs.keys())}")
+
+     def load_model_from_hf(self, model_id: str) -> bool:
+         """Load model from Hugging Face repository."""
+         try:
+             from huggingface_hub import snapshot_download
+
+             config = self.model_configs.get(model_id)
+             if not config:
+                 logger.error(f"❌ Unknown model ID: {model_id}")
+                 return False
+
+             logger.info(f"📥 Loading model from HF: {config['hf_repo']}")
+
+             # Download model files from Hugging Face
+             local_dir = snapshot_download(
+                 repo_id=config['hf_repo'],
+                 repo_type="model",
+                 local_dir=f"temp_{model_id}",
+                 allow_patterns=["*.pt", "*.json", "*.model"]
+             )
+
+             logger.info(f"✅ Downloaded model to: {local_dir}")
+
+             # Load configuration
+             config_path = os.path.join(local_dir, "config.json")
+             if os.path.exists(config_path):
+                 with open(config_path, 'r') as f:
+                     config_data = json.load(f)
+
+                 # Create model config
+                 model_config = GPTConfig(
+                     vocab_size=config_data["model_config"]["vocab_size"],
+                     n_layer=config_data["model_config"]["n_layer"],
+                     n_head=config_data["model_config"]["n_head"],
+                     n_embd=config_data["model_config"]["n_embd"],
+                     block_size=config_data["model_config"]["block_size"],
+                     dropout=config_data["model_config"]["dropout"],
+                     bias=config_data["model_config"]["bias"]
+                 )
+
+                 # Create model
+                 model = GPTModel(model_config)
+
+                 # Load weights if available
+                 model_path = os.path.join(local_dir, "best_model.pt")
+                 if os.path.exists(model_path):
+                     model.load_state_dict(torch.load(model_path, map_location="cpu"))
+                     logger.info("✅ Loaded model weights")
+
+                 self.models[model_id] = model
+                 self.current_model = model_id
+
+                 logger.info(f"✅ Successfully loaded model: {model_id}")
+                 return True
+             else:
+                 logger.error(f"❌ Config file not found: {config_path}")
+                 return False
+
+         except Exception as e:
+             logger.error(f"❌ Failed to load model from HF {model_id}: {e}")
+             return False
+
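+     # Expected config.json shape, inferred from the keys read above:
+     #   {"model_config": {"vocab_size": ..., "n_layer": ..., "n_head": ...,
+     #                     "n_embd": ..., "block_size": ..., "dropout": ..., "bias": ...}}
+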
+     def generate_text(self, prompt: str, model_id: str, max_length: int = 100, temperature: float = 0.7) -> str:
+         """Generate text using the specified model."""
+         try:
+             # Load model if not already loaded
+             if model_id not in self.models:
+                 if not self.load_model_from_hf(model_id):
+                     return f"❌ Failed to load model: {model_id}"
+
+             model = self.models[model_id]
+             model.eval()
+
+             # Simple character-based tokenization (demo purposes only);
+             # a real deployment would use the model's trained tokenizer
+             tokens = [ord(c) % 32000 for c in prompt]
+             input_ids = torch.tensor([tokens], dtype=torch.long)
+
+             with torch.no_grad():
+                 outputs = model.generate(
+                     input_ids,
+                     max_length=max_length,
+                     temperature=temperature
+                 )
+
+             # Simple detokenization: map token ids straight back to code points
+             generated_text = ''.join([chr(t % 65536) for t in outputs[0].tolist()])
+             return generated_text
+
+         except Exception as e:
+             logger.error(f"❌ Generation failed: {e}")
+             return f"❌ Generation failed: {str(e)}"
+
+ # Initialize the inference engine
+ inference_engine = OpenLLMInferenceEngine()
+
+ def generate_text_interface(prompt: str, model_choice: str, max_length: int, temperature: float) -> str:
+     """Gradio interface function for text generation."""
+     try:
+         result = inference_engine.generate_text(
+             prompt=prompt,
+             model_id=model_choice,
+             max_length=max_length,
+             temperature=temperature
+         )
+         return result
+     except Exception as e:
+         return f"❌ Error: {str(e)}"
+
+ def get_model_info(model_choice: str) -> str:
+     """Get information about the selected model."""
+     config = inference_engine.model_configs.get(model_choice)
+     if config:
+         return f"""
+ **Model Information:**
+ - **Name**: {config['name']}
+ - **Description**: {config['description']}
+ - **Repository**: {config['hf_repo']}
+ - **Status**: Ready to load
+ """
+     else:
+         return "❌ Unknown model selected"
+
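+ # Direct usage sketch (the first call downloads the chosen repo before generating):
+ #   inference_engine.generate_text("The future of AI", "openllm-small-extended-10k",
+ #                                  max_length=50, temperature=0.7)
+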
+ # Create Gradio interface
+ with gr.Blocks(title="OpenLLM Inference Space", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🚀 OpenLLM Inference Space")
+     gr.Markdown("Welcome to the OpenLLM Inference Space! Select a model and generate text.")
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("## 🎯 Model Selection")
+             model_choice = gr.Dropdown(
+                 choices=list(inference_engine.model_configs.keys()),
+                 value="openllm-small-extended-10k",
+                 label="Select Model",
+                 info="Choose from our trained models"
+             )
+
+             model_info = gr.Markdown("Select a model to see information")
+
+             def update_model_info(choice):
+                 return get_model_info(choice)
+
+             model_choice.change(fn=update_model_info, inputs=model_choice, outputs=model_info)
+
+         with gr.Column(scale=2):
+             gr.Markdown("## ✍️ Text Generation")
+             prompt_input = gr.Textbox(
+                 label="Enter your prompt",
+                 placeholder="The future of artificial intelligence...",
+                 lines=3
+             )
+
+             with gr.Row():
+                 max_length = gr.Slider(
+                     minimum=10,
+                     maximum=500,
+                     value=100,
+                     step=10,
+                     label="Max Length",
+                     info="Number of tokens to generate"
+                 )
+                 temperature = gr.Slider(
+                     minimum=0.1,
+                     maximum=2.0,
+                     value=0.7,
+                     step=0.1,
+                     label="Temperature",
+                     info="Controls randomness (higher = more random)"
+                 )
+
+             generate_btn = gr.Button("🚀 Generate Text", variant="primary")
+             output_text = gr.Textbox(label="Generated Text", lines=10)
+
+     gr.Markdown("## 📊 Available Models")
+     gr.Markdown("""
+ | Model | Training Steps | Description | Best Loss |
+ |-------|---------------|-------------|-----------|
+ | **4k Model** | 4,000 | Early training stage, basic language patterns | ~6.2 |
+ | **6k Model** | 6,000 | Improved coherence, better vocabulary usage | ~5.8 |
+ | **7k Model** | 7,000 | Enhanced text generation quality | ~5.5 |
+ | **8k Model** | 8,000 | More sophisticated language understanding | ~5.3 |
+ | **9k Model** | 9,000 | Best performing model (lowest loss) | ~5.2 |
+ | **10k Model** | 10,000 | Latest extended training | ~5.22 |
+ """)
+
+     # Connect the generate button
+     generate_btn.click(
+         fn=generate_text_interface,
+         inputs=[prompt_input, model_choice, max_length, temperature],
+         outputs=output_text
+     )
+
+ # Launch the app
+ if __name__ == "__main__":
+     demo.launch()
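+
+ # To run locally (a sketch, assuming standard Gradio defaults): install
+ # gradio, torch, and huggingface_hub, then run `python app_simplified.py`;
+ # Gradio serves on http://127.0.0.1:7860 by default.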