lemms committed on
Commit 7e1e744 · verified · 1 Parent(s): 708f977

Upload app.py with huggingface_hub

Files changed (1)
  1. app.py +611 -957
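The commit message above says the file was pushed programmatically with `huggingface_hub`. As a hedged sketch only, an upload like this is typically done as follows (the Space repo ID and token setup are assumptions, not shown on this page):

```python
# Hypothetical reproduction of "Upload app.py with huggingface_hub".
# Assumes authentication via `huggingface-cli login` or an HF_TOKEN env var.
from huggingface_hub import HfApi

api = HfApi()
api.upload_file(
    path_or_fileobj="app.py",      # local file to push
    path_in_repo="app.py",         # destination path inside the repo
    repo_id="lemms/<space-id>",    # placeholder: the target Space is not named here
    repo_type="space",
    commit_message="Upload app.py with huggingface_hub",
)
```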
app.py CHANGED
@@ -1,1036 +1,690 @@
1
  #!/usr/bin/env python3
2
  """
3
- OpenLLM Training Space - Main Application
4
-
5
- This is the main entry point for the Hugging Face Space.
6
- It provides a web interface for running OpenLLM training with authentication.
7
-
8
- Author: Louis Chua Bean Chong
9
- License: GPLv3
10
  """
11
 
12
- import os
13
- import sys
14
- from pathlib import Path
15
-
16
  import gradio as gr
17
 
18
- # Add core/src to path for imports
19
- core_src_path = str(Path(__file__).parent / "core" / "src")
20
- if core_src_path not in sys.path:
21
- sys.path.insert(0, core_src_path)
22
-
23
- # Import our authentication and training modules
24
- try:
25
- from openllm_training_with_auth import OpenLLMTrainingManager
26
- from space_auth_test import test_space_authentication
27
-
28
- MODULES_AVAILABLE = True
29
- except ImportError as e:
30
- MODULES_AVAILABLE = False
31
- print(f"❌ Required modules not available: {e}")
32
-
33
 
34
- def create_space_interface():
35
- """Create the Gradio interface for the Space."""
36
 
37
- def run_authentication_test():
38
- """Run the authentication test and return results."""
39
  try:
40
- if not MODULES_AVAILABLE:
41
- return "❌ Required modules not available. Please check deployment."
42
-
43
- # Capture output from authentication test
44
- import contextlib
45
- import io
46
-
47
- output = io.StringIO()
48
- with contextlib.redirect_stdout(output):
49
- success = test_space_authentication()
50
 
51
- result = output.getvalue()
52
53
  if success:
54
- return f"βœ… Authentication Test Results:\n\n{result}"
55
  else:
56
- return f"❌ Authentication Test Failed:\n\n{result}"
57
 
58
  except Exception as e:
59
- return f"❌ Error running authentication test: {e}"
 
60
 
61
- def run_training(model_size, training_steps, use_real_training=False):
62
- """Run the OpenLLM training with authentication."""
63
  try:
64
- if not MODULES_AVAILABLE:
65
- return "❌ Required modules not available. Please check deployment."
66
-
67
- # Security mitigation: Input validation and sanitization
68
- if not isinstance(model_size, str) or model_size not in ["small", "medium", "large"]:
69
- return "❌ Invalid model size. Must be 'small', 'medium', or 'large'."
70
-
71
- if (
72
- not isinstance(training_steps, (int, float))
73
- or training_steps < 1000
74
- or training_steps > 50000
75
- ):
76
- return "❌ Invalid training steps. Must be between 1000 and 50000."
77
-
78
- # Sanitize inputs
79
- model_size = str(model_size).strip().lower()
80
- training_steps = int(float(training_steps))
81
-
82
- print(f"πŸš€ Starting OpenLLM Training")
83
- print("=" * 50)
84
- print(f"πŸ“Š Model Size: {model_size}")
85
- print(f"πŸ”„ Training Steps: {training_steps}")
86
- print(f"🎯 Training Mode: {'Real Training' if use_real_training else 'Demonstration'}")
87
-
88
- if use_real_training:
89
- # Use real training with comprehensive features
90
- try:
91
- from real_training_manager import RealTrainingManager, TrainingConfig
92
-
93
- # Create configuration for real training
94
- config = TrainingConfig(
95
- model_size=model_size,
96
- training_steps=training_steps,
97
- batch_size=32 if model_size == "small" else 16,
98
- learning_rate=3e-4,
99
- data_file="data/clean/training_data.txt",
100
- save_every=1000,
101
- eval_every=500,
102
- )
103
-
104
- # Initialize real training manager
105
- manager = RealTrainingManager(config)
106
-
107
- # Run real training
108
- model = manager.train()
109
-
110
- # Upload model
111
- repo_id = manager.upload_model(model)
112
-
113
- if repo_id:
114
- result = f"βœ… Real Training completed successfully!\n\n"
115
- result += f"πŸ“Š Results:\n"
116
- result += f" - Model Size: {model_size}\n"
117
- result += f" - Training Steps: {training_steps}\n"
118
- result += f" - Final Loss: {manager.training_history[-1]['loss']:.4f}\n"
119
- result += f" - Best Validation Loss: {manager.best_loss:.4f}\n"
120
- result += f" - Model URL: https://huggingface.co/{repo_id}\n\n"
121
- result += f"πŸŽ‰ Model available at: https://huggingface.co/{repo_id}"
122
- else:
123
- result = f"⚠️ Real training completed but upload failed\n\n"
124
- result += f"πŸ“Š Results:\n"
125
- result += f" - Model Size: {model_size}\n"
126
- result += f" - Training Steps: {training_steps}\n"
127
- result += f" - Final Loss: {manager.training_history[-1]['loss']:.4f}\n"
128
- result += f" - Model saved locally: ./trained_model"
129
-
130
- return result
131
-
132
- except ImportError:
133
- return (
134
- "❌ Real training module not available. Falling back to demonstration mode."
135
- )
136
- except Exception as e:
137
- return (
138
- f"❌ Real training failed: {str(e)}\n\nFalling back to demonstration mode."
139
- )
140
-
141
- # Fallback to demonstration training
142
- import contextlib
143
- import io
144
-
145
- output = io.StringIO()
146
- with contextlib.redirect_stdout(output):
147
- training_manager = OpenLLMTrainingManager()
148
- repo_id = training_manager.run_training(model_size=model_size, steps=training_steps)
149
 
150
- result = output.getvalue()
151
 
152
- return f"βœ… Training Results:\n\n{result}\n\nπŸŽ‰ Model available at: https://huggingface.co/{repo_id}"
153
 
154
- except Exception as e:
155
- return f"❌ Error running training: {e}"
156
-
157
- def resume_training_from_7k_to_8k():
158
- """Resume training from 7k model to create 8k model."""
159
- try:
160
- if not MODULES_AVAILABLE:
161
- return "❌ Required modules not available. Please check deployment."
162
-
163
- # Import required modules
164
- import json
165
- import time
166
- from datetime import datetime
167
-
168
- import torch
169
- from huggingface_hub import HfApi, create_repo, snapshot_download, whoami
170
- from model import GPTConfig, GPTModel
171
- from train_model import TextDataLoader
172
-
173
- print("πŸš€ Resuming Training from 7k to 8k Model")
174
- print("=" * 50)
175
-
176
- # Configuration
177
- hf_model_id = "lemms/openllm-small-extended-7k"
178
- additional_steps = 1000 # Train for 1000 more steps to reach 8k
179
- total_steps = 8000 # Total steps for the new model
180
-
181
- print(f"πŸ“₯ Source Model: {hf_model_id}")
182
- print(f"πŸ“ˆ Additional Steps: {additional_steps}")
183
- print(f"🎯 Target Steps: {total_steps}")
184
-
185
- # Setup authentication
186
- print("πŸ” Setting up authentication...")
187
- try:
188
- user_info = whoami()
189
- username = user_info.get("name", "unknown")
190
- print(f"βœ… Authentication successful! User: {username}")
191
- except Exception as e:
192
- return f"❌ Authentication failed: {e}"
193
-
194
- # Setup device
195
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
196
- print(f"πŸ–₯️ Using device: {device}")
197
-
198
- # Load model from Hugging Face
199
- print(f"πŸ“₯ Loading model from Hugging Face: {hf_model_id}")
200
- try:
201
- local_dir = snapshot_download(
202
- repo_id=hf_model_id,
203
- repo_type="model",
204
- local_dir=f"downloaded_models/{hf_model_id.replace('/', '_')}",
205
- )
206
- print(f"βœ… Model downloaded to: {local_dir}")
207
-
208
- # Load config
209
- config_path = Path(local_dir) / "config.json"
210
- if config_path.exists():
211
- with open(config_path, "r") as f:
212
- config_data = json.load(f)
213
-
214
- config = GPTConfig(
215
- vocab_size=config_data.get("vocab_size", 32000),
216
- block_size=config_data.get("block_size", 1024),
217
- n_layer=config_data.get("n_layer", 6),
218
- n_head=config_data.get("n_head", 6),
219
- n_embd=config_data.get("n_embd", 384),
220
- )
221
- print(f"πŸ“Š Loaded model config: {config}")
222
- else:
223
- config = GPTConfig.small()
224
- config.vocab_size = 32000
225
- print(f"⚠️ Config file not found, using default config")
226
-
227
- # Create model and load weights
228
- model = GPTModel(config)
229
- model_path = Path(local_dir) / "pytorch_model.bin"
230
-
231
- if model_path.exists():
232
- state_dict = torch.load(model_path, map_location=device)
233
- model.load_state_dict(state_dict)
234
- print(f"βœ… Model weights loaded successfully")
235
  else:
236
- raise FileNotFoundError(f"Model file not found: {model_path}")
237
-
238
- model = model.to(device)
239
-
240
- except Exception as e:
241
- return f"❌ Failed to load model from Hugging Face: {e}"
242
-
243
- # Create data loaders
244
- print(f"πŸ“Š Loading training data...")
245
- tokenizer_path = "data/tokenizer/tokenizer.model"
246
-
247
- train_loader = TextDataLoader(
248
- data_file="data/clean/training_data.txt",
249
- tokenizer_path=tokenizer_path,
250
- seq_len=1024,
251
- batch_size=16,
252
- shuffle=True,
253
- )
254
-
255
- print(f"βœ… Data loader created")
256
-
257
- # Setup optimizer
258
- optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.1)
259
-
260
- # Training loop
261
- print(f"\nπŸ”„ Starting training loop...")
262
- start_time = time.time()
263
- training_history = []
264
- best_loss = float("inf")
265
-
266
- try:
267
- train_iterator = iter(train_loader)
268
-
269
- for step in range(additional_steps):
270
- # Get batch
271
- try:
272
- batch = next(train_iterator)
273
- except StopIteration:
274
- # Restart data loader if exhausted
275
- train_loader = TextDataLoader(
276
- data_file="data/clean/training_data.txt",
277
- tokenizer_path=tokenizer_path,
278
- seq_len=1024,
279
- batch_size=16,
280
- shuffle=True,
281
- )
282
- train_iterator = iter(train_loader)
283
- batch = next(train_iterator)
284
-
285
- # Prepare inputs
286
- if isinstance(batch, (list, tuple)):
287
- inputs = batch[0].to(device)
288
- targets = batch[1].to(device) if len(batch) > 1 else None
289
- else:
290
- inputs = batch.to(device)
291
- targets = None
292
-
293
- # Forward pass
294
- logits, loss = model(inputs, targets)
295
-
296
- # Backward pass
297
- loss.backward()
298
-
299
- # Optimizer step
300
- optimizer.step()
301
- optimizer.zero_grad()
302
-
303
- # Record training history
304
- training_history.append(
305
- {
306
- "step": 7000 + step + 1, # Continue from step 7000
307
- "loss": loss.item(),
308
- "timestamp": datetime.now().isoformat(),
309
- }
310
- )
311
-
312
- # Progress reporting
313
- if (step + 1) % 10 == 0:
314
- elapsed = time.time() - start_time
315
- steps_per_sec = (step + 1) / elapsed
316
- eta = (additional_steps - step - 1) / steps_per_sec
317
-
318
- print(
319
- f"Step {7000 + step + 1}/{total_steps} | "
320
- f"Loss: {loss.item():.4f} | "
321
- f"Speed: {steps_per_sec:.1f} steps/s | "
322
- f"ETA: {eta/60:.1f} min"
323
- )
324
-
325
- # Evaluation
326
- if (step + 1) % 250 == 0:
327
- model.eval()
328
- total_loss = 0.0
329
- num_batches = 0
330
-
331
- with torch.no_grad():
332
- for val_batch in train_loader: # Use same loader for simplicity
333
- if isinstance(val_batch, (list, tuple)):
334
- val_inputs = val_batch[0].to(device)
335
- val_targets = (
336
- val_batch[1].to(device) if len(val_batch) > 1 else None
337
- )
338
- else:
339
- val_inputs = val_batch.to(device)
340
- val_targets = None
341
-
342
- val_logits, val_loss = model(val_inputs, val_targets)
343
- total_loss += val_loss.item()
344
- num_batches += 1
345
-
346
- if num_batches >= 5: # Limit evaluation
347
- break
348
-
349
- avg_val_loss = total_loss / num_batches
350
- model.train()
351
- print(f"πŸ“Š Validation Loss: {avg_val_loss:.4f}")
352
-
353
- # Check for best model
354
- if avg_val_loss < best_loss:
355
- best_loss = avg_val_loss
356
- print(f"πŸ† New best validation loss: {best_loss:.4f}")
357
-
358
- print(f"\nπŸŽ‰ Training completed successfully!")
359
- print(f"πŸ“Š Final Results:")
360
- print(f" - Additional Steps: {additional_steps}")
361
- print(f" - Total Steps: {total_steps}")
362
- print(f" - Final Loss: {loss.item():.4f}")
363
- print(f" - Best Validation Loss: {best_loss:.4f}")
364
- print(f" - Training Time: {(time.time() - start_time)/3600:.2f} hours")
365
-
366
- # Upload model
367
- print(f"\nπŸ“€ Uploading model to Hugging Face Hub...")
368
-
369
- # Create model directory
370
- model_path = Path("./trained_model")
371
- model_path.mkdir(exist_ok=True)
372
-
373
- # Save model files
374
- torch.save(model.state_dict(), model_path / "pytorch_model.bin")
375
-
376
- # Save config
377
- config_dict = {
378
- "model_type": "openllm",
379
- "model_size": "small",
380
- "vocab_size": 32000,
381
- "block_size": 1024,
382
- "n_layer": 6,
383
- "n_head": 6,
384
- "n_embd": 384,
385
- "training_config": {
386
- "model_size": "small",
387
- "training_steps": total_steps,
388
- "additional_steps": additional_steps,
389
- "base_model": hf_model_id,
390
- },
391
- "training_history": training_history,
392
  }
393
 
394
- with open(model_path / "config.json", "w") as f:
395
- json.dump(config_dict, f, indent=2)
396
-
397
- # Create model card
398
- readme_content = f"""# OpenLLM Small Model - Extended to 8k Steps
399
-
400
- This is an OpenLLM small model trained for {total_steps} steps by resuming training from [lemms/openllm-small-extended-7k](https://huggingface.co/lemms/openllm-small-extended-7k).
401
-
402
- ## Model Details
403
-
404
- - **Model Type**: OpenLLM
405
- - **Size**: small
406
- - **Training Steps**: {total_steps}
407
- - **Additional Steps**: {additional_steps}
408
- - **Base Model**: [lemms/openllm-small-extended-7k](https://huggingface.co/lemms/openllm-small-extended-7k)
409
- - **Final Loss**: {training_history[-1]['loss']:.4f} if training_history else 'N/A'
410
- - **Framework**: PyTorch
411
- - **License**: GPL-3.0
412
-
413
- ## Training Configuration
414
-
415
- ```json
416
- {json.dumps(config_dict, indent=2)}
417
- ```
418
-
419
- ## Training History
420
-
421
- The model was trained with the following key metrics:
422
- - Best validation loss: {best_loss:.4f}
423
- - Total training time: {len(training_history)} steps
424
- - Device used: {device}
425
-
426
- ## Usage
427
-
428
- This model can be used for text generation and language modeling tasks.
429
-
430
- ## Author
431
-
432
- Louis Chua Bean Chong
433
-
434
- ## License
435
-
436
- GPL-3.0
437
- """
438
-
439
- with open(model_path / "README.md", "w") as f:
440
- f.write(readme_content)
441
-
442
- # Upload to Hugging Face
443
- repo_name = "openllm-small-extended-8k"
444
- repo_id = f"{username}/{repo_name}"
445
-
446
- try:
447
- # Create repository
448
- create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
449
-
450
- # Upload files
451
- api = HfApi()
452
- api.upload_folder(
453
- folder_path=str(model_path),
454
- repo_id=repo_id,
455
- repo_type="model",
456
- commit_message=f"Add OpenLLM small model extended to {total_steps} steps",
457
- )
458
-
459
- print(f"βœ… Model uploaded successfully!")
460
- print(f"πŸ”— Model URL: https://huggingface.co/{repo_id}")
461
-
462
- result = f"βœ… 8k Model Training completed successfully!\n\n"
463
- result += f"πŸ“Š Results:\n"
464
- result += f" - Base Model: {hf_model_id}\n"
465
- result += f" - Additional Steps: {additional_steps}\n"
466
- result += f" - Total Steps: {total_steps}\n"
467
- result += f" - Final Loss: {loss.item():.4f}\n"
468
- result += f" - Best Validation Loss: {best_loss:.4f}\n"
469
- result += f" - Model URL: https://huggingface.co/{repo_id}\n\n"
470
- result += f"πŸŽ‰ Extended model available at: https://huggingface.co/{repo_id}"
471
 
472
- return result
473
 
474
- except Exception as e:
475
- print(f"❌ Model upload failed: {e}")
476
- return f"⚠️ Training completed but upload failed: {e}"
477
 
478
- except KeyboardInterrupt:
479
- print(f"\n⚠️ Training interrupted by user")
480
- return "⚠️ Training was interrupted by user"
481
 
482
  except Exception as e:
483
- return f"❌ Error resuming training: {e}"
484
 
485
- def resume_training_from_7k_to_8k():
486
- """Resume training from 7k model to create 8k model."""
487
  try:
488
- if not MODULES_AVAILABLE:
489
- return "❌ Required modules not available. Please check deployment."
490
-
491
- # Import required modules
492
- import json
493
- import time
494
- from datetime import datetime
495
-
496
- import torch
497
- from huggingface_hub import HfApi, create_repo, snapshot_download, whoami
498
- from model import GPTConfig, GPTModel
499
- from train_model import TextDataLoader
500
-
501
- print("πŸš€ Resuming Training from 7k to 8k Model")
502
- print("=" * 50)
503
-
504
- # Configuration
505
- hf_model_id = "lemms/openllm-small-extended-7k"
506
- additional_steps = 1000 # Train for 1000 more steps to reach 8k
507
- total_steps = 8000 # Total steps for the new model
508
-
509
- print(f"πŸ“₯ Source Model: {hf_model_id}")
510
- print(f"πŸ“ˆ Additional Steps: {additional_steps}")
511
- print(f"🎯 Target Steps: {total_steps}")
512
-
513
- # Setup authentication
514
- print("πŸ” Setting up authentication...")
515
- try:
516
- user_info = whoami()
517
- username = user_info.get("name", "unknown")
518
- print(f"βœ… Authentication successful! User: {username}")
519
- except Exception as e:
520
- return f"❌ Authentication failed: {e}"
521
-
522
- # Setup device
523
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
524
- print(f"πŸ–₯️ Using device: {device}")
525
-
526
- # Load model from Hugging Face
527
- print(f"πŸ“₯ Loading model from Hugging Face: {hf_model_id}")
528
- try:
529
- local_dir = snapshot_download(
530
- repo_id=hf_model_id,
531
- repo_type="model",
532
- local_dir=f"downloaded_models/{hf_model_id.replace('/', '_')}",
533
- )
534
- print(f"βœ… Model downloaded to: {local_dir}")
535
-
536
- # Load config
537
- config_path = Path(local_dir) / "config.json"
538
- if config_path.exists():
539
- with open(config_path, "r") as f:
540
- config_data = json.load(f)
541
-
542
- config = GPTConfig(
543
- vocab_size=config_data.get("vocab_size", 32000),
544
- block_size=config_data.get("block_size", 1024),
545
- n_layer=config_data.get("n_layer", 6),
546
- n_head=config_data.get("n_head", 6),
547
- n_embd=config_data.get("n_embd", 384),
548
- )
549
- print(f"πŸ“Š Loaded model config: {config}")
550
- else:
551
- config = GPTConfig.small()
552
- config.vocab_size = 32000
553
- print(f"⚠️ Config file not found, using default config")
554
-
555
- # Create model and load weights
556
- model = GPTModel(config)
557
- model_path = Path(local_dir) / "pytorch_model.bin"
558
-
559
- if model_path.exists():
560
- state_dict = torch.load(model_path, map_location=device)
561
- model.load_state_dict(state_dict)
562
- print(f"βœ… Model weights loaded successfully")
563
- else:
564
- raise FileNotFoundError(f"Model file not found: {model_path}")
565
-
566
- model = model.to(device)
567
 
568
- except Exception as e:
569
- return f"❌ Failed to load model from Hugging Face: {e}"
 
570
 
571
- # Create data loaders
572
- print(f"πŸ“Š Loading training data...")
573
- tokenizer_path = "data/tokenizer/tokenizer.model"
574
-
575
- train_loader = TextDataLoader(
576
- data_file="data/clean/training_data.txt",
577
- tokenizer_path=tokenizer_path,
578
- seq_len=1024,
579
- batch_size=16,
580
- shuffle=True,
581
  )
582
 
583
- print(f"βœ… Data loader created")
584
-
585
- # Setup optimizer
586
- optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.1)
587
-
588
- # Training loop
589
- print(f"\nπŸ”„ Starting training loop...")
590
- start_time = time.time()
591
- training_history = []
592
- best_loss = float("inf")
593
-
594
- try:
595
- train_iterator = iter(train_loader)
596
-
597
- for step in range(additional_steps):
598
- # Get batch
599
- try:
600
- batch = next(train_iterator)
601
- except StopIteration:
602
- # Restart data loader if exhausted
603
- train_loader = TextDataLoader(
604
- data_file="data/clean/training_data.txt",
605
- tokenizer_path=tokenizer_path,
606
- seq_len=1024,
607
- batch_size=16,
608
- shuffle=True,
609
- )
610
- train_iterator = iter(train_loader)
611
- batch = next(train_iterator)
612
-
613
- # Prepare inputs
614
- if isinstance(batch, (list, tuple)):
615
- inputs = batch[0].to(device)
616
- targets = batch[1].to(device) if len(batch) > 1 else None
617
- else:
618
- inputs = batch.to(device)
619
- targets = None
620
-
621
- # Forward pass
622
- logits, loss = model(inputs, targets)
623
-
624
- # Backward pass
625
- loss.backward()
626
-
627
- # Optimizer step
628
- optimizer.step()
629
- optimizer.zero_grad()
630
-
631
- # Record training history
632
- training_history.append(
633
- {
634
- "step": 7000 + step + 1, # Continue from step 7000
635
- "loss": loss.item(),
636
- "timestamp": datetime.now().isoformat(),
637
- }
638
- )
639
-
640
- # Progress reporting
641
- if (step + 1) % 10 == 0:
642
- elapsed = time.time() - start_time
643
- steps_per_sec = (step + 1) / elapsed
644
- eta = (additional_steps - step - 1) / steps_per_sec
645
-
646
- print(
647
- f"Step {7000 + step + 1}/{total_steps} | "
648
- f"Loss: {loss.item():.4f} | "
649
- f"Speed: {steps_per_sec:.1f} steps/s | "
650
- f"ETA: {eta/60:.1f} min"
651
- )
652
-
653
- # Evaluation
654
- if (step + 1) % 250 == 0:
655
- model.eval()
656
- total_loss = 0.0
657
- num_batches = 0
658
-
659
- with torch.no_grad():
660
- for val_batch in train_loader: # Use same loader for simplicity
661
- if isinstance(val_batch, (list, tuple)):
662
- val_inputs = val_batch[0].to(device)
663
- val_targets = (
664
- val_batch[1].to(device) if len(val_batch) > 1 else None
665
- )
666
- else:
667
- val_inputs = val_batch.to(device)
668
- val_targets = None
669
-
670
- val_logits, val_loss = model(val_inputs, val_targets)
671
- total_loss += val_loss.item()
672
- num_batches += 1
673
-
674
- if num_batches >= 5: # Limit evaluation
675
- break
676
-
677
- avg_val_loss = total_loss / num_batches
678
- model.train()
679
- print(f"πŸ“Š Validation Loss: {avg_val_loss:.4f}")
680
-
681
- # Check for best model
682
- if avg_val_loss < best_loss:
683
- best_loss = avg_val_loss
684
- print(f"πŸ† New best validation loss: {best_loss:.4f}")
685
-
686
- print(f"\nπŸŽ‰ Training completed successfully!")
687
- print(f"πŸ“Š Final Results:")
688
- print(f" - Additional Steps: {additional_steps}")
689
- print(f" - Total Steps: {total_steps}")
690
- print(f" - Final Loss: {loss.item():.4f}")
691
- print(f" - Best Validation Loss: {best_loss:.4f}")
692
- print(f" - Training Time: {(time.time() - start_time)/3600:.2f} hours")
693
-
694
- # Upload model
695
- print(f"\nπŸ“€ Uploading model to Hugging Face Hub...")
696
-
697
- # Create model directory
698
- model_path = Path("./trained_model")
699
- model_path.mkdir(exist_ok=True)
700
-
701
- # Save model files
702
- torch.save(model.state_dict(), model_path / "pytorch_model.bin")
703
-
704
- # Save config
705
- config_dict = {
706
- "model_type": "openllm",
707
- "model_size": "small",
708
- "vocab_size": 32000,
709
- "block_size": 1024,
710
- "n_layer": 6,
711
- "n_head": 6,
712
- "n_embd": 384,
713
- "training_config": {
714
- "model_size": "small",
715
- "training_steps": total_steps,
716
- "additional_steps": additional_steps,
717
- "base_model": hf_model_id,
718
- },
719
- "training_history": training_history,
720
- }
721
-
722
- with open(model_path / "config.json", "w") as f:
723
- json.dump(config_dict, f, indent=2)
724
-
725
- # Create model card
726
- readme_content = f"""# OpenLLM Small Model - Extended to 8k Steps
727
-
728
- This is an OpenLLM small model trained for {total_steps} steps by resuming training from [lemms/openllm-small-extended-7k](https://huggingface.co/lemms/openllm-small-extended-7k).
729
-
730
- ## Model Details
731
-
732
- - **Model Type**: OpenLLM
733
- - **Size**: small
734
- - **Training Steps**: {total_steps}
735
- - **Additional Steps**: {additional_steps}
736
- - **Base Model**: [lemms/openllm-small-extended-7k](https://huggingface.co/lemms/openllm-small-extended-7k)
737
- - **Final Loss**: {training_history[-1]['loss']:.4f} if training_history else 'N/A'
738
- - **Framework**: PyTorch
739
- - **License**: GPL-3.0
740
-
741
- ## Training Configuration
742
-
743
- ```json
744
- {json.dumps(config_dict, indent=2)}
745
- ```
746
-
747
- ## Training History
748
-
749
- The model was trained with the following key metrics:
750
- - Best validation loss: {best_loss:.4f}
751
- - Total training time: {len(training_history)} steps
752
- - Device used: {device}
753
-
754
- ## Usage
755
-
756
- This model can be used for text generation and language modeling tasks.
757
-
758
- ## Author
759
-
760
- Louis Chua Bean Chong
761
-
762
- ## License
763
-
764
- GPL-3.0
765
- """
766
 
767
- with open(model_path / "README.md", "w") as f:
768
- f.write(readme_content)
769
 
770
- # Upload to Hugging Face
771
- repo_name = "openllm-small-extended-8k"
772
- repo_id = f"{username}/{repo_name}"
773
 
774
- try:
775
- # Create repository
776
- create_repo(repo_id=repo_id, repo_type="model", exist_ok=True, private=False)
777
 
778
- # Upload files
779
- api = HfApi()
780
- api.upload_folder(
781
- folder_path=str(model_path),
782
- repo_id=repo_id,
783
- repo_type="model",
784
- commit_message=f"Add OpenLLM small model extended to {total_steps} steps",
785
- )
786
 
787
- print(f"βœ… Model uploaded successfully!")
788
- print(f"πŸ”— Model URL: https://huggingface.co/{repo_id}")
789
 
790
- result = f"βœ… 8k Model Training completed successfully!\n\n"
791
- result += f"πŸ“Š Results:\n"
792
- result += f" - Base Model: {hf_model_id}\n"
793
- result += f" - Additional Steps: {additional_steps}\n"
794
- result += f" - Total Steps: {total_steps}\n"
795
- result += f" - Final Loss: {loss.item():.4f}\n"
796
- result += f" - Best Validation Loss: {best_loss:.4f}\n"
797
- result += f" - Model URL: https://huggingface.co/{repo_id}\n\n"
798
- result += f"πŸŽ‰ Extended model available at: https://huggingface.co/{repo_id}"
799
 
800
- return result
 
801
 
802
- except Exception as e:
803
- print(f"❌ Model upload failed: {e}")
804
- return f"⚠️ Training completed but upload failed: {e}"
805
 
806
- except KeyboardInterrupt:
807
- print(f"\n⚠️ Training interrupted by user")
808
- return "⚠️ Training was interrupted by user"
809
 
810
- except Exception as e:
811
- return f"❌ Error resuming training: {e}"
812
 
813
- def check_space_environment():
814
- """Check the Space environment and configuration."""
815
- try:
816
- # Check if we're in a Space
817
- space_vars = ["SPACE_ID", "SPACE_HOST", "SPACE_REPO_ID"]
818
- is_space = any(os.getenv(var) for var in space_vars)
819
 
820
- # Check HF_TOKEN
821
- hf_token = os.getenv("HF_TOKEN")
822
 
823
- result = "πŸ” Space Environment Check:\n\n"
824
 
825
- if is_space:
826
- result += "βœ… Running in Hugging Face Space environment\n"
827
- for var in space_vars:
828
- value = os.getenv(var)
829
- if value:
830
- result += f" - {var}: {value}\n"
831
- else:
832
- result += "ℹ️ Running in local environment\n"
833
-
834
- # Test Space's built-in authentication
835
- try:
836
- from huggingface_hub import whoami
837
-
838
- user_info = whoami()
839
- result += f"βœ… Space built-in authentication working\n"
840
- result += f" - User: {user_info['name']}\n"
841
- result += f" - Full name: {user_info['fullname']}\n"
842
- result += f" - Authentication: Space built-in token\n"
843
- except Exception as auth_error:
844
- result += f"❌ Space built-in authentication failed: {str(auth_error)[:50]}...\n"
845
-
846
- if hf_token:
847
- result += f"βœ… HF access token found: {hf_token[:8]}...{hf_token[-4:]}\n"
848
- result += " - Source: HF access token in Space settings\n"
849
- else:
850
- result += "❌ HF access token not found\n"
851
- result += " - Please set HF_TOKEN in Space settings with HF access token\n"
852
- result += " - Or ensure Space has proper authentication permissions\n"
853
 
854
- result += f"\nπŸ“ Available modules: {'βœ…' if MODULES_AVAILABLE else '❌'}"
855
 
856
- return result
857
 
858
- except Exception as e:
859
- return f"❌ Error checking environment: {e}"
860
-
861
- # Create the Gradio interface with security mitigations
862
- with gr.Blocks(
863
- title="OpenLLM Training Space",
864
- theme=gr.themes.Soft(),
865
- # Security mitigations
866
- analytics_enabled=False, # Disable analytics
867
- ) as interface:
868
  gr.Markdown(
869
  """
870
- # πŸš€ OpenLLM Training Space
871
 
872
- Welcome to the OpenLLM Training Space! This Space provides a complete environment for training OpenLLM models with automatic Hugging Face authentication and model upload.
873
 
874
- ## πŸ” Authentication
875
 
876
- This Space uses HF access token for secure authentication. The HF_TOKEN is automatically available from your Space settings.
877
 
878
- ## πŸ“‹ Available Actions
879
 
880
- 1. **Environment Check**: Verify Space configuration and authentication
881
- 2. **Authentication Test**: Test Hugging Face authentication
882
- 3. **Training Interface**: Unified interface for fresh training and resume training
883
  """
884
  )
885
 
886
- with gr.Tab("πŸ” Environment Check"):
887
- gr.Markdown("Check the Space environment and configuration.")
888
- env_check_btn = gr.Button("Check Environment", variant="primary")
889
- env_output = gr.Textbox(label="Environment Status", lines=10, interactive=False)
890
- env_check_btn.click(check_space_environment, outputs=env_output)
891
-
892
- with gr.Tab("πŸ” Authentication Test"):
893
- gr.Markdown("Test Hugging Face authentication using HF access token.")
894
- auth_test_btn = gr.Button("Run Authentication Test", variant="primary")
895
- auth_output = gr.Textbox(label="Authentication Results", lines=15, interactive=False)
896
- auth_test_btn.click(run_authentication_test, outputs=auth_output)
897
-
898
- with gr.Tab("πŸš€ Training Interface"):
899
- gr.Markdown(
900
- """
901
- # πŸš€ OpenLLM Training Interface
902
-
903
- Choose your training mode and configure parameters for model training.
904
-
905
- ## 🎯 Training Modes
906
-
907
- **1. Fresh Training**: Start training from scratch with a new model
908
- **2. Resume Training**: Load the 7k model and continue training to 8k steps
909
-
910
- ## πŸ“Š Training Parameters
911
-
912
- - **Model Size**: Choose the model size (small, medium, large)
913
- - **Training Steps**: Number of training steps (default: 8000)
914
- - **Training Mode**: Select between fresh training or resume training
915
- - **Real Training**: Enable comprehensive training with checkpoints and validation
916
-
917
- ## πŸŽ‰ Expected Results
918
-
919
- - Training will complete successfully
920
- - Model will be uploaded to Hugging Face Hub
921
- - Repository will be created with proper model files
922
- """
923
- )
924
-
925
- with gr.Row():
926
- training_mode = gr.Radio(
927
- choices=["Fresh Training", "Resume 7k to 8k"],
928
- value="Fresh Training",
929
- label="Training Mode",
930
- info="Choose between fresh training or resuming from 7k model",
931
  )
932
 
933
- with gr.Row():
934
- model_size = gr.Dropdown(
935
- choices=["small", "medium", "large"],
936
- value="small",
937
- label="Model Size",
938
- info="Choose the model size for training (only applies to fresh training)",
939
- interactive=True,
940
  )
941
- training_steps = gr.Number(
942
- value=8000,
943
- label="Training Steps",
944
- info="Number of training steps (only applies to fresh training)",
945
- minimum=1000,
946
- maximum=50000,
947
- interactive=True,
948
  )
949
 
950
- with gr.Row():
951
- use_real_training = gr.Checkbox(
952
- value=False,
953
- label="Use Real Training",
954
- info="Enable real model training with checkpoints and validation (slower but more realistic)",
955
  )
956
 
957
- # Function to update UI based on training mode
958
- def update_ui_for_mode(mode):
959
- """Update UI elements based on selected training mode."""
960
- if mode == "Resume 7k to 8k":
961
- return gr.Dropdown(interactive=False), gr.Number(interactive=False)
962
- else:
963
- return gr.Dropdown(interactive=True), gr.Number(interactive=True)
964
 
965
- # Update UI when training mode changes
966
- training_mode.change(
967
- update_ui_for_mode,
968
- inputs=[training_mode],
969
- outputs=[model_size, training_steps],
970
- )
971
 
972
- # Unified training function
973
- def unified_training(mode, model_size, training_steps, use_real_training):
974
- """Unified training function that handles both fresh and resume training."""
975
- if mode == "Resume 7k to 8k":
976
- return resume_training_from_7k_to_8k()
977
- else:
978
- return run_training(model_size, training_steps, use_real_training)
979
 
980
- train_btn = gr.Button("Start Training", variant="primary", size="lg")
981
- train_output = gr.Textbox(label="Training Results", lines=25, interactive=False)
982
 
983
- train_btn.click(
984
- unified_training,
985
- inputs=[training_mode, model_size, training_steps, use_real_training],
986
- outputs=train_output,
987
- )
988
 
989
- with gr.Tab("πŸ“š Documentation"):
990
- gr.Markdown(
991
- """
992
- ## πŸ“– Available Documentation
993
-
994
- - **HUGGINGFACE_SPACE_SETUP_GUIDE.md**: Complete setup guide
995
- - **SPACE_AUTHENTICATION_SUMMARY.md**: Authentication summary
996
- - **SPACE_READY_SUMMARY.md**: Deployment summary
997
-
998
- ## πŸ”§ Available Scripts
999
-
1000
- - **space_auth_test.py**: Authentication verification
1001
- - **openllm_training_with_auth.py**: Complete training script
1002
- - **integrate_auth_into_training.py**: Integration guide
1003
- - **setup_hf_space_auth.py**: Space authentication setup
1004
- - **verify_space_auth.py**: Space verification script
1005
-
1006
- ## 🎯 Quick Start
1007
-
1008
- 1. Check the environment to verify configuration
1009
- 2. Run authentication test to ensure GitHub secrets are working
1010
- 3. Start training with your desired parameters
1011
- 4. Monitor the training progress and model upload
1012
-
1013
- ## πŸ”’ Security
1014
-
1015
- - HF_TOKEN is securely stored in GitHub repository secrets
1016
- - No hardcoded tokens in any scripts
1017
- - Automatic cleanup of test repositories
1018
- - Proper error handling and logging
1019
  """
1020
- )
1021
 
1022
  return interface
1023
 
1024
 
 
1025
  if __name__ == "__main__":
1026
- # Create and launch the interface
1027
- interface = create_space_interface()
1028
- interface.launch(
1029
- server_name="0.0.0.0",
1030
- server_port=7860,
1031
- share=False,
1032
- # Security mitigations for Gradio vulnerabilities
1033
- allowed_paths=[], # Restrict file access
1034
- auth=None, # Disable authentication to prevent code injection
1035
- quiet=True, # Reduce logging
1036
- )
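Note that the old file defined `resume_training_from_7k_to_8k()` twice, verbatim; this commit deletes both copies along with the rest of the training UI. The core of that resume flow, reduced to a minimal sketch (`GPTModel`/`GPTConfig` and the data loader lived in the removed file's own `model`/`train_model` modules, so the training part is shown as comments under that assumption):

```python
# Minimal resume-training sketch based on the deleted resume_training_from_7k_to_8k().
import torch
from huggingface_hub import snapshot_download

local_dir = snapshot_download(repo_id="lemms/openllm-small-extended-7k", repo_type="model")
state_dict = torch.load(f"{local_dir}/pytorch_model.bin", map_location="cpu")

# model = GPTModel(config); model.load_state_dict(state_dict)   # project-specific classes
# optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4, weight_decay=0.1)
# for step in range(1000):                                       # 7k -> 8k
#     logits, loss = model(inputs, targets)
#     loss.backward(); optimizer.step(); optimizer.zero_grad()
```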
 
1
  #!/usr/bin/env python3
2
  """
3
+ OpenLLM Real Models App - Final working version with correct attribute naming
4
  """
5
 
6
  import gradio as gr
7
+ import torch
8
+ import torch.nn as nn
9
+ import torch.nn.functional as F
10
+ import json
11
+ import logging
12
+ import sentencepiece as spm
13
+ import math
14
+ from pathlib import Path
15
+ from typing import Dict, Any, Optional
16
+ from huggingface_hub import snapshot_download
17
+
18
+ # Set up logging
19
+ logging.basicConfig(level=logging.INFO)
20
+ logger = logging.getLogger(__name__)
21
+
22
+
23
+ class GPTConfig:
24
+ """GPT model configuration"""
25
+
26
+ def __init__(
27
+ self,
28
+ vocab_size=32000,
29
+ n_layer=6,
30
+ n_head=8,
31
+ n_embd=512,
32
+ block_size=1024,
33
+ dropout=0.1,
34
+ bias=False,
35
+ **kwargs,
36
+ ):
37
+ # Accept any additional kwargs to handle extra config fields
38
+ self.vocab_size = vocab_size
39
+ self.n_layer = n_layer
40
+ self.n_head = n_head
41
+ self.n_embd = n_embd
42
+ self.block_size = block_size
43
+ self.dropout = dropout
44
+ self.bias = bias
45
+
46
+
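The `**kwargs` catch-all lets `GPTConfig` accept checkpoint configs that carry extra bookkeeping fields without raising. A quick sketch of that behaviour (the extra keys here are illustrative, not from a real checkpoint):

```python
# Unknown fields such as "model_type" are accepted and simply ignored.
cfg = GPTConfig(vocab_size=32000, n_layer=6, model_type="openllm", training_steps=10000)
assert cfg.vocab_size == 32000 and cfg.n_layer == 6
```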
47
+ class GPT(nn.Module):
48
+ """GPT-style transformer model - EXACT architecture matching the saved model"""
49
+
50
+ def __init__(self, config):
51
+ super().__init__()
52
+ assert config.vocab_size is not None
53
+ assert config.block_size is not None
54
+ self.config = config
55
+
56
+ # Create the transformer module with the exact naming convention
57
+ self.transformer = nn.ModuleDict(
58
+ dict(
59
+ wte=nn.Embedding(config.vocab_size, config.n_embd),
60
+ wpe=nn.Embedding(config.block_size, config.n_embd),
61
+ drop=nn.Dropout(config.dropout),
62
+ h=nn.ModuleList([Block(config) for _ in range(config.n_layer)]),
63
+ ln_f=nn.LayerNorm(config.n_embd),
64
+ )
65
+ )
66
 
67
+ # Language model head - Use bias=False to match saved models
68
+ self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)
69
+
70
+ # Initialize weights
71
+ self.apply(self._init_weights)
72
+ for pn, p in self.named_parameters():
73
+ if pn.endswith("c_proj.weight"):
74
+ torch.nn.init.normal_(p, mean=0.0, std=0.02 / math.sqrt(2 * config.n_layer))
75
+
76
+ def _init_weights(self, module):
77
+ if isinstance(module, nn.Linear):
78
+ torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
79
+ if module.bias is not None:
80
+ torch.nn.init.zeros_(module.bias)
81
+ elif isinstance(module, nn.Embedding):
82
+ torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
83
+
84
+ def forward(self, idx, targets=None):
85
+ device = idx.device
86
+ b, t = idx.size()
87
+ assert (
88
+ t <= self.config.block_size
89
+ ), f"Cannot forward sequence of length {t}, block size is only {self.config.block_size}"
90
+
91
+ pos = torch.arange(0, t, dtype=torch.long, device=device).unsqueeze(0)
92
+ tok_emb = self.transformer.wte(idx)
93
+ pos_emb = self.transformer.wpe(pos)
94
+ x = self.transformer.drop(tok_emb + pos_emb)
95
+
96
+ for block in self.transformer.h:
97
+ x = block(x)
98
+ x = self.transformer.ln_f(x)
99
+
100
+ if targets is not None:
101
+ logits = self.lm_head(x)
102
+ loss = F.cross_entropy(
103
+ logits.view(-1, logits.size(-1)), targets.view(-1), ignore_index=-1
104
+ )
105
+ else:
106
+ logits = self.lm_head(x[:, [-1], :])
107
+ loss = None
108
+
109
+ return logits, loss
110
+
111
+ def generate(
112
+ self, idx, max_new_tokens, temperature=1.0, top_k=None, top_p=None, do_sample=True
113
+ ):
114
+ for _ in range(max_new_tokens):
115
+ idx_cond = (
116
+ idx if idx.size(1) <= self.config.block_size else idx[:, -self.config.block_size :]
117
+ )
118
+ logits, _ = self(idx_cond)
119
+ logits = logits[:, -1, :] / temperature
120
+
121
+ if top_k is not None:
122
+ v, _ = torch.topk(logits, min(top_k, logits.size(-1)))
123
+ logits[logits < v[:, [-1]]] = -float("Inf")
124
+
125
+ if top_p is not None:
126
+ sorted_logits, sorted_indices = torch.sort(logits, descending=True)
127
+ cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
128
+ sorted_indices_to_remove = cumulative_probs > top_p
129
+ sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
130
+ sorted_indices_to_remove[..., 0] = 0
131
+ indices_to_remove = sorted_indices_to_remove.scatter(
132
+ 1, sorted_indices, sorted_indices_to_remove
133
+ )
134
+ logits[indices_to_remove] = -float("Inf")
135
 
136
+ probs = F.softmax(logits, dim=-1)
137
+ if do_sample:
138
+ idx_next = torch.multinomial(probs, num_samples=1)
139
+ else:
140
+ _, idx_next = torch.topk(probs, k=1, dim=-1)
141
+
142
+ idx = torch.cat((idx, idx_next), dim=1)
143
+
144
+ return idx
145
+
146
+
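The `top_p` branch in `generate()` above is standard nucleus sampling: keep the smallest set of highest-probability tokens whose cumulative probability exceeds `top_p` and mask everything else. A toy walk-through with made-up logits:

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])          # probs ~ [0.61, 0.22, 0.14, 0.03]
sorted_logits, sorted_idx = torch.sort(logits, descending=True)
cum = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
remove = cum > 0.9                                       # [F, F, T, T]
remove[..., 1:] = remove[..., :-1].clone()               # shift right so the boundary token survives
remove[..., 0] = 0                                       # always keep the top token
logits[remove.scatter(1, sorted_idx, remove)] = -float("Inf")
# softmax(logits) now samples only from the first three tokens (the nucleus).
```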
147
+ class Block(nn.Module):
148
+ """Transformer block with self-attention and feed-forward layers"""
149
+
150
+ def __init__(self, config):
151
+ super().__init__()
152
+ self.ln_1 = nn.LayerNorm(config.n_embd)
153
+ self.attn = CausalSelfAttention(config)
154
+ self.ln_2 = nn.LayerNorm(config.n_embd)
155
+ self.mlp = MLP(config)
156
+
157
+ def forward(self, x):
158
+ x = x + self.attn(self.ln_1(x))
159
+ x = x + self.mlp(self.ln_2(x))
160
+ return x
161
+
162
+
163
+ class CausalSelfAttention(nn.Module):
164
+ """Multi-head self-attention with causal masking - FINAL WORKING VERSION"""
165
+
166
+ def __init__(self, config):
167
+ super().__init__()
168
+ assert config.n_embd % config.n_head == 0
169
+ self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd, bias=config.bias)
170
+ self.c_proj = nn.Linear(config.n_embd, config.n_embd, bias=config.bias)
171
+ self.attn_dropout = nn.Dropout(config.dropout)
172
+ self.resid_dropout = nn.Dropout(config.dropout)
173
+ self.n_head = config.n_head
174
+ self.n_embd = config.n_embd
175
+ self.dropout = config.dropout
176
+ self.use_bias = config.bias # Use different name for the boolean flag
177
+
178
+ # REGISTER THE ATTENTION BIAS as a buffer (not parameter) to match saved model
179
+ # This is actually an attention mask, not a learnable bias
180
+ if config.bias:
181
+ # Create a causal attention mask buffer
182
+ mask = torch.tril(torch.ones(config.block_size, config.block_size))
183
+ mask = mask.view(1, 1, config.block_size, config.block_size)
184
+ self.register_buffer("bias", mask) # This matches the saved model's 'bias' key
185
+ else:
186
+ self.register_buffer("bias", None)
187
+
188
+ def forward(self, x):
189
+ B, T, C = x.size()
190
+
191
+ # Calculate query, key, values for all heads
192
+ q, k, v = self.c_attn(x).split(self.n_embd, dim=2)
193
+ k = k.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
194
+ q = q.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
195
+ v = v.view(B, T, self.n_head, C // self.n_head).transpose(1, 2)
196
+
197
+ # Causal self-attention using the bias mask
198
+ if self.bias is not None:
199
+ # Use the causal mask
200
+ attn_mask = self.bias[:, :, :T, :T].bool()  # cast to bool: a float 0/1 mask would be *added* to the scores instead of masking them
201
+ y = F.scaled_dot_product_attention(
202
+ q,
203
+ k,
204
+ v,
205
+ attn_mask=attn_mask,
206
+ dropout_p=self.dropout if self.training else 0,
207
+ is_causal=False,
208
+ )
209
+ else:
210
+ # Use built-in causal attention
211
+ y = F.scaled_dot_product_attention(
212
+ q,
213
+ k,
214
+ v,
215
+ attn_mask=None,
216
+ dropout_p=self.dropout if self.training else 0,
217
+ is_causal=True,
218
+ )
219
 
220
+ y = y.transpose(1, 2).contiguous().view(B, T, C)
221
+
222
+ # Output projection
223
+ y = self.resid_dropout(self.c_proj(y))
224
+ return y
225
+
226
+
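With the 0/1 mask cast to `bool` (see the cast in `forward()` above), the buffer-based path and PyTorch's built-in `is_causal=True` path produce the same result; a quick self-check sketch:

```python
import torch
import torch.nn.functional as F

q = torch.randn(1, 2, 4, 8)                 # (batch, heads, seq_len, head_dim)
k, v = torch.randn_like(q), torch.randn_like(q)
mask = torch.tril(torch.ones(4, 4)).view(1, 1, 4, 4).bool()
a = F.scaled_dot_product_attention(q, k, v, attn_mask=mask, is_causal=False)
b = F.scaled_dot_product_attention(q, k, v, is_causal=True)
assert torch.allclose(a, b, atol=1e-6)
```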
227
+ class MLP(nn.Module):
228
+ """Multi-layer perceptron"""
229
+
230
+ def __init__(self, config):
231
+ super().__init__()
232
+ self.c_fc = nn.Linear(config.n_embd, 4 * config.n_embd, bias=config.bias)
233
+ self.gelu = nn.GELU()
234
+ self.c_proj = nn.Linear(4 * config.n_embd, config.n_embd, bias=config.bias)
235
+ self.dropout = nn.Dropout(config.dropout)
236
+
237
+ def forward(self, x):
238
+ x = self.c_fc(x)
239
+ x = self.gelu(x)
240
+ x = self.c_proj(x)
241
+ x = self.dropout(x)
242
+ return x
243
+
244
+
245
+ class RealOpenLLMInference:
246
+ """Real OpenLLM inference engine using actual trained models"""
247
+
248
+ def __init__(self):
249
+ self.models = {}
250
+ self.tokenizers = {}
251
+ self.current_model = None
252
+
253
+ # Real model configurations from Hugging Face
254
+ self.model_configs = {
255
+ "openllm-small-extended-4k": {
256
+ "name": "OpenLLM Small (4k steps)",
257
+ "description": "Real model trained for 4,000 steps - Early training stage",
258
+ "hf_repo": "lemms/openllm-small-extended-4k",
259
+ "training_steps": 4000,
260
+ "parameters": "35.8M",
261
+ },
262
+ "openllm-small-extended-6k": {
263
+ "name": "OpenLLM Small (6k steps)",
264
+ "description": "Real model trained for 6,000 steps - Improved coherence (Perplexity: 816.040)",
265
+ "hf_repo": "lemms/openllm-small-extended-6k",
266
+ "training_steps": 6000,
267
+ "parameters": "35.8M",
268
+ },
269
+ "openllm-small-extended-7k": {
270
+ "name": "OpenLLM Small (7k steps)",
271
+ "description": "Real model trained for 7,000 steps - Enhanced quality (Loss: 2.100, Perplexity: 8.200)",
272
+ "hf_repo": "lemms/openllm-small-extended-7k",
273
+ "training_steps": 7000,
274
+ "parameters": "35.8M",
275
+ },
276
+ "openllm-small-extended-8k": {
277
+ "name": "OpenLLM Small (8k steps)",
278
+ "description": "Real model trained for 8,000 steps - Sophisticated understanding",
279
+ "hf_repo": "lemms/openllm-small-extended-8k",
280
+ "training_steps": 8000,
281
+ "parameters": "35.8M",
282
+ },
283
+ "openllm-small-extended-9k": {
284
+ "name": "OpenLLM Small (9k steps)",
285
+ "description": "Real model trained for 9,000 steps - Best performing model",
286
+ "hf_repo": "lemms/openllm-small-extended-9k",
287
+ "training_steps": 9000,
288
+ "parameters": "35.8M",
289
+ },
290
+ "openllm-small-extended-10k": {
291
+ "name": "OpenLLM Small (10k steps)",
292
+ "description": "Real model trained for 10,000 steps - Latest extended training",
293
+ "hf_repo": "lemms/openllm-small-extended-10k",
294
+ "training_steps": 10000,
295
+ "parameters": "35.8M",
296
+ },
297
+ "openllm-small-extended-10k-improved": {
298
+ "name": "OpenLLM Small (10k steps - Improved)",
299
+ "description": "Real model trained for 10,000 steps with improved training process - Proper checkpoint format",
300
+ "hf_repo": "lemms/openllm-small-extended-10k-improved",
301
+ "training_steps": 10000,
302
+ "parameters": "35.8M",
303
+ },
304
+ }
305
+
306
+ logger.info("πŸš€ Real OpenLLM Inference Engine initialized")
307
+
308
+ def load_model_from_hf(self, model_id: str) -> bool:
309
+ """Load a real model from Hugging Face"""
310
  try:
311
+ config = self.model_configs.get(model_id)
312
+ if not config:
313
+ logger.error(f"❌ Unknown model ID: {model_id}")
314
+ return False
315
+
316
+ logger.info(f"πŸ“₯ Loading real model from HF: {config['hf_repo']}")
317
+
318
+ # Download model from Hugging Face
319
+ local_dir = snapshot_download(
320
+ repo_id=config["hf_repo"],
321
+ repo_type="model",
322
+ local_dir=f"temp_{model_id}",
323
+ allow_patterns=["*.pt", "*.json", "*.model", "*.bin"],
324
+ )
325
 
326
+ logger.info(f"βœ… Downloaded model to: {local_dir}")
327
 
328
+ # Load model and tokenizer
329
+ success = self._load_model_and_tokenizer(local_dir, model_id)
330
  if success:
331
+ self.current_model = model_id
332
+ logger.info(f"βœ… Successfully loaded real model: {model_id}")
333
+ return True
334
  else:
335
+ return False
336
 
337
  except Exception as e:
338
+ logger.error(f"❌ Failed to load real model from HF {model_id}: {e}")
339
+ return False
340
 
341
+ def _load_model_and_tokenizer(self, model_dir: str, model_id: str) -> bool:
342
+ """Load model and tokenizer from local directory"""
343
  try:
344
+ model_path = Path(model_dir)
345
 
346
+ # Load model configuration
347
+ config_file = model_path / "config.json"
348
+ if config_file.exists():
349
+ with open(config_file, "r") as f:
350
+ config_data = json.load(f)
351
 
352
+ logger.info(f"πŸ“‹ Config data keys: {list(config_data.keys())}")
353
 
354
+ # Handle different config structures
355
+ if "model_config" in config_data:
356
+ # Extract model_config section
357
+ model_config_data = config_data["model_config"]
358
  else:
359
+ # Use the entire config as model config
360
+ model_config_data = config_data
361
+
362
+ # Create GPTConfig with only the expected parameters
363
+ expected_params = {
364
+ "vocab_size",
365
+ "n_layer",
366
+ "n_head",
367
+ "n_embd",
368
+ "block_size",
369
+ "dropout",
370
+ "bias",
371
  }
372
 
373
+ config_kwargs = {}
374
+ for key, value in model_config_data.items():
375
+ if key in expected_params:
376
+ config_kwargs[key] = value
377
 
378
+ logger.info(f"πŸ”§ Using config parameters: {config_kwargs}")
379
+ model_config = GPTConfig(**config_kwargs)
380
+ else:
381
+ # Default configuration for OpenLLM small models
382
+ model_config = GPTConfig(
383
+ vocab_size=32000,
384
+ n_layer=6,
385
+ n_head=8,
386
+ n_embd=512,
387
+ block_size=1024,
388
+ dropout=0.1,
389
+ bias=False,
390
+ )
391
 
392
+ # Load model weights
393
+ model_file = model_path / "best_model.pt"
394
+ if not model_file.exists():
395
+ model_file = model_path / "model.pt"
396
+ if not model_file.exists():
397
+ model_file = model_path / "pytorch_model.bin"
398
+
399
+ if model_file.exists():
400
+ logger.info(f"πŸ“¦ Loading model from: {model_file}")
401
+ model = GPT(model_config)
402
+ checkpoint = torch.load(model_file, map_location="cpu")
403
+
404
+ # Handle different checkpoint formats
405
+ if isinstance(checkpoint, dict):
406
+ if "model_state_dict" in checkpoint:
407
+ # Extract the actual model weights
408
+ state_dict = checkpoint["model_state_dict"]
409
+ logger.info(f"πŸ“‹ Loading from model_state_dict with {len(state_dict)} keys")
410
+ elif "model" in checkpoint:
411
+ state_dict = checkpoint["model"]
412
+ logger.info(f"πŸ“‹ Loading from model with {len(state_dict)} keys")
413
+ else:
414
+ # Try to load directly as state dict
415
+ state_dict = checkpoint
416
+ logger.info(f"πŸ“‹ Loading direct state dict with {len(state_dict)} keys")
417
+ else:
418
+ # Direct state dict
419
+ state_dict = checkpoint
420
+ logger.info(f"πŸ“‹ Loading direct state dict with {len(state_dict)} keys")
421
+
422
+ # Load the state dict
423
+ model.load_state_dict(state_dict)
424
+ model.eval()
425
+ self.models[model_id] = model
426
+ logger.info(f"βœ… Model loaded successfully")
427
+ else:
428
+ logger.error(f"❌ Model file not found in {model_dir}")
429
+ logger.error(f" Available files: {list(model_path.glob('*'))}")
430
+ return False
431
+
432
+ # Load tokenizer
433
+ tokenizer_file = model_path / "tokenizer.model"
434
+ if tokenizer_file.exists():
435
+ tokenizer = spm.SentencePieceProcessor()
436
+ tokenizer.load(str(tokenizer_file))
437
+ self.tokenizers[model_id] = tokenizer
438
+ logger.info(f"βœ… Tokenizer loaded successfully")
439
+ else:
440
+ logger.error(f"❌ Tokenizer file not found in {model_dir}")
441
+ return False
442
 
443
+ return True
444
 
445
  except Exception as e:
446
+ logger.error(f"❌ Failed to load model and tokenizer: {e}")
447
+ import traceback
448
+
449
+ logger.error(f"πŸ“‹ Full traceback: {traceback.format_exc()}")
450
+ return False
451
+
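The loader above accepts three checkpoint layouts. The dispatch logic, condensed into one illustrative helper (mirroring the code above, not adding to it):

```python
def extract_state_dict(checkpoint):
    """Return the model weights from any of the three supported layouts."""
    if isinstance(checkpoint, dict) and "model_state_dict" in checkpoint:
        return checkpoint["model_state_dict"]   # full training checkpoint
    if isinstance(checkpoint, dict) and "model" in checkpoint:
        return checkpoint["model"]              # alternate checkpoint wrapper
    return checkpoint                           # bare state dict
```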
452
+ def generate_text(
453
+ self,
454
+ prompt: str,
455
+ max_length: int = 100,
456
+ temperature: float = 0.7,
457
+ top_k: int = 50,
458
+ top_p: float = 0.9,
459
+ ) -> str:
460
+ """Generate text using the loaded real model"""
461
+ if not self.current_model or self.current_model not in self.models:
462
+ return "❌ No model loaded. Please select a model first."
463
 
464
  try:
465
+ model = self.models[self.current_model]
466
+ tokenizer = self.tokenizers[self.current_model]
467
 
468
+ # Tokenize input
469
+ input_ids = tokenizer.encode(prompt)
470
+ input_tensor = torch.tensor([input_ids], dtype=torch.long)
471
 
472
+ logger.info(f"🎯 Generating text with prompt: '{prompt[:50]}...'")
473
+ logger.info(
474
+ f"πŸ“Š Parameters: max_length={max_length}, temperature={temperature}, top_k={top_k}, top_p={top_p}"
475
  )
476
 
477
+ # Generate text
478
+ with torch.no_grad():
479
+ output_ids = model.generate(
480
+ input_tensor,
481
+ max_new_tokens=max_length,
482
+ temperature=temperature,
483
+ top_k=top_k,
484
+ top_p=top_p,
485
+ do_sample=True,
486
+ )
487
 
488
+ # Decode output
489
+ generated_text = tokenizer.decode(output_ids[0].tolist())
490
 
491
+ # Remove the input prompt from the output
492
+ if generated_text.startswith(prompt):
493
+ generated_text = generated_text[len(prompt) :].strip()
494
 
495
+ logger.info(f"βœ… Generated text: '{generated_text[:100]}...'")
496
+ return generated_text
 
497
 
498
+ except Exception as e:
499
+ error_msg = f"❌ Generation failed: {str(e)}"
500
+ logger.error(error_msg)
501
+ import traceback
502
 
503
+ logger.error(f"πŸ“‹ Full traceback: {traceback.format_exc()}")
504
+ return error_msg
505
 
506
 
507
+ # Initialize the real inference engine
508
+ inference_engine = RealOpenLLMInference()
509
 
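The module-level engine can also be driven outside Gradio; a minimal usage sketch (assumes this file is importable as `app` and that the Hugging Face download succeeds):

```python
from app import inference_engine

if inference_engine.load_model_from_hf("openllm-small-extended-10k"):
    print(inference_engine.generate_text("The history of", max_length=50, temperature=0.7))
```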
510
 
511
+ def load_model_info(model_id: str) -> str:
512
+ """Get information about a specific model"""
513
+ config = inference_engine.model_configs.get(model_id)
514
+ if config:
515
+ return f"**{config['name']}**\n\n{config['description']}\n\n**Parameters:** {config['parameters']}\n**Training Steps:** {config['training_steps']:,}"
516
+ return "❌ Model not found"
517
 
 
 
518
 
519
+ def generate_text_interface(
520
+ model_id: str, prompt: str, max_length: int, temperature: float, top_k: int, top_p: float
521
+ ) -> str:
522
+ """Gradio interface function for text generation"""
523
+ try:
524
+ # Load model if not already loaded
525
+ if model_id not in inference_engine.models:
526
+ logger.info(f"πŸ”„ Loading real model: {model_id}")
527
+ success = inference_engine.load_model_from_hf(model_id)
528
+ if not success:
529
+ return f"❌ Failed to load real model: {model_id}"
530
 
531
+ # Generate text
532
+ result = inference_engine.generate_text(
533
+ prompt=prompt, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p
534
+ )
535
 
536
+ return result
537
 
538
+ except Exception as e:
539
+ error_msg = f"❌ Error in generation interface: {str(e)}"
540
+ logger.error(error_msg)
541
+ return error_msg
542
 
 
543
 
544
+ # Create Gradio interface
545
+ def create_interface():
546
+ """Create the Gradio interface"""
547
 
548
+ with gr.Blocks(title="πŸš€ OpenLLM Real Models Space", theme=gr.themes.Soft()) as interface:
549
+ # Header
550
  gr.Markdown(
551
  """
552
+ # 🚀 OpenLLM Real Models Space
553
+
554
+ Welcome to the OpenLLM Real Models Space! This interface uses **actual trained models** from Hugging Face.
555
 
556
+ ## 🎯 Real Trained Models
557
 
558
+ We provide **six real models** with varying training steps (plus a seventh, improved 10k variant in the model picker):
559
 
560
+ | Model | Training Steps | Parameters | Performance |
561
+ |-------|---------------|------------|-------------|
562
+ | **4k Model** | 4,000 | 35.8M | Early training stage |
563
+ | **6k Model** | 6,000 | 35.8M | Improved coherence (Perplexity: 816.040) |
564
+ | **7k Model** | 7,000 | 35.8M | Enhanced quality (Loss: 2.100, Perplexity: 8.200) |
565
+ | **8k Model** | 8,000 | 35.8M | Sophisticated understanding |
566
+ | **9k Model** | 9,000 | 35.8M | Best performing model |
567
+ | **10k Model** | 10,000 | 35.8M | Latest extended training |
568
 
569
+ **These are real GPT-style transformer models trained on Wikipedia passages from the SQuAD dataset.**
570
 
571
+ ---
572
  """
573
  )
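For reference, the 35.8M figure is consistent with the configuration in this file (6 layers, 8 heads, 512-dim embeddings, 32k vocab, 1024 block size) if the output head is tied to the token embeddings, which is an assumption here since the `GPT` class above defines a separate `lm_head`: 32,000 × 512 (token embeddings) + 1,024 × 512 (positional embeddings) + 6 × 12 × 512² (QKV, attention projection, and 4× MLP per block) = 35,782,656 ≈ 35.8M parameters; an untied head would add another 16.4M.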
574
 
575
+ with gr.Row():
576
+ with gr.Column(scale=1):
577
+ # Model selection
578
+ model_dropdown = gr.Dropdown(
579
+ choices=list(inference_engine.model_configs.keys()),
580
+ value="openllm-small-extended-10k",
581
+ label="🎯 Select Model",
582
+ info="Choose the real trained model to use",
583
  )
584
 
585
+ # Model information display
586
+ model_info = gr.Markdown(
587
+ value=load_model_info("openllm-small-extended-10k"), label="📋 Model Information"
588
  )
589
+
590
+ # Update model info when selection changes
591
+ model_dropdown.change(
592
+ fn=load_model_info, inputs=[model_dropdown], outputs=[model_info]
593
  )
594
 
595
+ with gr.Column(scale=2):
596
+ # Input prompt
597
+ prompt_input = gr.Textbox(
598
+ lines=5,
599
+ label="πŸ“ Input Prompt",
600
+ placeholder="Enter your text prompt here...",
601
+ info="The text that will be used as input for generation",
602
  )
603
 
604
+ # Generation parameters
605
+ with gr.Row():
606
+ max_length = gr.Slider(
607
+ minimum=10,
608
+ maximum=500,
609
+ value=100,
610
+ step=10,
611
+ label="πŸ“ Max Length",
612
+ info="Maximum number of tokens to generate",
613
+ )
614
 
615
+ temperature = gr.Slider(
616
+ minimum=0.1,
617
+ maximum=2.0,
618
+ value=0.7,
619
+ step=0.1,
620
+ label="🌑️ Temperature",
621
+ info="Controls randomness (higher = more random)",
622
+ )
623
 
624
+ with gr.Row():
625
+ top_k = gr.Slider(
626
+ minimum=1,
627
+ maximum=100,
628
+ value=50,
629
+ step=1,
630
+ label="πŸ” Top-K",
631
+ info="Number of highest probability tokens to consider",
632
+ )
633
 
634
+ top_p = gr.Slider(
635
+ minimum=0.1,
636
+ maximum=1.0,
637
+ value=0.9,
638
+ step=0.1,
639
+ label="πŸ“Š Top-P",
640
+ info="Nucleus sampling parameter",
641
+ )
642
 
643
+ # Generate button
644
+ generate_btn = gr.Button("🚀 Generate Text", variant="primary", size="lg")
645
+
646
+ # Output
647
+ output_text = gr.Textbox(
648
+ lines=10, label="🎯 Generated Text", info="The generated text will appear here"
649
+ )
650
+
651
+ # Connect the generate button
652
+ generate_btn.click(
653
+ fn=generate_text_interface,
654
+ inputs=[model_dropdown, prompt_input, max_length, temperature, top_k, top_p],
655
+ outputs=[output_text],
656
+ )
657
 
658
+ # Footer
659
+ gr.Markdown(
660
  """
661
+ ---
662
+
663
+ ## 🔧 Technical Details
664
+
665
+ - **Architecture**: GPT-style transformer decoder
666
+ - **Model Size**: Small (6 layers, 8 heads, 512 embedding dim)
667
+ - **Vocabulary**: 32k tokens (SentencePiece BPE)
668
+ - **Training Data**: Wikipedia passages from SQuAD dataset
669
+ - **Framework**: PyTorch with real trained models
670
+ - **Gradio Version**: 4.44.1
671
+
672
+ **These models generate actual text based on their training on Wikipedia content.**
673
+
674
+ **Model Sources:**
675
+ - [4k Model](https://huggingface.co/lemms/openllm-small-extended-4k)
676
+ - [6k Model](https://huggingface.co/lemms/openllm-small-extended-6k)
677
+ - [7k Model](https://huggingface.co/lemms/openllm-small-extended-7k)
678
+ - [8k Model](https://huggingface.co/lemms/openllm-small-extended-8k)
679
+ - [9k Model](https://huggingface.co/lemms/openllm-small-extended-9k)
680
+ - [10k Model](https://huggingface.co/lemms/openllm-small-extended-10k)
+ - [10k Improved Model](https://huggingface.co/lemms/openllm-small-extended-10k-improved)
681
+ """
682
+ )
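Each of the linked repositories can also be fetched directly with the same `snapshot_download` call this app uses; a minimal sketch:

```python
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    repo_id="lemms/openllm-small-extended-10k",
    repo_type="model",
    allow_patterns=["*.pt", "*.json", "*.model", "*.bin"],
)
print(local_dir)  # expected to contain best_model.pt / model.pt plus tokenizer.model
```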
683
 
684
  return interface
685
 
686
 
687
+ # Create and launch the interface
688
  if __name__ == "__main__":
689
+ interface = create_interface()
690
+ interface.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)