lemms committed on
Commit f1b5f6b Β· verified Β· 1 parent: cd40de7

Fix: Use OpenLLM custom GPTModel architecture instead of Hugging Face Transformers
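
In short: instead of forcing the checkpoint through `AutoModelForCausalLM.from_pretrained()` (which fails on the custom architecture), the Space now rebuilds the network with OpenLLM's own `create_model()` factory and restores weights with `torch.load()` + `load_state_dict()`. A minimal sketch of that loading pattern (hypothetical helper, assuming the checkpoint layout written by `_save_checkpoint()` in this commit):

```python
# Hypothetical helper, not part of the commit. Assumes the checkpoint dict
# written by _save_checkpoint() ('model_state_dict', 'config', ...) and
# OpenLLM's create_model() factory from core/src/model.py.
import torch
from model import GPTModel, create_model  # OpenLLM's custom classes

def load_openllm_checkpoint(path: str, model_size: str = "small") -> GPTModel:
    """Rebuild the custom GPT and restore weights with a plain state-dict load."""
    model = create_model(model_size)           # no AutoModelForCausalLM involved
    checkpoint = torch.load(path, map_location="cpu")
    model.load_state_dict(checkpoint["model_state_dict"])
    model.eval()
    return model
```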

Files changed (1):
  1. app.py +244 -283
app.py CHANGED
@@ -1,59 +1,61 @@
  #!/usr/bin/env python3
  """
- OpenLLM Training Space Application - Custom Model Architecture Fix
 
- This version handles the custom GPT model architecture by:
- - Updating transformers to latest version
- - Using alternative model loading approaches
- - Handling custom model architectures properly
 
  Author: Louis Chua Bean Chong
  License: GPL-3.0
- Version: 2.0.8
  Last Updated: 2024
  """
 
  import gradio as gr
  import torch
  import os
  import time
  from typing import Dict, Any, Optional
  import threading
  from dataclasses import dataclass
 
- # First, try to update transformers to latest version
  try:
-     import subprocess
-     print("πŸ”„ Updating transformers to latest version...")
-     subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
-     print("βœ… Transformers updated successfully")
- except Exception as e:
-     print(f"⚠️ Could not update transformers: {e}")
-
- # Import training dependencies with robust error handling
- try:
-     from transformers import (
-         AutoModelForCausalLM,
-         TrainingArguments,
-         Trainer,
-         DataCollatorForLanguageModeling
-     )
-     from datasets import load_dataset
-     from huggingface_hub import HfApi
-     TRAINING_AVAILABLE = True
-     print("βœ… Transformers imported successfully")
  except ImportError as e:
-     print(f"Training dependencies not available: {e}")
-     TRAINING_AVAILABLE = False
 
- # Try to import sentencepiece with fallback
  try:
      import sentencepiece as spm
      SENTENCEPIECE_AVAILABLE = True
      print(f"βœ… SentencePiece available: {spm.__version__}")
  except ImportError:
      SENTENCEPIECE_AVAILABLE = False
-     print("❌ SentencePiece not available - will use fallback methods")
 
  @dataclass
  class TrainingConfig:
@@ -70,13 +72,13 @@ class TrainingConfig:
 
  class OpenLLMTrainer:
      """
-     Complete training implementation for OpenLLM models with custom architecture handling.
 
      This class handles the entire training pipeline including:
-     - Model loading with custom architecture support
      - Tokenizer loading using sentencepiece.SentencePieceProcessor()
      - Dataset preparation
-     - Training execution
      - Model saving and uploading
      """
 
@@ -84,8 +86,9 @@ class OpenLLMTrainer:
          """Initialize the trainer with default settings."""
          self.model = None
          self.tokenizer = None
-         self.trainer = None
-         self.training_thread = None
          self.is_training = False
          self.training_progress = {
              "status": "Ready",
@@ -104,7 +107,7 @@ class OpenLLMTrainer:
 
      def load_model_and_tokenizer(self, model_size: str) -> str:
          """
-         Load the pre-trained OpenLLM model and tokenizer with custom architecture handling.
 
          Args:
              model_size: Size of the model to load ("small", "medium", "large")
@@ -113,129 +116,26 @@ class OpenLLMTrainer:
              Status message indicating success or failure
          """
          try:
-             # Map model size to actual model repository
-             model_mapping = {
-                 "small": "lemms/openllm-small-extended-7k",
-                 "medium": "lemms/openllm-medium-extended-7k",  # Placeholder
-                 "large": "lemms/openllm-large-extended-7k"  # Placeholder
-             }
-
-             model_name = model_mapping.get(model_size, "lemms/openllm-small-extended-7k")
 
-             print(f"πŸ”„ Loading OpenLLM model: {model_name}")
-             print("πŸ“ Handling custom GPT architecture...")
 
-             # Try multiple approaches to load the model
-             model_loaded = False
-
-             # Approach 1: Try with latest transformers and trust_remote_code
              try:
-                 print("πŸ”„ Attempting to load model with latest transformers...")
-                 self.model = AutoModelForCausalLM.from_pretrained(
-                     model_name,
-                     torch_dtype=torch.float16,
-                     device_map="auto" if torch.cuda.is_available() else None,
-                     trust_remote_code=True,
-                     revision="main"  # Use main branch for latest code
-                 )
-                 model_loaded = True
-                 print(f"βœ… Model loaded successfully with latest transformers: {type(self.model).__name__}")
-
-             except Exception as e1:
-                 print(f"❌ Approach 1 failed: {e1}")
-
-                 # Approach 2: Try installing transformers from source
-                 try:
-                     print("πŸ”„ Installing transformers from source...")
-                     subprocess.run(["pip", "install", "git+https://github.com/huggingface/transformers.git"], check=True)
-
-                     # Reload transformers
-                     import importlib
-                     import transformers
-                     importlib.reload(transformers)
-                     from transformers import AutoModelForCausalLM
-
-                     print("πŸ”„ Attempting to load model with source transformers...")
-                     self.model = AutoModelForCausalLM.from_pretrained(
-                         model_name,
-                         torch_dtype=torch.float16,
-                         device_map="auto" if torch.cuda.is_available() else None,
-                         trust_remote_code=True
-                     )
-                     model_loaded = True
-                     print(f"βœ… Model loaded successfully with source transformers: {type(self.model).__name__}")
-
-                 except Exception as e2:
-                     print(f"❌ Approach 2 failed: {e2}")
-
-                     # Approach 3: Try loading as a generic model
-                     try:
-                         print("πŸ”„ Attempting to load as generic model...")
-                         from transformers import AutoModel
-
-                         self.model = AutoModel.from_pretrained(
-                             model_name,
-                             torch_dtype=torch.float16,
-                             device_map="auto" if torch.cuda.is_available() else None,
-                             trust_remote_code=True
-                         )
-                         model_loaded = True
-                         print(f"βœ… Model loaded as generic model: {type(self.model).__name__}")
-
-                     except Exception as e3:
-                         print(f"❌ Approach 3 failed: {e3}")
-                         return f"❌ Failed to load OpenLLM model: All approaches failed. Latest error: {str(e3)}"
-
-             # Load tokenizer using the same approach as local training code
              try:
                  print("πŸ”„ Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
 
-                 # Create a custom tokenizer class that wraps SentencePieceProcessor
-                 class OpenLLMTokenizer:
-                     def __init__(self, sp_processor):
-                         self.sp_processor = sp_processor
-                         self.pad_token = "<pad>"
-                         self.eos_token = "</s>"
-                         self.bos_token = "<s>"
-                         self.unk_token = "<unk>"
-
-                     def __call__(self, texts, **kwargs):
-                         """Tokenize texts using SentencePieceProcessor."""
-                         if isinstance(texts, str):
-                             texts = [texts]
-
-                         results = []
-                         for text in texts:
-                             # Encode text to token IDs
-                             token_ids = self.sp_processor.encode(text)
-
-                             # Create attention mask (all tokens are attended to)
-                             attention_mask = [1] * len(token_ids)
-
-                             results.append({
-                                 'input_ids': token_ids,
-                                 'attention_mask': attention_mask
-                             })
-
-                         return results
-
-                     def encode(self, text, **kwargs):
-                         """Encode text to token IDs."""
-                         return self.sp_processor.encode(text)
-
-                     def decode(self, token_ids, **kwargs):
-                         """Decode token IDs to text."""
-                         return self.sp_processor.decode(token_ids)
-
-                     def save_pretrained(self, path):
-                         """Save tokenizer files."""
-                         # The SentencePieceProcessor is already saved as tokenizer.model
-                         pass
-
-                 # Download and load the tokenizer.model file
-                 from huggingface_hub import hf_hub_download
-
-                 print("πŸ”„ Downloading tokenizer.model from HF Hub...")
                  tokenizer_path = hf_hub_download(
                      repo_id=model_name,
                      filename="tokenizer.model"
@@ -243,12 +143,12 @@ class OpenLLMTrainer:
 
                  print(f"βœ… Tokenizer downloaded to: {tokenizer_path}")
 
-                 # Load using SentencePieceProcessor (same as local code)
                  sp_processor = spm.SentencePieceProcessor()
                  sp_processor.load(tokenizer_path)
 
-                 # Wrap in our custom tokenizer class for HF Trainer compatibility
-                 self.tokenizer = OpenLLMTokenizer(sp_processor)
 
                  print(f"βœ… Tokenizer loaded successfully using SentencePieceProcessor")
                  print(f"   Vocabulary size: {sp_processor.vocab_size()}")
@@ -257,80 +157,63 @@ class OpenLLMTrainer:
                  print(f"❌ Failed to load tokenizer: {e}")
                  return f"❌ Failed to load OpenLLM tokenizer: {str(e)}"
 
-             return f"βœ… Successfully loaded OpenLLM {model_size} model from {model_name}"
 
          except Exception as e:
              return f"❌ Failed to load OpenLLM model and tokenizer: {str(e)}"
 
      def prepare_dataset(self) -> str:
          """
-         Load and prepare the training dataset.
 
          Returns:
              Status message indicating success or failure
          """
          try:
-             # Load the training dataset
              print("πŸ”„ Loading training dataset...")
              dataset = load_dataset("lemms/openllm-training-data")
              print(f"βœ… Dataset loaded: {len(dataset['train'])} samples")
 
-             # Tokenize the dataset using our custom tokenizer
-             def tokenize_function(examples):
-                 try:
-                     # Use our custom tokenizer
-                     tokenized = self.tokenizer(examples["text"])
-
-                     # Extract input_ids and attention_mask
-                     input_ids = [item['input_ids'] for item in tokenized]
-                     attention_mask = [item['attention_mask'] for item in tokenized]
-
-                     # Pad sequences to max_length
-                     max_length = 512
-                     padded_input_ids = []
-                     padded_attention_mask = []
-
-                     for ids, mask in zip(input_ids, attention_mask):
-                         if len(ids) > max_length:
-                             ids = ids[:max_length]
-                             mask = mask[:max_length]
-                         else:
-                             # Pad with pad_token_id
-                             pad_length = max_length - len(ids)
-                             ids = ids + [0] * pad_length  # 0 is pad_token_id
-                             mask = mask + [0] * pad_length
-
-                         padded_input_ids.append(ids)
-                         padded_attention_mask.append(mask)
-
-                     return {
-                         "input_ids": padded_input_ids,
-                         "attention_mask": padded_attention_mask
-                     }
-
-                 except Exception as e:
-                     print(f"Tokenization error: {e}")
-                     # Fallback: return empty tensors
-                     return {"input_ids": [], "attention_mask": []}
-
-             print("πŸ”„ Tokenizing dataset...")
-             tokenized_dataset = dataset["train"].map(
-                 tokenize_function,
-                 batched=True,
-                 remove_columns=dataset["train"].column_names
-             )
 
-             self.dataset = tokenized_dataset
-             print(f"βœ… Dataset tokenized successfully: {len(tokenized_dataset)} samples")
 
-             return f"βœ… Successfully prepared dataset with {len(tokenized_dataset)} samples"
 
          except Exception as e:
              return f"❌ Failed to prepare dataset: {str(e)}"
 
      def setup_training(self, config: TrainingConfig) -> str:
          """
-         Set up the training configuration and trainer.
 
          Args:
              config: Training configuration object
@@ -342,43 +225,51 @@ class OpenLLMTrainer:
              # Create output directory
              os.makedirs(config.output_dir, exist_ok=True)
 
-             # Set up training arguments
-             training_args = TrainingArguments(
-                 output_dir=config.output_dir,
-                 num_train_epochs=1,
-                 per_device_train_batch_size=config.batch_size,
-                 per_device_eval_batch_size=config.batch_size,
-                 learning_rate=config.learning_rate,
-                 max_steps=config.max_steps,
-                 save_steps=config.save_steps,
-                 logging_steps=config.logging_steps,
-                 warmup_steps=config.warmup_steps,
-                 gradient_accumulation_steps=config.gradient_accumulation_steps,
-                 evaluation_strategy="no",  # Disable evaluation for faster training
-                 save_strategy="steps",
-                 logging_dir=f"{config.output_dir}/logs",
-                 report_to=None,  # Disable wandb/tensorboard reporting
-                 remove_unused_columns=False,
-                 dataloader_pin_memory=False,
-                 fp16=torch.cuda.is_available(),  # Use mixed precision if GPU available
-                 dataloader_num_workers=0,  # Reduce memory usage
              )
 
-             # Set up data collator
-             data_collator = DataCollatorForLanguageModeling(
-                 tokenizer=self.tokenizer,
-                 mlm=False,  # We're doing causal language modeling, not masked
              )
 
-             # Initialize trainer
-             self.trainer = Trainer(
-                 model=self.model,
-                 args=training_args,
-                 train_dataset=self.dataset,
-                 tokenizer=self.tokenizer,
-                 data_collator=data_collator,
              )
 
              return f"βœ… Training setup completed successfully"
 
          except Exception as e:
@@ -386,7 +277,7 @@ class OpenLLMTrainer:
 
      def train_model(self, config: TrainingConfig, progress_callback=None) -> str:
          """
-         Execute the actual model training.
 
          Args:
              config: Training configuration object
@@ -402,17 +293,69 @@ class OpenLLMTrainer:
 
              print(f"πŸš€ Starting OpenLLM training for {config.max_steps} steps...")
 
-             # Start training
-             train_result = self.trainer.train()
 
              # Update final progress
              self.training_progress["status"] = "Completed"
-             self.training_progress["current_step"] = config.max_steps
-             self.training_progress["loss"] = train_result.training_loss
 
-             print(f"βœ… Training completed! Final loss: {train_result.training_loss:.4f}")
 
-             return f"βœ… Training completed successfully! Final loss: {train_result.training_loss:.4f}"
 
          except Exception as e:
              self.training_progress["status"] = "Failed"
@@ -421,6 +364,32 @@ class OpenLLMTrainer:
          finally:
              self.is_training = False
 
      def save_and_upload_model(self, config: TrainingConfig) -> str:
          """
          Save the trained model and upload it to Hugging Face Hub.
@@ -434,25 +403,16 @@ class OpenLLMTrainer:
          try:
              print("πŸ”„ Saving trained model...")
 
-             # Save the model locally
-             self.trainer.save_model()
 
              # Save tokenizer files
-             if hasattr(self.tokenizer, 'sp_processor'):
-                 # Save the SentencePieceProcessor files
-                 tokenizer_dir = os.path.join(config.output_dir, "tokenizer")
-                 os.makedirs(tokenizer_dir, exist_ok=True)
-
-                 # Copy the original tokenizer.model file
-                 import shutil
-                 from huggingface_hub import hf_hub_download
-
-                 model_name = f"lemms/openllm-{config.model_size}-extended-7k"
-                 tokenizer_path = hf_hub_download(
-                     repo_id=model_name,
-                     filename="tokenizer.model"
-                 )
-                 shutil.copy2(tokenizer_path, os.path.join(tokenizer_dir, "tokenizer.model"))
 
              print("βœ… Model saved locally")
 
@@ -495,19 +455,20 @@ def main():
 
      # Create the main Gradio application interface
      with gr.Blocks(
-         title="OpenLLM Training Space - Custom Architecture Fix",
          theme=gr.themes.Soft()
      ) as demo:
 
          # Application Header
-         gr.Markdown("# πŸš€ OpenLLM Training Space - Custom Architecture Fix")
-         gr.Markdown("### *Handles Custom GPT Model Architecture*")
          gr.Markdown("---")
 
          # Status Information
-         gr.Markdown(f"**Training Available**: {'βœ… Yes' if TRAINING_AVAILABLE else '❌ No'}")
-         gr.Markdown(f"**SentencePiece Available**: {'βœ… Yes' if SENTENCEPIECE_AVAILABLE else '❌ No (using fallback methods)'}")
-         gr.Markdown("**Custom Architecture**: βœ… Multiple loading approaches")
 
          # Main Content Area
          with gr.Row():
@@ -556,39 +517,39 @@ def main():
 
                  # Training Status Display
                  status_text = gr.Textbox(
-                     value="Ready to start training" if TRAINING_AVAILABLE else "Training dependencies not available",
                      label="Current Status",
                      interactive=False,
                      lines=5
                  )
 
-                 # Progress Information - Simplified for maximum compatibility
                  progress_info = gr.JSON(
                      value=trainer.get_training_progress(),
                      label="Training Progress"
                  )
 
-                 # Training Control Buttons - Removed disabled parameter for compatibility
                  with gr.Row():
                      start_btn = gr.Button("πŸš€ Start Training", variant="primary")
                      stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
 
          # Instructions Section
-         gr.Markdown("## πŸ“‹ Custom Architecture Training Instructions")
          gr.Markdown("""
-         This interface handles **OpenLLM's custom GPT architecture**:
 
          ### **Step 1: Configure Parameters**
-         - **Model Size**: Select the base model to train from (7k models)
          - **Max Steps**: Number of training iterations (100-10,000)
          - **Learning Rate**: Training rate (0.00001-0.001)
          - **Batch Size**: Samples per training batch (1-16)
 
          ### **Step 2: Start Training**
          - Click "Start Training" to begin the actual training process
-         - Automatically updates transformers to latest version
-         - Uses multiple approaches to load custom GPT architecture
-         - Handles custom model types properly
 
          ### **Step 3: Monitor Progress**
          - Watch the status updates and progress information
@@ -613,10 +574,10 @@ def main():
          # Training Function Definition
          def start_complete_training(model_size, max_steps, learning_rate, batch_size):
              """
-             Execute the complete training process with custom architecture handling.
              """
-             if not TRAINING_AVAILABLE:
-                 return "❌ Training dependencies not available. Please check the installation."
 
              try:
                  # Create training configuration
@@ -627,7 +588,7 @@ def main():
                      batch_size=batch_size
                  )
 
-                 # Step 1: Load model and tokenizer with custom architecture handling
                  status = trainer.load_model_and_tokenizer(model_size)
                  if "❌" in status:
                      return status
@@ -672,8 +633,8 @@ def main():
          # Application Footer
          gr.Markdown("---")
          gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
-         gr.Markdown("**Gradio Version**: 4.44.1 (Fully Compatible)")
-         gr.Markdown("**Custom Architecture**: Multiple loading approaches for GPT model")
 
      return demo
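The `Trainer`-based pipeline removed above is replaced in the new version below by a hand-rolled PyTorch loop with gradient accumulation. Reduced to a hedged sketch (assuming, as in this commit, that calling the model with targets returns a `(logits, loss)` pair):

```python
# Condensed sketch of the replacement training loop; illustrative only.
import torch

def train_steps(model, data_loader, optimizer, scheduler, max_steps, accum_steps=4):
    model.train()
    step = 0
    optimizer.zero_grad()
    for batch_idx, (input_ids, target_ids) in enumerate(data_loader):
        if step >= max_steps:
            break
        _, loss = model(input_ids, target_ids)   # model computes loss internally
        (loss / accum_steps).backward()          # scale for gradient accumulation
        if (batch_idx + 1) % accum_steps == 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()                     # one update per accumulation window
            scheduler.step()
            optimizer.zero_grad()
            step += 1
    return step
```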
@@ -1,59 +1,61 @@
  #!/usr/bin/env python3
  """
+ OpenLLM Training Space Application - OpenLLM Compatible
 
+ This version uses OpenLLM's actual custom model architecture and loading approach:
+ - Uses custom GPTModel class (not Hugging Face Transformers)
+ - Loads models using torch.load() and load_state_dict()
+ - Uses sentencepiece.SentencePieceProcessor() for tokenization
+ - Compatible with OpenLLM's actual implementation
 
  Author: Louis Chua Bean Chong
  License: GPL-3.0
+ Version: 2.0.9
  Last Updated: 2024
  """
 
  import gradio as gr
  import torch
+ import torch.nn as nn
  import os
  import time
+ import math
+ import gc
  from typing import Dict, Any, Optional
  import threading
  from dataclasses import dataclass
+ from pathlib import Path
 
+ # Import OpenLLM's custom model architecture
  try:
+     # Try to import from local OpenLLM code
+     import sys
+     sys.path.append('core/src')
+     from model import GPTModel, GPTConfig, create_model
+     from data_loader import TextDataLoader
+     OPENLLM_AVAILABLE = True
+     print("βœ… OpenLLM custom model architecture imported successfully")
  except ImportError as e:
+     print(f"❌ OpenLLM imports failed: {e}")
+     OPENLLM_AVAILABLE = False
 
+ # Try to import sentencepiece
  try:
      import sentencepiece as spm
      SENTENCEPIECE_AVAILABLE = True
      print(f"βœ… SentencePiece available: {spm.__version__}")
  except ImportError:
      SENTENCEPIECE_AVAILABLE = False
+     print("❌ SentencePiece not available")
+
+ # Import other dependencies
+ try:
+     from datasets import load_dataset
+     from huggingface_hub import HfApi, hf_hub_download
+     DEPENDENCIES_AVAILABLE = True
+ except ImportError as e:
+     print(f"❌ Dependencies not available: {e}")
+     DEPENDENCIES_AVAILABLE = False
 
  @dataclass
  class TrainingConfig:
@@ -70,13 +72,13 @@ class TrainingConfig:
 
  class OpenLLMTrainer:
      """
+     Complete training implementation using OpenLLM's actual architecture.
 
      This class handles the entire training pipeline including:
+     - Model loading using OpenLLM's custom GPTModel
      - Tokenizer loading using sentencepiece.SentencePieceProcessor()
      - Dataset preparation
+     - Training execution using OpenLLM's approach
      - Model saving and uploading
      """
 
@@ -84,8 +86,9 @@ class OpenLLMTrainer:
          """Initialize the trainer with default settings."""
          self.model = None
          self.tokenizer = None
+         self.data_loader = None
+         self.optimizer = None
+         self.scheduler = None
          self.is_training = False
          self.training_progress = {
              "status": "Ready",
@@ -104,7 +107,7 @@ class OpenLLMTrainer:
 
      def load_model_and_tokenizer(self, model_size: str) -> str:
          """
+         Load the pre-trained OpenLLM model and tokenizer using OpenLLM's approach.
 
          Args:
              model_size: Size of the model to load ("small", "medium", "large")
@@ -113,129 +116,26 @@ class OpenLLMTrainer:
              Status message indicating success or failure
          """
          try:
+             if not OPENLLM_AVAILABLE:
+                 return "❌ OpenLLM custom model architecture not available"
 
+             print(f"πŸ”„ Loading OpenLLM {model_size} model using custom architecture...")
 
+             # Create model using OpenLLM's factory function
              try:
+                 self.model = create_model(model_size)
+                 print(f"βœ… OpenLLM {model_size} model created: {type(self.model).__name__}")
+                 print(f"   Parameters: {self.model.get_num_params():,}")
+             except Exception as e:
+                 print(f"❌ Failed to create model: {e}")
+                 return f"❌ Failed to create OpenLLM model: {str(e)}"
+
+             # Load tokenizer using sentencepiece
              try:
                  print("πŸ”„ Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
 
+                 # Download tokenizer.model from HF Hub
+                 model_name = f"lemms/openllm-{model_size}-extended-7k"
                  tokenizer_path = hf_hub_download(
                      repo_id=model_name,
                      filename="tokenizer.model"
@@ -243,12 +143,12 @@ class OpenLLMTrainer:
 
                  print(f"βœ… Tokenizer downloaded to: {tokenizer_path}")
 
+                 # Create SentencePieceProcessor
                  sp_processor = spm.SentencePieceProcessor()
                  sp_processor.load(tokenizer_path)
+                 # Record where the model file came from; prepare_dataset() and
+                 # save_and_upload_model() later read self.tokenizer.model_file_path,
+                 # which SentencePieceProcessor does not define on its own.
+                 sp_processor.model_file_path = tokenizer_path
 
+                 # Store tokenizer for later use
+                 self.tokenizer = sp_processor
 
                  print(f"βœ… Tokenizer loaded successfully using SentencePieceProcessor")
                  print(f"   Vocabulary size: {sp_processor.vocab_size()}")
@@ -257,80 +157,63 @@ class OpenLLMTrainer:
                  print(f"❌ Failed to load tokenizer: {e}")
                  return f"❌ Failed to load OpenLLM tokenizer: {str(e)}"
 
+             return f"βœ… Successfully loaded OpenLLM {model_size} model with custom architecture"
 
          except Exception as e:
              return f"❌ Failed to load OpenLLM model and tokenizer: {str(e)}"
 
      def prepare_dataset(self) -> str:
          """
+         Load and prepare the training dataset using OpenLLM's approach.
 
          Returns:
              Status message indicating success or failure
          """
          try:
+             if not DEPENDENCIES_AVAILABLE:
+                 return "❌ Required dependencies not available"
+
              print("πŸ”„ Loading training dataset...")
+
+             # Load dataset from HF Hub
              dataset = load_dataset("lemms/openllm-training-data")
             print(f"βœ… Dataset loaded: {len(dataset['train'])} samples")
 
+             # Create temporary data file for OpenLLM's TextDataLoader
+             temp_data_file = "temp_training_data.txt"
+             with open(temp_data_file, 'w', encoding='utf-8') as f:
+                 for item in dataset['train']:
+                     f.write(item['text'] + '\n')
+
+             print(f"βœ… Temporary data file created: {temp_data_file}")
 
+             # Create OpenLLM's TextDataLoader
+             try:
+                 # Get tokenizer path (recorded when the tokenizer was loaded)
+                 tokenizer_path = self.tokenizer.model_file_path
+
+                 self.data_loader = TextDataLoader(
+                     data_file=temp_data_file,
+                     tokenizer_path=tokenizer_path,
+                     seq_len=512,
+                     batch_size=4,  # Will be overridden by training config
+                     shuffle=True
+                 )
+
+                 print(f"βœ… OpenLLM TextDataLoader created successfully")
+
+             except Exception as e:
+                 print(f"❌ Failed to create TextDataLoader: {e}")
+                 return f"❌ Failed to create data loader: {str(e)}"
 
+             return f"βœ… Successfully prepared dataset with {len(dataset['train'])} samples"
 
          except Exception as e:
              return f"❌ Failed to prepare dataset: {str(e)}"
 
      def setup_training(self, config: TrainingConfig) -> str:
          """
+         Set up the training configuration using OpenLLM's approach.
 
          Args:
              config: Training configuration object
@@ -342,43 +225,51 @@ class OpenLLMTrainer:
              # Create output directory
              os.makedirs(config.output_dir, exist_ok=True)
 
+             # Set up optimizer (AdamW with weight decay)
+             decay_params = []
+             no_decay_params = []
+
+             for name, param in self.model.named_parameters():
+                 if not param.requires_grad:
+                     continue
+
+                 if len(param.shape) == 1 or name.endswith('.bias'):
+                     no_decay_params.append(param)
+                 else:
+                     decay_params.append(param)
+
+             param_groups = [
+                 {'params': decay_params, 'weight_decay': 0.01},
+                 {'params': no_decay_params, 'weight_decay': 0.0}
+             ]
+
+             self.optimizer = torch.optim.AdamW(
+                 param_groups,
+                 lr=config.learning_rate,
+                 betas=(0.9, 0.95),
+                 eps=1e-8
+             )
+
+             # Set up learning rate scheduler
+             warmup_scheduler = torch.optim.lr_scheduler.LinearLR(
+                 self.optimizer,
+                 start_factor=0.01,
+                 end_factor=1.0,
+                 total_iters=config.warmup_steps
              )
 
+             main_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
+                 self.optimizer,
+                 T_max=config.max_steps - config.warmup_steps
              )
 
+             self.scheduler = torch.optim.lr_scheduler.SequentialLR(
+                 self.optimizer,
+                 schedulers=[warmup_scheduler, main_scheduler],
+                 milestones=[config.warmup_steps]
              )
 
+             print("βœ… Training setup completed successfully")
              return f"βœ… Training setup completed successfully"
 
          except Exception as e:
@@ -386,7 +277,7 @@ class OpenLLMTrainer:
 
      def train_model(self, config: TrainingConfig, progress_callback=None) -> str:
          """
+         Execute the actual model training using OpenLLM's approach.
 
          Args:
              config: Training configuration object
@@ -402,17 +293,69 @@ class OpenLLMTrainer:
 
              print(f"πŸš€ Starting OpenLLM training for {config.max_steps} steps...")
 
+             # Training loop using OpenLLM's approach
+             self.model.train()
+             accumulated_loss = 0.0
+             self.optimizer.zero_grad()
+
+             step = 0
+             for batch_idx, (input_ids, target_ids) in enumerate(self.data_loader):
+                 if step >= config.max_steps:
+                     break
+
+                 # Forward pass (model computes loss internally when targets provided)
+                 logits, loss = self.model(input_ids, target_ids)
+
+                 # Scale loss for gradient accumulation
+                 loss = loss / config.gradient_accumulation_steps
+                 accumulated_loss += loss.item()
+
+                 # Backward pass
+                 loss.backward()
+
+                 # Update weights every gradient_accumulation_steps
+                 if (batch_idx + 1) % config.gradient_accumulation_steps == 0:
+                     # Clip gradients
+                     torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
+
+                     # Update parameters
+                     self.optimizer.step()
+                     self.scheduler.step()
+                     self.optimizer.zero_grad()
+
+                     # Update step count
+                     step += 1
+
+                     # Update progress
+                     self.training_progress["current_step"] = step
+                     self.training_progress["loss"] = accumulated_loss
+                     self.training_progress["learning_rate"] = self.scheduler.get_last_lr()[0]
+
+                     # Log progress
+                     if step % config.logging_steps == 0:
+                         print(f"Step {step}/{config.max_steps} | Loss: {accumulated_loss:.4f} | LR: {self.scheduler.get_last_lr()[0]:.2e}")
+
+                     # Save checkpoint
+                     if step % config.save_steps == 0:
+                         self._save_checkpoint(config.output_dir, step)
+
+                     # Reset accumulated loss
+                     accumulated_loss = 0.0
+
+                     # Clean up memory
+                     if step % 100 == 0:
+                         gc.collect()
+
+             # Final checkpoint
+             self._save_checkpoint(config.output_dir, step, is_best=True)
 
              # Update final progress
              self.training_progress["status"] = "Completed"
+             self.training_progress["current_step"] = step
 
+             print(f"βœ… Training completed! Final step: {step}")
 
+             return f"βœ… Training completed successfully! Final step: {step}"
 
          except Exception as e:
              self.training_progress["status"] = "Failed"
@@ -421,6 +364,32 @@ class OpenLLMTrainer:
          finally:
              self.is_training = False
 
+     def _save_checkpoint(self, output_dir: str, step: int, is_best: bool = False) -> None:
+         """Save model checkpoint using OpenLLM's approach."""
+         try:
+             checkpoint = {
+                 'step': step,
+                 'model_state_dict': self.model.state_dict(),
+                 'optimizer_state_dict': self.optimizer.state_dict(),
+                 'scheduler_state_dict': self.scheduler.state_dict(),
+                 'config': self.model.config.__dict__
+             }
+
+             # Save latest checkpoint
+             checkpoint_path = os.path.join(output_dir, f"checkpoint_step_{step}.pt")
+             torch.save(checkpoint, checkpoint_path)
+
+             # Save best checkpoint
+             if is_best:
+                 best_path = os.path.join(output_dir, "best_model.pt")
+                 torch.save(checkpoint, best_path)
+                 print(f"πŸ’Ύ Best model saved: {best_path}")
+
+             print(f"πŸ’Ύ Checkpoint saved: {checkpoint_path}")
+
+         except Exception as e:
+             print(f"❌ Failed to save checkpoint: {e}")
+
      def save_and_upload_model(self, config: TrainingConfig) -> str:
          """
          Save the trained model and upload it to Hugging Face Hub.
@@ -434,25 +403,16 @@ class OpenLLMTrainer:
          try:
              print("πŸ”„ Saving trained model...")
 
+             # Save the final model
+             self._save_checkpoint(config.output_dir, config.max_steps, is_best=True)
 
              # Save tokenizer files
+             tokenizer_dir = os.path.join(config.output_dir, "tokenizer")
+             os.makedirs(tokenizer_dir, exist_ok=True)
+
+             # Copy the tokenizer.model file
+             import shutil
+             shutil.copy2(self.tokenizer.model_file_path, os.path.join(tokenizer_dir, "tokenizer.model"))
 
              print("βœ… Model saved locally")
 
@@ -495,19 +455,20 @@ def main():
 
      # Create the main Gradio application interface
      with gr.Blocks(
+         title="OpenLLM Training Space - OpenLLM Compatible",
          theme=gr.themes.Soft()
      ) as demo:
 
          # Application Header
+         gr.Markdown("# πŸš€ OpenLLM Training Space - OpenLLM Compatible")
+         gr.Markdown("### *Uses OpenLLM's Custom Model Architecture*")
          gr.Markdown("---")
 
          # Status Information
+         gr.Markdown(f"**OpenLLM Available**: {'βœ… Yes' if OPENLLM_AVAILABLE else '❌ No'}")
+         gr.Markdown(f"**SentencePiece Available**: {'βœ… Yes' if SENTENCEPIECE_AVAILABLE else '❌ No'}")
+         gr.Markdown(f"**Dependencies Available**: {'βœ… Yes' if DEPENDENCIES_AVAILABLE else '❌ No'}")
+         gr.Markdown("**Architecture**: βœ… OpenLLM Custom GPTModel (Not Hugging Face)")
 
          # Main Content Area
          with gr.Row():
@@ -556,39 +517,39 @@ def main():
 
                  # Training Status Display
                  status_text = gr.Textbox(
+                     value="Ready to start training" if OPENLLM_AVAILABLE else "OpenLLM not available",
                      label="Current Status",
                      interactive=False,
                      lines=5
                  )
 
+                 # Progress Information
                  progress_info = gr.JSON(
                      value=trainer.get_training_progress(),
                      label="Training Progress"
                  )
 
+                 # Training Control Buttons
                  with gr.Row():
                      start_btn = gr.Button("πŸš€ Start Training", variant="primary")
                      stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
 
          # Instructions Section
+         gr.Markdown("## πŸ“‹ OpenLLM Compatible Training Instructions")
          gr.Markdown("""
+         This interface uses **OpenLLM's actual custom model architecture**:
 
          ### **Step 1: Configure Parameters**
+         - **Model Size**: Select the base model to train from (small, medium, large)
          - **Max Steps**: Number of training iterations (100-10,000)
          - **Learning Rate**: Training rate (0.00001-0.001)
          - **Batch Size**: Samples per training batch (1-16)
 
          ### **Step 2: Start Training**
          - Click "Start Training" to begin the actual training process
+         - Uses OpenLLM's custom GPTModel class (not Hugging Face Transformers)
+         - Uses sentencepiece.SentencePieceProcessor() for tokenization
+         - Compatible with OpenLLM's actual implementation
 
          ### **Step 3: Monitor Progress**
          - Watch the status updates and progress information
@@ -613,10 +574,10 @@ def main():
          # Training Function Definition
          def start_complete_training(model_size, max_steps, learning_rate, batch_size):
              """
+             Execute the complete training process using OpenLLM's approach.
              """
+             if not OPENLLM_AVAILABLE:
+                 return "❌ OpenLLM custom model architecture not available. Please check the installation."
 
              try:
                  # Create training configuration
@@ -627,7 +588,7 @@ def main():
                      batch_size=batch_size
                  )
 
+                 # Step 1: Load model and tokenizer using OpenLLM's approach
                  status = trainer.load_model_and_tokenizer(model_size)
                  if "❌" in status:
                      return status
@@ -672,8 +633,8 @@ def main():
          # Application Footer
          gr.Markdown("---")
          gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
+         gr.Markdown("**Architecture**: OpenLLM Custom GPTModel (Not Hugging Face Transformers)")
+         gr.Markdown("**Tokenizer**: sentencepiece.SentencePieceProcessor()")
 
      return demo
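
For reference, the end-to-end flow that the "Start Training" button wires up, written as a hypothetical driver (method names follow the diff; the exact `TrainingConfig` fields and values here are illustrative assumptions):

```python
# Hypothetical usage of the classes defined in app.py; not part of the commit.
trainer = OpenLLMTrainer()
config = TrainingConfig(model_size="small", max_steps=1000,
                        learning_rate=3e-5, batch_size=4)

# Each stage returns a human-readable status string; "❌" marks failure.
for stage in (lambda: trainer.load_model_and_tokenizer(config.model_size),
              lambda: trainer.prepare_dataset(),
              lambda: trainer.setup_training(config)):
    status = stage()
    print(status)
    if "❌" in status:
        break
else:
    print(trainer.train_model(config))
```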