Fix: Handle custom GPT model architecture with multiple loading approaches and transformers update
app.py CHANGED
@@ -1,15 +1,15 @@
 #!/usr/bin/env python3
 """
-OpenLLM Training Space Application - …
+OpenLLM Training Space Application - Custom Model Architecture Fix
 
-This version …
-- …
-- …
-- …
+This version handles the custom GPT model architecture by:
+- Updating transformers to latest version
+- Using alternative model loading approaches
+- Handling custom model architectures properly
 
 Author: Louis Chua Bean Chong
 License: GPL-3.0
-Version: 2.0.…
+Version: 2.0.8
 Last Updated: 2024
 """
 
@@ -21,6 +21,15 @@ from typing import Dict, Any, Optional
 import threading
 from dataclasses import dataclass
 
+# First, try to update transformers to latest version
+try:
+    import subprocess
+    print("Updating transformers to latest version...")
+    subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
+    print("✅ Transformers updated successfully")
+except Exception as e:
+    print(f"⚠️ Could not update transformers: {e}")
+
 # Import training dependencies with robust error handling
 try:
     from transformers import (
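A note on the upgrade step above: the bare `pip` call assumes whatever `pip` is on `PATH` belongs to the Space's Python environment. A minimal sketch of a more defensive variant (an alternative, not what this commit ships) routes the call through the interpreter that is actually running the script:

# Sketch: the same upgrade, pinned to the running interpreter, so the
# upgraded package is guaranteed to land in its site-packages.
import subprocess
import sys

try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade", "transformers"],
        check=True,
    )
    print("✅ Transformers updated successfully")
except Exception as e:
    print(f"⚠️ Could not update transformers: {e}")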
@@ -32,6 +41,7 @@ try:
     from datasets import load_dataset
     from huggingface_hub import HfApi
     TRAINING_AVAILABLE = True
+    print("✅ Transformers imported successfully")
 except ImportError as e:
     print(f"Training dependencies not available: {e}")
     TRAINING_AVAILABLE = False
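The hunk above completes a standard optional-dependency guard: import once at startup, record the outcome in a module-level flag, and let every entry point check the flag before doing real work. Stripped to a minimal runnable form (a sketch built only from pieces visible in this diff):

# Minimal form of app.py's optional-dependency guard.
try:
    import transformers  # may be missing or incompatible in the image
    TRAINING_AVAILABLE = True
    print("✅ Transformers imported successfully")
except ImportError as e:
    print(f"Training dependencies not available: {e}")
    TRAINING_AVAILABLE = False

# Every training entry point then bails out early:
if not TRAINING_AVAILABLE:
    print("❌ Training dependencies not available. Please check the installation.")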
@@ -60,11 +70,11 @@ class TrainingConfig:
 
 class OpenLLMTrainer:
     """
-    Complete training implementation for OpenLLM models
+    Complete training implementation for OpenLLM models with custom architecture handling.
 
     This class handles the entire training pipeline including:
-    - Model loading with …
+    - Model loading with custom architecture support
     - Tokenizer loading using sentencepiece.SentencePieceProcessor()
     - Dataset preparation
     - Training execution
     - Model saving and uploading
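The second bullet refers to loading the tokenizer directly through SentencePiece rather than `AutoTokenizer`. A minimal sketch of such a wrapper, for orientation only: the model path and the encode/decode surface here are assumptions, and the real `OpenLLMTokenizer` appears further down in this diff.

# Hypothetical SentencePiece wrapper; only the SentencePieceProcessor
# usage mirrors app.py, everything else is assumed for illustration.
import sentencepiece as spm

class MinimalSPTokenizer:
    def __init__(self, model_path: str):  # model_path: assumed location of tokenizer.model
        self.sp_processor = spm.SentencePieceProcessor()
        self.sp_processor.Load(model_path)

    def encode(self, text: str) -> list[int]:
        return self.sp_processor.EncodeAsIds(text)

    def decode(self, ids: list[int]) -> str:
        return self.sp_processor.DecodeIds(ids)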
@@ -94,7 +104,7 @@ class OpenLLMTrainer:
 
     def load_model_and_tokenizer(self, model_size: str) -> str:
         """
-        Load the pre-trained OpenLLM model and tokenizer
+        Load the pre-trained OpenLLM model and tokenizer with custom architecture handling.
 
         Args:
             model_size: Size of the model to load ("small", "medium", "large")
@@ -113,29 +123,74 @@
         model_name = model_mapping.get(model_size, "lemms/openllm-small-extended-7k")
 
         print(f"Loading OpenLLM model: {model_name}")
-        print("…")
+        print("Handling custom GPT architecture...")
+
+        # Try multiple approaches to load the model
+        model_loaded = False
 
-        # …
+        # Approach 1: Try with latest transformers and trust_remote_code
         try:
-            print("…")
+            print("Attempting to load model with latest transformers...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16,
                 device_map="auto" if torch.cuda.is_available() else None,
-                trust_remote_code=True
+                trust_remote_code=True,
+                revision="main"  # Use main branch for latest code
             )
+            model_loaded = True
+            print(f"✅ Model loaded successfully with latest transformers: {type(self.model).__name__}")
-
 
-        except Exception as …:
-            print(f"❌ …")
-            …
+        except Exception as e1:
+            print(f"❌ Approach 1 failed: {e1}")
+
+            # Approach 2: Try installing transformers from source
+            try:
+                print("Installing transformers from source...")
+                subprocess.run(["pip", "install", "git+https://github.com/huggingface/transformers.git"], check=True)
+
+                # Reload transformers
+                import importlib
+                import transformers
+                importlib.reload(transformers)
+                from transformers import AutoModelForCausalLM
+
+                print("Attempting to load model with source transformers...")
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float16,
+                    device_map="auto" if torch.cuda.is_available() else None,
+                    trust_remote_code=True
+                )
+                model_loaded = True
+                print(f"✅ Model loaded successfully with source transformers: {type(self.model).__name__}")
+
+            except Exception as e2:
+                print(f"❌ Approach 2 failed: {e2}")
+
+                # Approach 3: Try loading as a generic model
+                try:
+                    print("Attempting to load as generic model...")
+                    from transformers import AutoModel
+
+                    self.model = AutoModel.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float16,
+                        device_map="auto" if torch.cuda.is_available() else None,
+                        trust_remote_code=True
+                    )
+                    model_loaded = True
+                    print(f"✅ Model loaded as generic model: {type(self.model).__name__}")
+
+                except Exception as e3:
+                    print(f"❌ Approach 3 failed: {e3}")
+                    return f"❌ Failed to load OpenLLM model: All approaches failed. Latest error: {str(e3)}"
 
         # Load tokenizer using the same approach as local training code
         try:
             print("Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
 
             # Create a custom tokenizer class that wraps SentencePieceProcessor
-            # This is needed for Hugging Face Trainer compatibility
             class OpenLLMTokenizer:
                 def __init__(self, sp_processor):
                     self.sp_processor = sp_processor
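Two remarks on the fallback chain above. First, `importlib.reload(transformers)` re-executes only the top-level package, and submodules already in `sys.modules` stay stale, so the subsequent `from transformers import AutoModelForCausalLM` is what actually rebinds the class; a freshly installed version is only reliably picked up by a new process. Second, the nested try/except blocks can be read as a flat, ordered fallback chain; the helper below is a hypothetical sketch of that shape, not part of app.py:

# Sketch: the three nested approaches expressed as one fallback loop.
from typing import Callable, List

def load_with_fallbacks(loaders: List[Callable[[], object]]) -> object:
    errors: List[Exception] = []
    for i, loader in enumerate(loaders, start=1):
        try:
            model = loader()
            print(f"✅ Approach {i} succeeded: {type(model).__name__}")
            return model
        except Exception as exc:
            print(f"❌ Approach {i} failed: {exc}")
            errors.append(exc)
    raise RuntimeError(f"All approaches failed; last error: {errors[-1]!r}")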
@@ -440,19 +495,19 @@ def main():
 
     # Create the main Gradio application interface
     with gr.Blocks(
-        title="OpenLLM Training Space - …",
+        title="OpenLLM Training Space - Custom Architecture Fix",
         theme=gr.themes.Soft()
     ) as demo:
 
         # Application Header
-        gr.Markdown("# OpenLLM Training Space - …")
-        gr.Markdown("### *…*")
+        gr.Markdown("# OpenLLM Training Space - Custom Architecture Fix")
+        gr.Markdown("### *Handles Custom GPT Model Architecture*")
         gr.Markdown("---")
 
         # Status Information
         gr.Markdown(f"**Training Available**: {'✅ Yes' if TRAINING_AVAILABLE else '❌ No'}")
         gr.Markdown(f"**SentencePiece Available**: {'✅ Yes' if SENTENCEPIECE_AVAILABLE else '❌ No (using fallback methods)'}")
-        gr.Markdown("**…**")
+        gr.Markdown("**Custom Architecture**: ✅ Multiple loading approaches")
 
         # Main Content Area
         with gr.Row():
@@ -519,9 +574,9 @@ def main():
             stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
 
         # Instructions Section
-        gr.Markdown("## …")
+        gr.Markdown("## Custom Architecture Training Instructions")
         gr.Markdown("""
-        This interface …
+        This interface handles **OpenLLM's custom GPT architecture**:
 
         ### **Step 1: Configure Parameters**
         - **Model Size**: Select the base model to train from (7k models)
@@ -531,9 +586,9 @@ def main():
 
         ### **Step 2: Start Training**
         - Click "Start Training" to begin the actual training process
-        - …
-        - …
-        - …
+        - Automatically updates transformers to latest version
+        - Uses multiple approaches to load custom GPT architecture
+        - Handles custom model types properly
 
         ### **Step 3: Monitor Progress**
         - Watch the status updates and progress information
@@ -558,7 +613,7 @@ def main():
         # Training Function Definition
         def start_complete_training(model_size, max_steps, learning_rate, batch_size):
             """
-            Execute the complete training process with …
+            Execute the complete training process with custom architecture handling.
             """
             if not TRAINING_AVAILABLE:
                 return "❌ Training dependencies not available. Please check the installation."
@@ -572,7 +627,7 @@ def main():
                 batch_size=batch_size
             )
 
-            # Step 1: Load model and tokenizer
+            # Step 1: Load model and tokenizer with custom architecture handling
             status = trainer.load_model_and_tokenizer(model_size)
             if "❌" in status:
                 return status
@@ -618,7 +673,7 @@ def main():
         gr.Markdown("---")
         gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
         gr.Markdown("**Gradio Version**: 4.44.1 (Fully Compatible)")
-        gr.Markdown("**…**")
+        gr.Markdown("**Custom Architecture**: Multiple loading approaches for GPT model")
 
     return demo