Fix: Handle custom GPT model architecture with multiple loading approaches and transformers update
app.py CHANGED
@@ -1,15 +1,15 @@
 #!/usr/bin/env python3
 """
-OpenLLM Training Space Application - …
+OpenLLM Training Space Application - Custom Model Architecture Fix
 
-This version …
-- …
-- …
-- …
+This version handles the custom GPT model architecture by:
+- Updating transformers to latest version
+- Using alternative model loading approaches
+- Handling custom model architectures properly
 
 Author: Louis Chua Bean Chong
 License: GPL-3.0
-Version: 2.0.…
+Version: 2.0.8
 Last Updated: 2024
 """
 
@@ -21,6 +21,15 @@ from typing import Dict, Any, Optional
 import threading
 from dataclasses import dataclass
 
+# First, try to update transformers to latest version
+try:
+    import subprocess
+    print("Updating transformers to latest version...")
+    subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
+    print("✅ Transformers updated successfully")
+except Exception as e:
+    print(f"⚠️ Could not update transformers: {e}")
+
 # Import training dependencies with robust error handling
 try:
     from transformers import (
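A note on the upgrade step above: the bare `pip` call assumes whatever `pip` is on `PATH` belongs to the Space's Python environment. A minimal sketch of a more defensive variant (an alternative, not what this commit ships) routes the call through the interpreter that is actually running the script:

# Sketch: the same upgrade, pinned to the running interpreter, so the
# upgraded package is guaranteed to land in its site-packages.
import subprocess
import sys

try:
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "--upgrade", "transformers"],
        check=True,
    )
    print("✅ Transformers updated successfully")
except Exception as e:
    print(f"⚠️ Could not update transformers: {e}")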
@@ -32,6 +41,7 @@ try:
     from datasets import load_dataset
     from huggingface_hub import HfApi
     TRAINING_AVAILABLE = True
+    print("✅ Transformers imported successfully")
 except ImportError as e:
     print(f"Training dependencies not available: {e}")
     TRAINING_AVAILABLE = False
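The hunk above completes a standard optional-dependency guard: import once at startup, record the outcome in a module-level flag, and let every entry point check the flag before doing real work. Stripped to a minimal runnable form (a sketch built only from pieces visible in this diff):

# Minimal form of app.py's optional-dependency guard.
try:
    import transformers  # may be missing or incompatible in the image
    TRAINING_AVAILABLE = True
    print("✅ Transformers imported successfully")
except ImportError as e:
    print(f"Training dependencies not available: {e}")
    TRAINING_AVAILABLE = False

# Every training entry point then bails out early:
if not TRAINING_AVAILABLE:
    print("❌ Training dependencies not available. Please check the installation.")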
@@ -60,11 +70,11 @@ class TrainingConfig:
 
 class OpenLLMTrainer:
     """
-    Complete training implementation for OpenLLM models
+    Complete training implementation for OpenLLM models with custom architecture handling.
 
     This class handles the entire training pipeline including:
-    - Model loading with …
+    - Model loading with custom architecture support
     - Tokenizer loading using sentencepiece.SentencePieceProcessor()
     - Dataset preparation
     - Training execution
     - Model saving and uploading
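The second bullet refers to loading the tokenizer directly through SentencePiece rather than `AutoTokenizer`. A minimal sketch of such a wrapper, for orientation only: the model path and the encode/decode surface here are assumptions, and the real `OpenLLMTokenizer` appears further down in this diff.

# Hypothetical SentencePiece wrapper; only the SentencePieceProcessor
# usage mirrors app.py, everything else is assumed for illustration.
import sentencepiece as spm

class MinimalSPTokenizer:
    def __init__(self, model_path: str):  # model_path: assumed location of tokenizer.model
        self.sp_processor = spm.SentencePieceProcessor()
        self.sp_processor.Load(model_path)

    def encode(self, text: str) -> list[int]:
        return self.sp_processor.EncodeAsIds(text)

    def decode(self, ids: list[int]) -> str:
        return self.sp_processor.DecodeIds(ids)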
@@ -94,7 +104,7 @@ class OpenLLMTrainer:
 
     def load_model_and_tokenizer(self, model_size: str) -> str:
         """
-        Load the pre-trained OpenLLM model and tokenizer
+        Load the pre-trained OpenLLM model and tokenizer with custom architecture handling.
 
         Args:
             model_size: Size of the model to load ("small", "medium", "large")
@@ -113,29 +123,74 @@
         model_name = model_mapping.get(model_size, "lemms/openllm-small-extended-7k")
 
         print(f"Loading OpenLLM model: {model_name}")
-        print("…")
+        print("Handling custom GPT architecture...")
+
+        # Try multiple approaches to load the model
+        model_loaded = False
 
-        # …
+        # Approach 1: Try with latest transformers and trust_remote_code
         try:
-            print("…")
+            print("Attempting to load model with latest transformers...")
             self.model = AutoModelForCausalLM.from_pretrained(
                 model_name,
                 torch_dtype=torch.float16,
                 device_map="auto" if torch.cuda.is_available() else None,
-                trust_remote_code=True
+                trust_remote_code=True,
+                revision="main"  # Use main branch for latest code
             )
+            model_loaded = True
+            print(f"✅ Model loaded successfully with latest transformers: {type(self.model).__name__}")
-
 
-        except Exception as …:
-            print(f"❌ …")
-            …
+        except Exception as e1:
+            print(f"❌ Approach 1 failed: {e1}")
+
+            # Approach 2: Try installing transformers from source
+            try:
+                print("Installing transformers from source...")
+                subprocess.run(["pip", "install", "git+https://github.com/huggingface/transformers.git"], check=True)
+
+                # Reload transformers
+                import importlib
+                import transformers
+                importlib.reload(transformers)
+                from transformers import AutoModelForCausalLM
+
+                print("Attempting to load model with source transformers...")
+                self.model = AutoModelForCausalLM.from_pretrained(
+                    model_name,
+                    torch_dtype=torch.float16,
+                    device_map="auto" if torch.cuda.is_available() else None,
+                    trust_remote_code=True
+                )
+                model_loaded = True
+                print(f"✅ Model loaded successfully with source transformers: {type(self.model).__name__}")
+
+            except Exception as e2:
+                print(f"❌ Approach 2 failed: {e2}")
+
+                # Approach 3: Try loading as a generic model
+                try:
+                    print("Attempting to load as generic model...")
+                    from transformers import AutoModel
+
+                    self.model = AutoModel.from_pretrained(
+                        model_name,
+                        torch_dtype=torch.float16,
+                        device_map="auto" if torch.cuda.is_available() else None,
+                        trust_remote_code=True
+                    )
+                    model_loaded = True
+                    print(f"✅ Model loaded as generic model: {type(self.model).__name__}")
+
+                except Exception as e3:
+                    print(f"❌ Approach 3 failed: {e3}")
+                    return f"❌ Failed to load OpenLLM model: All approaches failed. Latest error: {str(e3)}"
 
         # Load tokenizer using the same approach as local training code
         try:
             print("Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
 
             # Create a custom tokenizer class that wraps SentencePieceProcessor
-            # This is needed for Hugging Face Trainer compatibility
             class OpenLLMTokenizer:
                 def __init__(self, sp_processor):
                     self.sp_processor = sp_processor
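Two remarks on the fallback chain above. First, `importlib.reload(transformers)` re-executes only the top-level package, and submodules already in `sys.modules` stay stale, so the subsequent `from transformers import AutoModelForCausalLM` is what actually rebinds the class; a freshly installed version is only reliably picked up by a new process. Second, the nested try/except blocks can be read as a flat, ordered fallback chain; the helper below is a hypothetical sketch of that shape, not part of app.py:

# Sketch: the three nested approaches expressed as one fallback loop.
from typing import Callable, List

def load_with_fallbacks(loaders: List[Callable[[], object]]) -> object:
    errors: List[Exception] = []
    for i, loader in enumerate(loaders, start=1):
        try:
            model = loader()
            print(f"✅ Approach {i} succeeded: {type(model).__name__}")
            return model
        except Exception as exc:
            print(f"❌ Approach {i} failed: {exc}")
            errors.append(exc)
    raise RuntimeError(f"All approaches failed; last error: {errors[-1]!r}")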
@@ -440,19 +495,19 @@ def main():
 
     # Create the main Gradio application interface
     with gr.Blocks(
-        title="OpenLLM Training Space - …",
+        title="OpenLLM Training Space - Custom Architecture Fix",
         theme=gr.themes.Soft()
     ) as demo:
 
         # Application Header
-        gr.Markdown("# OpenLLM Training Space - …")
-        gr.Markdown("### *…*")
+        gr.Markdown("# OpenLLM Training Space - Custom Architecture Fix")
+        gr.Markdown("### *Handles Custom GPT Model Architecture*")
         gr.Markdown("---")
 
         # Status Information
         gr.Markdown(f"**Training Available**: {'✅ Yes' if TRAINING_AVAILABLE else '❌ No'}")
         gr.Markdown(f"**SentencePiece Available**: {'✅ Yes' if SENTENCEPIECE_AVAILABLE else '❌ No (using fallback methods)'}")
-        gr.Markdown("**…**")
+        gr.Markdown("**Custom Architecture**: ✅ Multiple loading approaches")
 
         # Main Content Area
         with gr.Row():
@@ -519,9 +574,9 @@ def main():
             stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
 
         # Instructions Section
-        gr.Markdown("## …")
+        gr.Markdown("## Custom Architecture Training Instructions")
         gr.Markdown("""
-        This interface …
+        This interface handles **OpenLLM's custom GPT architecture**:
 
         ### **Step 1: Configure Parameters**
         - **Model Size**: Select the base model to train from (7k models)
@@ -531,9 +586,9 @@ def main():
 
         ### **Step 2: Start Training**
         - Click "Start Training" to begin the actual training process
-        - …
-        - …
-        - …
+        - Automatically updates transformers to latest version
+        - Uses multiple approaches to load custom GPT architecture
+        - Handles custom model types properly
 
         ### **Step 3: Monitor Progress**
         - Watch the status updates and progress information
@@ -558,7 +613,7 @@ def main():
         # Training Function Definition
         def start_complete_training(model_size, max_steps, learning_rate, batch_size):
             """
-            Execute the complete training process with …
+            Execute the complete training process with custom architecture handling.
             """
             if not TRAINING_AVAILABLE:
                 return "❌ Training dependencies not available. Please check the installation."
@@ -572,7 +627,7 @@ def main():
                 batch_size=batch_size
             )
 
-            # Step 1: Load model and tokenizer
+            # Step 1: Load model and tokenizer with custom architecture handling
             status = trainer.load_model_and_tokenizer(model_size)
             if "❌" in status:
                 return status
@@ -618,7 +673,7 @@ def main():
         gr.Markdown("---")
         gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
         gr.Markdown("**Gradio Version**: 4.44.1 (Fully Compatible)")
-        gr.Markdown("**…**")
+        gr.Markdown("**Custom Architecture**: Multiple loading approaches for GPT model")
 
     return demo