lemms committed on
Commit
cd40de7
·
verified ·
1 Parent(s): d6ce9fb

Fix: Handle custom GPT model architecture with multiple loading approaches and transformers update

Browse files
Files changed (1) hide show
  1. app.py +87 -32
app.py CHANGED
@@ -1,15 +1,15 @@
1
  #!/usr/bin/env python3
2
  """
3
- OpenLLM Training Space Application - Local Training Code Compatible
4
 
5
- This version uses the same tokenizer loading approach as the local OpenLLM training code:
6
- - Uses sentencepiece.SentencePieceProcessor() directly
7
- - Loads tokenizer from tokenizer.model file
8
- - Compatible with OpenLLM's actual implementation
9
 
10
  Author: Louis Chua Bean Chong
11
  License: GPL-3.0
12
- Version: 2.0.7
13
  Last Updated: 2024
14
  """
15
 
@@ -21,6 +21,15 @@ from typing import Dict, Any, Optional
21
  import threading
22
  from dataclasses import dataclass
23
 
 
 
 
 
 
 
 
 
 
24
  # Import training dependencies with robust error handling
25
  try:
26
  from transformers import (
@@ -32,6 +41,7 @@ try:
32
  from datasets import load_dataset
33
  from huggingface_hub import HfApi
34
  TRAINING_AVAILABLE = True
 
35
  except ImportError as e:
36
  print(f"Training dependencies not available: {e}")
37
  TRAINING_AVAILABLE = False
@@ -60,11 +70,11 @@ class TrainingConfig:
60
 
61
  class OpenLLMTrainer:
62
  """
63
- Complete training implementation for OpenLLM models using local training approach.
64
 
65
  This class handles the entire training pipeline including:
66
- - Model loading with trust_remote_code for custom model classes
67
- - Tokenizer loading using sentencepiece.SentencePieceProcessor() (same as local code)
68
  - Dataset preparation
69
  - Training execution
70
  - Model saving and uploading
@@ -94,7 +104,7 @@ class OpenLLMTrainer:
94
 
95
  def load_model_and_tokenizer(self, model_size: str) -> str:
96
  """
97
- Load the pre-trained OpenLLM model and tokenizer using local training approach.
98
 
99
  Args:
100
  model_size: Size of the model to load ("small", "medium", "large")
@@ -113,29 +123,74 @@ class OpenLLMTrainer:
113
  model_name = model_mapping.get(model_size, "lemms/openllm-small-extended-7k")
114
 
115
  print(f"πŸ”„ Loading OpenLLM model: {model_name}")
116
- print("πŸ“ Using local training approach: sentencepiece.SentencePieceProcessor()")
 
 
 
117
 
118
- # Load model with trust_remote_code for custom model classes
119
  try:
120
- print("πŸ”„ Loading OpenLLM model...")
121
  self.model = AutoModelForCausalLM.from_pretrained(
122
  model_name,
123
- torch_dtype=torch.float16, # Use half precision for memory efficiency
124
  device_map="auto" if torch.cuda.is_available() else None,
125
- trust_remote_code=True # CRITICAL for custom model classes
 
126
  )
127
- print(f"βœ… OpenLLM model loaded successfully: {type(self.model).__name__}")
 
128
 
129
- except Exception as e:
130
- print(f"❌ Failed to load model: {e}")
131
- return f"❌ Failed to load OpenLLM model: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  # Load tokenizer using the same approach as local training code
134
  try:
135
  print("πŸ”„ Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
136
 
137
  # Create a custom tokenizer class that wraps SentencePieceProcessor
138
- # This is needed for Hugging Face Trainer compatibility
139
  class OpenLLMTokenizer:
140
  def __init__(self, sp_processor):
141
  self.sp_processor = sp_processor
@@ -440,19 +495,19 @@ def main():
440
 
441
  # Create the main Gradio application interface
442
  with gr.Blocks(
443
- title="OpenLLM Training Space - Local Code Compatible",
444
  theme=gr.themes.Soft()
445
  ) as demo:
446
 
447
  # Application Header
448
- gr.Markdown("# πŸš€ OpenLLM Training Space - Local Code Compatible")
449
- gr.Markdown("### *Uses sentencepiece.SentencePieceProcessor() Like Local Training*")
450
  gr.Markdown("---")
451
 
452
  # Status Information
453
  gr.Markdown(f"**Training Available**: {'βœ… Yes' if TRAINING_AVAILABLE else '❌ No'}")
454
  gr.Markdown(f"**SentencePiece Available**: {'βœ… Yes' if SENTENCEPIECE_AVAILABLE else '❌ No (using fallback methods)'}")
455
- gr.Markdown("**Tokenizer Approach**: βœ… sentencepiece.SentencePieceProcessor() (Local Code Compatible)")
456
 
457
  # Main Content Area
458
  with gr.Row():
@@ -519,9 +574,9 @@ def main():
519
  stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
520
 
521
  # Instructions Section
522
- gr.Markdown("## πŸ“‹ Local Code Compatible Training Instructions")
523
  gr.Markdown("""
524
- This interface uses the **same tokenizer approach as local OpenLLM training**:
525
 
526
  ### **Step 1: Configure Parameters**
527
  - **Model Size**: Select the base model to train from (7k models)
@@ -531,9 +586,9 @@ def main():
531
 
532
  ### **Step 2: Start Training**
533
  - Click "Start Training" to begin the actual training process
534
- - Uses `sentencepiece.SentencePieceProcessor()` directly (like local code)
535
- - Downloads tokenizer.model from HF Hub and loads with SentencePieceProcessor
536
- - Compatible with OpenLLM's actual implementation
537
 
538
  ### **Step 3: Monitor Progress**
539
  - Watch the status updates and progress information
@@ -558,7 +613,7 @@ def main():
558
  # Training Function Definition
559
  def start_complete_training(model_size, max_steps, learning_rate, batch_size):
560
  """
561
- Execute the complete training process with local code compatible approach.
562
  """
563
  if not TRAINING_AVAILABLE:
564
  return "❌ Training dependencies not available. Please check the installation."
@@ -572,7 +627,7 @@ def main():
572
  batch_size=batch_size
573
  )
574
 
575
- # Step 1: Load model and tokenizer using local approach
576
  status = trainer.load_model_and_tokenizer(model_size)
577
  if "❌" in status:
578
  return status
@@ -618,7 +673,7 @@ def main():
618
  gr.Markdown("---")
619
  gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
620
  gr.Markdown("**Gradio Version**: 4.44.1 (Fully Compatible)")
621
- gr.Markdown("**Tokenizer**: sentencepiece.SentencePieceProcessor() (Local Code Compatible)")
622
 
623
  return demo
624
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ OpenLLM Training Space Application - Custom Model Architecture Fix
4
 
5
+ This version handles the custom GPT model architecture by:
6
+ - Updating transformers to latest version
7
+ - Using alternative model loading approaches
8
+ - Handling custom model architectures properly
9
 
10
  Author: Louis Chua Bean Chong
11
  License: GPL-3.0
12
+ Version: 2.0.8
13
  Last Updated: 2024
14
  """
15
 
 
21
  import threading
22
  from dataclasses import dataclass
23
 
24
+ # First, try to update transformers to latest version
25
+ try:
26
+ import subprocess
27
+ print("πŸ”„ Updating transformers to latest version...")
28
+ subprocess.run(["pip", "install", "--upgrade", "transformers"], check=True)
29
+ print("βœ… Transformers updated successfully")
30
+ except Exception as e:
31
+ print(f"⚠️ Could not update transformers: {e}")
32
+
33
  # Import training dependencies with robust error handling
34
  try:
35
  from transformers import (
 
41
  from datasets import load_dataset
42
  from huggingface_hub import HfApi
43
  TRAINING_AVAILABLE = True
44
+ print("βœ… Transformers imported successfully")
45
  except ImportError as e:
46
  print(f"Training dependencies not available: {e}")
47
  TRAINING_AVAILABLE = False
 
70
 
71
  class OpenLLMTrainer:
72
  """
73
+ Complete training implementation for OpenLLM models with custom architecture handling.
74
 
75
  This class handles the entire training pipeline including:
76
+ - Model loading with custom architecture support
77
+ - Tokenizer loading using sentencepiece.SentencePieceProcessor()
78
  - Dataset preparation
79
  - Training execution
80
  - Model saving and uploading
 
104
 
105
  def load_model_and_tokenizer(self, model_size: str) -> str:
106
  """
107
+ Load the pre-trained OpenLLM model and tokenizer with custom architecture handling.
108
 
109
  Args:
110
  model_size: Size of the model to load ("small", "medium", "large")
 
123
  model_name = model_mapping.get(model_size, "lemms/openllm-small-extended-7k")
124
 
125
  print(f"πŸ”„ Loading OpenLLM model: {model_name}")
126
+ print("πŸ“ Handling custom GPT architecture...")
127
+
128
+ # Try multiple approaches to load the model
129
+ model_loaded = False
130
 
131
+ # Approach 1: Try with latest transformers and trust_remote_code
132
  try:
133
+ print("πŸ”„ Attempting to load model with latest transformers...")
134
  self.model = AutoModelForCausalLM.from_pretrained(
135
  model_name,
136
+ torch_dtype=torch.float16,
137
  device_map="auto" if torch.cuda.is_available() else None,
138
+ trust_remote_code=True,
139
+ revision="main" # Use main branch for latest code
140
  )
141
+ model_loaded = True
142
+ print(f"βœ… Model loaded successfully with latest transformers: {type(self.model).__name__}")
143
 
144
+ except Exception as e1:
145
+ print(f"❌ Approach 1 failed: {e1}")
146
+
147
+ # Approach 2: Try installing transformers from source
148
+ try:
149
+ print("πŸ”„ Installing transformers from source...")
150
+ subprocess.run(["pip", "install", "git+https://github.com/huggingface/transformers.git"], check=True)
151
+
152
+ # Reload transformers
153
+ import importlib
154
+ import transformers
155
+ importlib.reload(transformers)
156
+ from transformers import AutoModelForCausalLM
157
+
158
+ print("πŸ”„ Attempting to load model with source transformers...")
159
+ self.model = AutoModelForCausalLM.from_pretrained(
160
+ model_name,
161
+ torch_dtype=torch.float16,
162
+ device_map="auto" if torch.cuda.is_available() else None,
163
+ trust_remote_code=True
164
+ )
165
+ model_loaded = True
166
+ print(f"βœ… Model loaded successfully with source transformers: {type(self.model).__name__}")
167
+
168
+ except Exception as e2:
169
+ print(f"❌ Approach 2 failed: {e2}")
170
+
171
+ # Approach 3: Try loading as a generic model
172
+ try:
173
+ print("πŸ”„ Attempting to load as generic model...")
174
+ from transformers import AutoModel
175
+
176
+ self.model = AutoModel.from_pretrained(
177
+ model_name,
178
+ torch_dtype=torch.float16,
179
+ device_map="auto" if torch.cuda.is_available() else None,
180
+ trust_remote_code=True
181
+ )
182
+ model_loaded = True
183
+ print(f"βœ… Model loaded as generic model: {type(self.model).__name__}")
184
+
185
+ except Exception as e3:
186
+ print(f"❌ Approach 3 failed: {e3}")
187
+ return f"❌ Failed to load OpenLLM model: All approaches failed. Latest error: {str(e3)}"
188
 
189
  # Load tokenizer using the same approach as local training code
190
  try:
191
  print("πŸ”„ Loading tokenizer using sentencepiece.SentencePieceProcessor()...")
192
 
193
  # Create a custom tokenizer class that wraps SentencePieceProcessor
 
194
  class OpenLLMTokenizer:
195
  def __init__(self, sp_processor):
196
  self.sp_processor = sp_processor
 
495
 
496
  # Create the main Gradio application interface
497
  with gr.Blocks(
498
+ title="OpenLLM Training Space - Custom Architecture Fix",
499
  theme=gr.themes.Soft()
500
  ) as demo:
501
 
502
  # Application Header
503
+ gr.Markdown("# πŸš€ OpenLLM Training Space - Custom Architecture Fix")
504
+ gr.Markdown("### *Handles Custom GPT Model Architecture*")
505
  gr.Markdown("---")
506
 
507
  # Status Information
508
  gr.Markdown(f"**Training Available**: {'βœ… Yes' if TRAINING_AVAILABLE else '❌ No'}")
509
  gr.Markdown(f"**SentencePiece Available**: {'βœ… Yes' if SENTENCEPIECE_AVAILABLE else '❌ No (using fallback methods)'}")
510
+ gr.Markdown("**Custom Architecture**: βœ… Multiple loading approaches")
511
 
512
  # Main Content Area
513
  with gr.Row():
 
574
  stop_btn = gr.Button("⏹️ Stop Training", variant="stop")
575
 
576
  # Instructions Section
577
+ gr.Markdown("## πŸ“‹ Custom Architecture Training Instructions")
578
  gr.Markdown("""
579
+ This interface handles **OpenLLM's custom GPT architecture**:
580
 
581
  ### **Step 1: Configure Parameters**
582
  - **Model Size**: Select the base model to train from (7k models)
 
586
 
587
  ### **Step 2: Start Training**
588
  - Click "Start Training" to begin the actual training process
589
+ - Automatically updates transformers to latest version
590
+ - Uses multiple approaches to load custom GPT architecture
591
+ - Handles custom model types properly
592
 
593
  ### **Step 3: Monitor Progress**
594
  - Watch the status updates and progress information
 
613
  # Training Function Definition
614
  def start_complete_training(model_size, max_steps, learning_rate, batch_size):
615
  """
616
+ Execute the complete training process with custom architecture handling.
617
  """
618
  if not TRAINING_AVAILABLE:
619
  return "❌ Training dependencies not available. Please check the installation."
 
627
  batch_size=batch_size
628
  )
629
 
630
+ # Step 1: Load model and tokenizer with custom architecture handling
631
  status = trainer.load_model_and_tokenizer(model_size)
632
  if "❌" in status:
633
  return status
 
673
  gr.Markdown("---")
674
  gr.Markdown("**Author**: Louis Chua Bean Chong | **Project**: OpenLLM | **License**: GPL-3.0")
675
  gr.Markdown("**Gradio Version**: 4.44.1 (Fully Compatible)")
676
+ gr.Markdown("**Custom Architecture**: Multiple loading approaches for GPT model")
677
 
678
  return demo
679