Spaces:
Sleeping
Sleeping
Upload run_cloud_training.py with huggingface_hub
Browse files- run_cloud_training.py +63 -12
run_cloud_training.py
CHANGED
|
@@ -16,7 +16,7 @@ from dotenv import load_dotenv
|
|
| 16 |
import torch
|
| 17 |
from datasets import load_dataset
|
| 18 |
import transformers
|
| 19 |
-
from transformers import AutoTokenizer, TrainingArguments, Trainer
|
| 20 |
from transformers.data.data_collator import DataCollatorMixin
|
| 21 |
from peft import LoraConfig
|
| 22 |
from unsloth import FastLanguageModel
|
|
@@ -153,6 +153,58 @@ def remove_training_marker():
|
|
| 153 |
os.remove("TRAINING_ACTIVE")
|
| 154 |
logger.info("Removed training active marker")
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
def train(config_path, dataset_name, output_dir):
|
| 157 |
"""Main training function - RESEARCH TRAINING PHASE ONLY"""
|
| 158 |
# Load environment variables
|
|
@@ -186,7 +238,8 @@ def train(config_path, dataset_name, output_dir):
|
|
| 186 |
# Print configuration summary
|
| 187 |
logger.info("RESEARCH TRAINING PHASE ACTIVE - No output generation")
|
| 188 |
logger.info("Configuration Summary:")
|
| 189 |
-
|
|
|
|
| 190 |
logger.info(f"Dataset: {dataset_name if dataset_name != 'phi4-cognitive-dataset' else DEFAULT_DATASET}")
|
| 191 |
logger.info(f"Output directory: {output_dir}")
|
| 192 |
logger.info("IMPORTANT: Using already 4-bit quantized model - not re-quantizing")
|
|
@@ -197,7 +250,7 @@ def train(config_path, dataset_name, output_dir):
|
|
| 197 |
# Initialize tokenizer (just for model initialization, not for tokenizing data)
|
| 198 |
logger.info("Loading tokenizer (for model initialization only, not for tokenizing data)")
|
| 199 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 200 |
-
|
| 201 |
trust_remote_code=True
|
| 202 |
)
|
| 203 |
tokenizer.pad_token = tokenizer.eos_token
|
|
@@ -215,15 +268,13 @@ def train(config_path, dataset_name, output_dir):
|
|
| 215 |
target_modules=lora_config.get("target_modules", ["q_proj", "k_proj", "v_proj", "o_proj"])
|
| 216 |
)
|
| 217 |
|
| 218 |
-
# Initialize model with
|
| 219 |
-
logger.info("Loading pre-quantized model
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
use_existing_bnb_quantization=True # Use the existing quantization
|
| 226 |
-
)
|
| 227 |
model = FastLanguageModel.get_peft_model(
|
| 228 |
model,
|
| 229 |
peft_config=peft_config,
|
|
|
|
| 16 |
import torch
|
| 17 |
from datasets import load_dataset
|
| 18 |
import transformers
|
| 19 |
+
from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForCausalLM
|
| 20 |
from transformers.data.data_collator import DataCollatorMixin
|
| 21 |
from peft import LoraConfig
|
| 22 |
from unsloth import FastLanguageModel
|
|
|
|
| 153 |
os.remove("TRAINING_ACTIVE")
|
| 154 |
logger.info("Removed training active marker")
|
| 155 |
|
| 156 |
+
def load_model_safely(model_name, max_seq_length, dtype=None):
    """
    Load a causal LM and its tokenizer, falling back through several loaders.

    Strategies are tried in this order:

    1. Unsloth ``FastLanguageModel.from_pretrained`` with ``load_in_4bit=True``.
    2. If step 1 raises a ``TypeError`` whose message contains
       "unexpected keyword argument", Unsloth again without ``load_in_4bit``.
    3. If any Unsloth attempt raises, plain Hugging Face loading via
       ``AutoTokenizer`` / ``AutoModelForCausalLM`` with a 4-bit
       bitsandbytes quantization config.

    Args:
        model_name: Model identifier or path handed to ``from_pretrained``.
        max_seq_length: Forwarded to Unsloth only; the Hugging Face fallback
            does not use it.
        dtype: Optional dtype forwarded to Unsloth. The Hugging Face fallback
            uses ``torch.float16`` when this is ``None`` (or otherwise falsy).

    Returns:
        A ``(model, tokenizer)`` tuple.

    Raises:
        Exception: Whatever the Hugging Face fallback raises if it fails too;
            Unsloth failures themselves are logged and trigger the fallback.
    """
    try:
        logger.info(f"Attempting to load model with unsloth optimizations: {model_name}")
        try:
            # First attempt: standard unsloth loading, without the parameter
            # that was reported as problematic for this model family.
            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=model_name,
                max_seq_length=max_seq_length,
                dtype=dtype,
                load_in_4bit=True,  # This should work for already quantized models
            )
            logger.info("Model loaded successfully with unsloth with 4-bit quantization")
            return model, tokenizer

        except TypeError as e:
            # Only a keyword-argument mismatch is retried here; any other
            # TypeError is re-raised and handled by the outer fallback.
            if "unexpected keyword argument" not in str(e):
                raise

            logger.warning(f"Unsloth loading error with 4-bit: {e}")
            logger.info("Trying alternative loading method for Qwen model...")

            # Second attempt: same call, minus the explicit load_in_4bit flag.
            model, tokenizer = FastLanguageModel.from_pretrained(
                model_name=model_name,
                max_seq_length=max_seq_length,
                dtype=dtype,
            )
            logger.info("Model loaded successfully with unsloth using alternative method")
            return model, tokenizer

    except Exception as e:
        # Deliberately broad: any unsloth failure falls back to standard
        # Hugging Face loading rather than aborting the run.
        logger.warning(f"Unsloth loading failed: {e}")
        logger.info("Falling back to standard Hugging Face loading...")

        # Imported locally so the module's top-level import block is untouched.
        from transformers import BitsAndBytesConfig

        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            device_map="auto",
            torch_dtype=dtype or torch.float16,
            # The bare `load_in_4bit=True` keyword is deprecated in
            # transformers; pass the equivalent quantization config instead.
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            # Match the tokenizer: a checkpoint that ships custom code needs
            # this for the model as well, otherwise only the tokenizer loads.
            trust_remote_code=True,
        )
        logger.info("Model loaded successfully with standard HF loading")
        return model, tokenizer
|
| 207 |
+
|
| 208 |
def train(config_path, dataset_name, output_dir):
|
| 209 |
"""Main training function - RESEARCH TRAINING PHASE ONLY"""
|
| 210 |
# Load environment variables
|
|
|
|
| 238 |
# Print configuration summary
|
| 239 |
logger.info("RESEARCH TRAINING PHASE ACTIVE - No output generation")
|
| 240 |
logger.info("Configuration Summary:")
|
| 241 |
+
model_name = model_config.get("model_name_or_path")
|
| 242 |
+
logger.info(f"Model: {model_name}")
|
| 243 |
logger.info(f"Dataset: {dataset_name if dataset_name != 'phi4-cognitive-dataset' else DEFAULT_DATASET}")
|
| 244 |
logger.info(f"Output directory: {output_dir}")
|
| 245 |
logger.info("IMPORTANT: Using already 4-bit quantized model - not re-quantizing")
|
|
|
|
| 250 |
# Initialize tokenizer (just for model initialization, not for tokenizing data)
|
| 251 |
logger.info("Loading tokenizer (for model initialization only, not for tokenizing data)")
|
| 252 |
tokenizer = AutoTokenizer.from_pretrained(
|
| 253 |
+
model_name,
|
| 254 |
trust_remote_code=True
|
| 255 |
)
|
| 256 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
|
| 268 |
target_modules=lora_config.get("target_modules", ["q_proj", "k_proj", "v_proj", "o_proj"])
|
| 269 |
)
|
| 270 |
|
| 271 |
+
# Initialize model with our safe loading function
|
| 272 |
+
logger.info("Loading pre-quantized model safely")
|
| 273 |
+
dtype = torch.float16 if hardware_config.get("fp16", True) else None
|
| 274 |
+
model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
|
| 275 |
+
|
| 276 |
+
# Apply LoRA
|
| 277 |
+
logger.info("Applying LoRA to model")
|
|
|
|
|
|
|
| 278 |
model = FastLanguageModel.get_peft_model(
|
| 279 |
model,
|
| 280 |
peft_config=peft_config,
|