Upload run_cloud_training.py with huggingface_hub

run_cloud_training.py CHANGED (+32 -3)
@@ -28,6 +28,9 @@ os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 # Force GPU mode in Space if we're using a pre-quantized model
 os.environ["FORCE_GPU"] = "1"
 
+# Disable tokenizers parallelism warning
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+
 # Create triton directory to avoid warning
 os.makedirs(os.path.expanduser("~/.triton/autotune"), exist_ok=True)
 
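Note: TOKENIZERS_PARALLELISM only takes effect if it is set before the Rust tokenizers backend is first used, which is why the commit places it at module import time. A minimal sketch of the behavior, with an illustrative checkpoint name:

    import os

    # Must run before any tokenizer call; "false" suppresses the
    # "huggingface/tokenizers: The current process just got forked..."
    # warning when DataLoader workers fork the process later.
    os.environ["TOKENIZERS_PARALLELISM"] = "false"

    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("gpt2")  # illustrative checkpoint
    print(tokenizer("hello world")["input_ids"])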
@@ -524,12 +527,24 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
 
     # Create LoRA config
     logger.info("Creating LoRA configuration")
+
+    # For pre-quantized models, we need proper target modules
+    default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"]
+
+    # For pre-quantized models, especially Unsloth ones, we need to be careful with the target modules
+    if is_pre_quantized:
+        # For Unsloth models, use special configuration
+        if "unsloth" in model_name.lower():
+            default_target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
+            logger.info("Using Unsloth-specific LoRA target modules")
+
     lora_config_obj = LoraConfig(
         r=lora_config.get("r", 16),
         lora_alpha=lora_config.get("lora_alpha", 32),
         lora_dropout=lora_config.get("lora_dropout", 0.05),
         bias=lora_config.get("bias", "none"),
-
+        task_type="CAUSAL_LM",  # Explicitly set the task type
+        target_modules=lora_config.get("target_modules", default_target_modules)
     )
 
     # Apply LoRA to model
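As a sanity check on the chosen target-module names, the linear layers of a Llama-style checkpoint can be listed directly. A sketch under the assumption of a Llama-architecture model; the model id is illustrative:

    import torch
    from transformers import AutoModelForCausalLM

    # Illustrative model id; any Llama-style checkpoint works. bitsandbytes
    # Linear4bit layers subclass torch.nn.Linear, so this also covers
    # pre-quantized checkpoints.
    model = AutoModelForCausalLM.from_pretrained("unsloth/llama-2-7b-bnb-4bit")
    linear_names = sorted({name.rsplit(".", 1)[-1]
                           for name, module in model.named_modules()
                           if isinstance(module, torch.nn.Linear)})
    print(linear_names)  # expect q/k/v/o_proj plus gate/up/down_proj (and lm_head)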
@@ -537,6 +552,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
     model = get_peft_model(model, lora_config_obj)
     logger.info("Successfully applied LoRA")
 
+    # Ensure model parameters that need gradients are properly set
+    if is_pre_quantized:
+        logger.info("Verifying gradient settings for pre-quantized model")
+        for name, param in model.named_parameters():
+            if 'lora' in name:  # Only LoRA parameters should be trained
+                if not param.requires_grad:
+                    logger.warning(f"LoRA parameter {name} doesn't have requires_grad=True, fixing...")
+                    param.requires_grad = True
+
     # Always use minimal batch size for HF Space CPU
     if is_running_in_space() and not can_use_4bit and not is_pre_quantized:
         per_device_train_batch_size = 1
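PEFT also ships a built-in summary that complements the manual requires_grad loop above; a short sketch of both checks on the wrapped model:

    # After get_peft_model(), PEFT can report what is actually trainable. On a
    # correctly wrapped model only the LoRA adapters should count, printing
    # something like "trainable params: ... || all params: ... || trainable%: 0.06".
    model.print_trainable_parameters()

    # Equivalent manual count, mirroring the loop in the diff:
    trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total = sum(p.numel() for p in model.parameters())
    print(f"trainable: {trainable} / {total} ({100 * trainable / total:.4f}%)")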
@@ -566,10 +590,15 @@ def train(config_path, dataset_name, output_dir, upload_to_hub=False, hub_repo_n
         gradient_accumulation_steps = training_config.get("gradient_accumulation_steps", 4)
         fp16 = torch.cuda.is_available() and hardware_config.get("fp16", True)
         bf16 = torch.cuda.is_available() and hardware_config.get("bf16", False)
-
+        # Disable gradient checkpointing for pre-quantized models as it can cause gradient issues
+        gradient_checkpointing = torch.cuda.is_available() and hardware_config.get("gradient_checkpointing", True) and not is_pre_quantized
         dataloader_workers = training_config.get("dataloader_num_workers", 4)
-        eval_strategy = training_config.get("eval_strategy", "no")
+        eval_strategy = training_config.get("eval_strategy", "no")
         load_best_model_at_end = False  # Must be False when eval_strategy is "no"
+
+        if is_pre_quantized:
+            logger.info("Disabled gradient checkpointing for pre-quantized model to avoid gradient issues")
+
         logger.info("Using full training parameters for GPU mode")
     else:
         # For Space CPU training mode, use minimal parameters
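For context, these flags are the kind of values that would typically feed into TrainingArguments further down the function. A sketch assuming the script uses the standard Trainer API; output_dir is illustrative, the other names are the variables computed in the diff, and transformers releases older than 4.41 spell eval_strategy as evaluation_strategy:

    from transformers import TrainingArguments

    training_args = TrainingArguments(
        output_dir="./outputs",                        # illustrative path
        per_device_train_batch_size=per_device_train_batch_size,
        gradient_accumulation_steps=gradient_accumulation_steps,
        fp16=fp16,
        bf16=bf16,
        gradient_checkpointing=gradient_checkpointing,  # False for pre-quantized models
        dataloader_num_workers=dataloader_workers,
        eval_strategy=eval_strategy,
        load_best_model_at_end=load_best_model_at_end,  # must stay False with eval_strategy="no"
    )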