Upload run_cloud_training.py with huggingface_hub
run_cloud_training.py CHANGED (+8, -0)

@@ -21,6 +21,9 @@ from transformers.data.data_collator import DataCollatorMixin
 from peft import LoraConfig
 from unsloth import FastLanguageModel
 
+# Configure PyTorch memory allocator for better memory management
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
 # Disable flash attention globally
 os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
 
@@ -514,6 +517,11 @@ def train(config_path, dataset_name, output_dir):
     dtype = torch.float16 if hardware_config.get("fp16", True) else None
     model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
 
+    # Disable generation capabilities for research training
+    logger.info("Disabling generation capabilities - Research training only")
+    model.config.is_decoder = False
+    model.config.task_specific_params = None
+
     # Try different approaches to apply LoRA
     logger.info("Applying LoRA to model")
 
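A note on ordering for the first hunk: PyTorch reads PYTORCH_CUDA_ALLOC_CONF once, when the CUDA caching allocator initializes, so the setting only takes effect if the variable is exported before the first CUDA allocation. A minimal sketch of the pattern, assuming a CUDA-capable host (the tensor shape is illustrative, not from this script):

import os

# Must be set before the first CUDA allocation: the caching allocator
# reads PYTORCH_CUDA_ALLOC_CONF once, at initialization time.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

import torch

if torch.cuda.is_available():
    # First CUDA allocation; with expandable segments the allocator can
    # grow existing segments instead of reserving new fixed-size blocks,
    # which tends to reduce fragmentation-driven OOMs on long runs.
    x = torch.zeros(4096, 4096, device="cuda")
    print(torch.cuda.memory_allocated())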
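The second hunk sets two standard transformers PretrainedConfig attributes rather than anything Unsloth-specific; how strictly they block generate() depends on the model class, so read them as declaring intent and stripping preset generation parameters. A stand-alone sketch of the same flags (gpt2 is a placeholder checkpoint, not the model this script loads):

from transformers import AutoConfig

# Placeholder checkpoint; run_cloud_training.py obtains its model via
# load_model_safely() instead of AutoConfig.
config = AutoConfig.from_pretrained("gpt2")

config.is_decoder = False           # declare the model non-generative
config.task_specific_params = None  # drop presets such as text-generation

print(config.is_decoder, config.task_specific_params)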