Spaces:
Sleeping
Sleeping
Upload run_cloud_training.py with huggingface_hub
Browse files
- run_cloud_training.py +9 -0
run_cloud_training.py
CHANGED
|
@@ -515,6 +515,15 @@ def train(config_path, dataset_name, output_dir):
|
|
| 515 |
# Initialize model with our safe loading function
|
| 516 |
logger.info("Loading pre-quantized model safely")
|
| 517 |
dtype = torch.float16 if hardware_config.get("fp16", True) else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 518 |
model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
|
| 519 |
|
| 520 |
# Disable generation capabilities for research training
|
|
|
|
| 515 |
# Initialize model with our safe loading function
|
| 516 |
logger.info("Loading pre-quantized model safely")
|
| 517 |
dtype = torch.float16 if hardware_config.get("fp16", True) else None
|
| 518 |
+
|
| 519 |
+
# Force eager attention implementation
|
| 520 |
+
os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
|
| 521 |
+
logger.info("Flash attention has been DISABLED globally via environment variable")
|
| 522 |
+
|
| 523 |
+
# Update hardware config to ensure eager attention
|
| 524 |
+
hardware_config["attn_implementation"] = "eager"
|
| 525 |
+
hardware_config["use_flash_attention"] = False
|
| 526 |
+
|
| 527 |
model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
|
| 528 |
|
| 529 |
# Disable generation capabilities for research training
|