Spaces:

George-API
/

qwen4bit

Sleeping

George-API committed on Mar 11

Commit

2457cec

verified ·

1 Parent(s): eab09f4

Upload run_cloud_training.py with huggingface_hub

Files changed (1) hide show

run_cloud_training.py CHANGED Viewed

@@ -515,6 +515,15 @@ def train(config_path, dataset_name, output_dir):
         # Initialize model with our safe loading function
         logger.info("Loading pre-quantized model safely")
         dtype = torch.float16 if hardware_config.get("fp16", True) else None
         model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
         # Disable generation capabilities for research training

         # Initialize model with our safe loading function
         logger.info("Loading pre-quantized model safely")
         dtype = torch.float16 if hardware_config.get("fp16", True) else None
+        # Force eager attention implementation
+        os.environ["TRANSFORMERS_NO_FLASH_ATTENTION"] = "1"
+        logger.info("Flash attention has been DISABLED globally via environment variable")
+        # Update hardware config to ensure eager attention
+        hardware_config["attn_implementation"] = "eager"
+        hardware_config["use_flash_attention"] = False
         model, tokenizer = load_model_safely(model_name, max_seq_length, dtype)
         # Disable generation capabilities for research training