walidsobhie-code committed on
Commit
27a755a
·
1 Parent(s): b098bb5

fix: disable AMP (fp16=False, bf16=False) to bypass P100 GradScaler bug

Browse files
Files changed (1) hide show
  1. train_simple_nobnb.py +2 -2
train_simple_nobnb.py CHANGED
@@ -226,8 +226,8 @@ def train(config: dict):
226
  logging_steps=training_config.get("logging_steps", 10),
227
  save_steps=training_config.get("save_steps", 100),
228
  save_total_limit=training_config.get("save_total_limit", 2),
229
- bf16=use_bf16,
230
- fp16=use_fp16,
231
  gradient_checkpointing=training_config.get("gradient_checkpointing", True),
232
  gradient_checkpointing_kwargs={"use_reentrant": False},
233
  evaluation_strategy="steps" if eval_dataset else "no",
 
226
  logging_steps=training_config.get("logging_steps", 10),
227
  save_steps=training_config.get("save_steps", 100),
228
  save_total_limit=training_config.get("save_total_limit", 2),
229
+ bf16=False,
230
+ fp16=False, # Disabled — P100/Pascal AMP has GradScaler bugs with fp16
231
  gradient_checkpointing=training_config.get("gradient_checkpointing", True),
232
  gradient_checkpointing_kwargs={"use_reentrant": False},
233
  evaluation_strategy="steps" if eval_dataset else "no",