Spaces:
Paused
Paused
slivk committed on
Commit ·
cbaf615
1
Parent(s): da22234
fix: Explicitly disable bf16 for T4 GPU compatibility
Browse files- run_sft_full.py +3 -0
- run_sft_test.py +1 -0
run_sft_full.py
CHANGED
|
@@ -89,6 +89,7 @@ training_args = SFTConfig(
|
|
| 89 |
|
| 90 |
# Precision
|
| 91 |
fp16=True, # Use FP16 for training
|
|
|
|
| 92 |
|
| 93 |
# Logging
|
| 94 |
logging_steps=5,
|
|
@@ -127,6 +128,8 @@ trainer = SFTTrainer(
|
|
| 127 |
print("✅ Trainer initialized")
|
| 128 |
|
| 129 |
# Show GPU memory before training
|
|
|
|
|
|
|
| 130 |
if torch.cuda.is_available():
|
| 131 |
gpu_stats = torch.cuda.get_device_properties(0)
|
| 132 |
start_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
|
|
|
|
| 89 |
|
| 90 |
# Precision
|
| 91 |
fp16=True, # Use FP16 for training
|
| 92 |
+
bf16=False, # Explicitly disable bfloat16 (T4 compatibility)
|
| 93 |
|
| 94 |
# Logging
|
| 95 |
logging_steps=5,
|
|
|
|
| 128 |
print("✅ Trainer initialized")
|
| 129 |
|
| 130 |
# Show GPU memory before training
|
| 131 |
+
print(f"CUDA available: {torch.cuda.is_available()}")
|
| 132 |
+
print(f"PyTorch CUDA version: {torch.version.cuda}")
|
| 133 |
if torch.cuda.is_available():
|
| 134 |
gpu_stats = torch.cuda.get_device_properties(0)
|
| 135 |
start_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
|
run_sft_test.py
CHANGED
|
@@ -85,6 +85,7 @@ training_args = SFTConfig(
|
|
| 85 |
|
| 86 |
# Precision
|
| 87 |
fp16=True,
|
|
|
|
| 88 |
|
| 89 |
# Logging
|
| 90 |
logging_steps=1,
|
|
|
|
| 85 |
|
| 86 |
# Precision
|
| 87 |
fp16=True,
|
| 88 |
+
bf16=False, # Explicitly disable bfloat16 (T4 compatibility)
|
| 89 |
|
| 90 |
# Logging
|
| 91 |
logging_steps=1,
|