Spaces:
Running
Running
fix: use float32 not bfloat16 for CPU training (bf16 deadlocks on CPU)
Browse files
app.py
CHANGED
|
@@ -349,7 +349,7 @@ try:
|
|
| 349 |
output_dir="{output_dir}/preprocessed_tensors",
|
| 350 |
checkpoint_dir="{ACE_CHECKPOINT_DIR}",
|
| 351 |
variant="turbo", max_duration=60.0,
|
| 352 |
-
device="cpu", precision="bfloat16",
|
| 353 |
)
|
| 354 |
processed = result.get("processed", 0)
|
| 355 |
failed = result.get("failed", 0)
|
|
@@ -366,8 +366,8 @@ try:
|
|
| 366 |
|
| 367 |
model = load_decoder_for_training(
|
| 368 |
checkpoint_dir="{ACE_CHECKPOINT_DIR}", variant="turbo",
|
| 369 |
-
device="cpu", precision="bfloat16",
|
| 370 |
-
).
|
| 371 |
|
| 372 |
trainer = FixedLoRATrainer(model,
|
| 373 |
LoRAConfigV2(r={rank}, alpha={rank}, dropout=0.0),
|
|
@@ -376,7 +376,7 @@ try:
|
|
| 376 |
dataset_dir="{output_dir}/preprocessed_tensors",
|
| 377 |
output_dir="{output_dir}",
|
| 378 |
max_epochs={epochs}, batch_size=1, learning_rate={lr},
|
| 379 |
-
device="cpu", precision="bfloat16", seed=42,
|
| 380 |
num_workers=0, pin_memory=False,
|
| 381 |
))
|
| 382 |
|
|
|
|
| 349 |
output_dir="{output_dir}/preprocessed_tensors",
|
| 350 |
checkpoint_dir="{ACE_CHECKPOINT_DIR}",
|
| 351 |
variant="turbo", max_duration=60.0,
|
| 352 |
+
device="cpu", precision="float32",
|
| 353 |
)
|
| 354 |
processed = result.get("processed", 0)
|
| 355 |
failed = result.get("failed", 0)
|
|
|
|
| 366 |
|
| 367 |
model = load_decoder_for_training(
|
| 368 |
checkpoint_dir="{ACE_CHECKPOINT_DIR}", variant="turbo",
|
| 369 |
+
device="cpu", precision="float32",
|
| 370 |
+
).float()
|
| 371 |
|
| 372 |
trainer = FixedLoRATrainer(model,
|
| 373 |
LoRAConfigV2(r={rank}, alpha={rank}, dropout=0.0),
|
|
|
|
| 376 |
dataset_dir="{output_dir}/preprocessed_tensors",
|
| 377 |
output_dir="{output_dir}",
|
| 378 |
max_epochs={epochs}, batch_size=1, learning_rate={lr},
|
| 379 |
+
device="cpu", precision="float32", seed=42,
|
| 380 |
num_workers=0, pin_memory=False,
|
| 381 |
))
|
| 382 |
|