mindchain committed on
Commit
95008ad
·
verified ·
1 Parent(s): 0168a3e

Fix hang: remove use_cpu parameter, reduce generations to 2, batch to 2, steps to 20

Browse files
Files changed (1) hide show
  1. train_arithmetic.py +3 -4
train_arithmetic.py CHANGED
@@ -19,7 +19,7 @@ from trl import GRPOConfig, GRPOTrainer
19
 
20
  BASE_MODEL = "Qwen/Qwen3-0.6B-Base"
21
  OUTPUT_MODEL = "mindchain/qwen3-0.6b-arithmetic"
22
- MAX_STEPS = 50
23
  NUM_SAMPLES = 500 # Training samples
24
  EVAL_SAMPLES = 20 # For baseline test
25
 
@@ -184,8 +184,8 @@ def main():
184
  training_args = GRPOConfig(
185
  output_dir="./outputs",
186
  max_steps=MAX_STEPS,
187
- per_device_train_batch_size=4,
188
- num_generations=4,
189
  learning_rate=2e-4,
190
  beta=0.0, # No KL penalty for this task
191
  bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
@@ -196,7 +196,6 @@ def main():
196
  save_steps=MAX_STEPS, # Save at end
197
  push_to_hub=False, # We'll push manually
198
  report_to="none",
199
- use_cpu=is_cpu, # Explicitly tell trainer to use CPU
200
  )
201
 
202
  print("🚀 Starting GRPO Training...")
 
19
 
20
  BASE_MODEL = "Qwen/Qwen3-0.6B-Base"
21
  OUTPUT_MODEL = "mindchain/qwen3-0.6b-arithmetic"
22
+ MAX_STEPS = 20 # Reduced for CPU testing
23
  NUM_SAMPLES = 500 # Training samples
24
  EVAL_SAMPLES = 20 # For baseline test
25
 
 
184
  training_args = GRPOConfig(
185
  output_dir="./outputs",
186
  max_steps=MAX_STEPS,
187
+ per_device_train_batch_size=2, # Reduced for CPU
188
+ num_generations=2, # Reduced for CPU (faster)
189
  learning_rate=2e-4,
190
  beta=0.0, # No KL penalty for this task
191
  bf16=torch.cuda.is_available() and torch.cuda.is_bf16_supported(),
 
196
  save_steps=MAX_STEPS, # Save at end
197
  push_to_hub=False, # We'll push manually
198
  report_to="none",
 
199
  )
200
 
201
  print("🚀 Starting GRPO Training...")