Keshav051
/

AntiAtropos

Model card Files Files and versions

div18 committed on Apr 25

Commit

86112b1

·

1 Parent(s): b1e6564

don't OOM

Files changed (2) hide show

training/config.yaml +1 -1
training/launch_train.py +1 -1

training/config.yaml CHANGED Viewed

@@ -46,7 +46,7 @@ loss_type: "reinforce_baseline"       # reinforce | reinforce_baseline | grpo
 num_episodes_per_iteration: 4         # Safe now: max_seq_len=512 + loss_batch_size=8 + CPU offload
 num_iterations: 500                   # Total training iterations
 parallel_episodes: true               # Batch generation across episodes (10x faster)
-loss_batch_size: 8                    # Safe now: for_training removed, ~19 GiB freed
 learning_rate: 2.0e-4
 per_device_train_batch_size: 2        # A10G can handle 2 with seq_len=1024
 gradient_accumulation_steps: 4        # Effective batch = 2*4 = 8 transitions

 num_episodes_per_iteration: 4         # Safe now: max_seq_len=512 + loss_batch_size=8 + CPU offload
 num_iterations: 500                   # Total training iterations
 parallel_episodes: true               # Batch generation across episodes (10x faster)
+loss_batch_size: 4                    # Safe now: for_training removed, ~19 GiB freed
 learning_rate: 2.0e-4
 per_device_train_batch_size: 2        # A10G can handle 2 with seq_len=1024
 gradient_accumulation_steps: 4        # Effective batch = 2*4 = 8 transitions

training/launch_train.py CHANGED Viewed

@@ -78,7 +78,7 @@ DOCKER_IMAGE = "pytorch/pytorch:2.10.0-cuda12.6-cudnn9-devel"
 DEFAULT_NUM_ITERATIONS = 500
 DEFAULT_NUM_EPISODES = 4
-DEFAULT_MAX_STEPS = 40
 DEFAULT_EVAL_INTERVAL = 50
 DEFAULT_CHECKPOINT_INTERVAL = 25
 DEFAULT_PLOT_INTERVAL = 25

 DEFAULT_NUM_ITERATIONS = 500
 DEFAULT_NUM_EPISODES = 4
+DEFAULT_MAX_STEPS = 20
 DEFAULT_EVAL_INTERVAL = 50
 DEFAULT_CHECKPOINT_INTERVAL = 25
 DEFAULT_PLOT_INTERVAL = 25