harsha070 commited on
Commit
3064a20
·
verified ·
1 Parent(s): 6851022

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:212db1c26305411a7481f2d8ce83d1de464611483b3d2a7e0073511a9a28bd50
3
  size 7642181896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2557098ef4b87535e5e0fd4378a8cf45a8b75aabf3a08c3cd2adb53c632e4d
3
  size 7642181896
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.8666666666666667,
6
  "eval_steps": 500,
7
- "global_step": 130,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -372,11 +372,39 @@
372
  "rewards/JointRewardFunction/std": 0.24954410195350646,
373
  "step": 130,
374
  "step_time": 18.954484624400358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
375
  }
376
  ],
377
  "logging_steps": 10,
378
  "max_steps": 140,
379
- "num_input_tokens_seen": 669175,
380
  "num_train_epochs": 1,
381
  "save_steps": 10,
382
  "stateful_callbacks": {
@@ -386,7 +414,7 @@
386
  "should_evaluate": false,
387
  "should_log": false,
388
  "should_save": true,
389
- "should_training_stop": false
390
  },
391
  "attributes": {}
392
  }
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.9333333333333333,
6
  "eval_steps": 500,
7
+ "global_step": 140,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
372
  "rewards/JointRewardFunction/std": 0.24954410195350646,
373
  "step": 130,
374
  "step_time": 18.954484624400358
375
+ },
376
+ {
377
+ "clip_ratio/high_max": 0.0,
378
+ "clip_ratio/high_mean": 0.0,
379
+ "clip_ratio/low_mean": 0.0,
380
+ "clip_ratio/low_min": 0.0,
381
+ "clip_ratio/region_mean": 0.0,
382
+ "completions/clipped_ratio": 0.65,
383
+ "completions/max_length": 512.0,
384
+ "completions/max_terminated_length": 358.6,
385
+ "completions/mean_length": 460.2875,
386
+ "completions/mean_terminated_length": 291.386669921875,
387
+ "completions/min_length": 324.7,
388
+ "completions/min_terminated_length": 222.3,
389
+ "entropy": 0.27632327415049074,
390
+ "epoch": 0.9333333333333333,
391
+ "frac_reward_zero_std": 0.8,
392
+ "grad_norm": 0.00445556640625,
393
+ "kl": 0.03374400998000056,
394
+ "learning_rate": 7.142857142857144e-08,
395
+ "loss": 0.00835585966706276,
396
+ "num_tokens": 720794.0,
397
+ "reward": 0.925,
398
+ "reward_std": 0.1632926881313324,
399
+ "rewards/JointRewardFunction/mean": 0.925,
400
+ "rewards/JointRewardFunction/std": 0.1632926881313324,
401
+ "step": 140,
402
+ "step_time": 18.98217402150076
403
  }
404
  ],
405
  "logging_steps": 10,
406
  "max_steps": 140,
407
+ "num_input_tokens_seen": 720794,
408
  "num_train_epochs": 1,
409
  "save_steps": 10,
410
  "stateful_callbacks": {
 
414
  "should_evaluate": false,
415
  "should_log": false,
416
  "should_save": true,
417
+ "should_training_stop": true
418
  },
419
  "attributes": {}
420
  }