smirki committed on
Commit
d553fae
·
verified ·
1 Parent(s): 3884208

Training in progress, step 50, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17872418aae0905dfae97ebb7aebf37822ab2eb996c26e42a3aaf949bfcd271
3
  size 479005064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4107159b4de1a0016c3037c95f8767db5293436cfc3872136bfe4ef2abab1c4
3
  size 479005064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6747204f02d0ca992a089068f22369910c018177bb254de015c0f24b1ce164a1
3
  size 958299770
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7a2e6285b957e4b33a1874bb188455e0aee029f3189c5296dea9eb7f8a4b5e5
3
  size 958299770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24472d7ba56158cdbd89fd1876b65e3d33b864a75708f9c89471972ee7e27f65
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b1cd6f4b1f4bf6d48fec1a76cbc162cf62188f491054248af00ed2b0989265a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f85c03bc0baab845592b9a318e5d8ba23f2d327a8bd0c8d4182e735b0e052fc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4dd73233e298447d6740eca7860dc08eb78819edfe930a45b32b54e01d37739
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0001991064104299902,
5
  "eval_steps": 500,
6
- "global_step": 25,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -31,6 +31,42 @@
31
  "reward_std": 0.25719649270176886,
32
  "rewards/custom_reward_simplified_v7_dblog": 0.496875,
33
  "step": 20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0003982128208599804,
5
  "eval_steps": 500,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
31
  "reward_std": 0.25719649270176886,
32
  "rewards/custom_reward_simplified_v7_dblog": 0.496875,
33
  "step": 20
34
+ },
35
+ {
36
+ "completion_length": 750.46875,
37
+ "epoch": 0.00023892769251598824,
38
+ "grad_norm": 0.15792745351791382,
39
+ "kl": 0.0007828957575839012,
40
+ "learning_rate": 6.25e-07,
41
+ "loss": 0.0,
42
+ "reward": 0.684375,
43
+ "reward_std": 0.3755971297621727,
44
+ "rewards/custom_reward_simplified_v7_dblog": 0.684375,
45
+ "step": 30
46
+ },
47
+ {
48
+ "completion_length": 813.94375,
49
+ "epoch": 0.00031857025668798435,
50
+ "grad_norm": 0.12503573298454285,
51
+ "kl": 0.0007155703555326909,
52
+ "learning_rate": 8.333333333333333e-07,
53
+ "loss": 0.0,
54
+ "reward": 0.565625,
55
+ "reward_std": 0.2761854581534863,
56
+ "rewards/custom_reward_simplified_v7_dblog": 0.565625,
57
+ "step": 40
58
+ },
59
+ {
60
+ "completion_length": 747.675,
61
+ "epoch": 0.0003982128208599804,
62
+ "grad_norm": 0.10329681634902954,
63
+ "kl": 0.0007686431898036971,
64
+ "learning_rate": 1.0416666666666667e-06,
65
+ "loss": 0.0,
66
+ "reward": 0.621875,
67
+ "reward_std": 0.30715219378471376,
68
+ "rewards/custom_reward_simplified_v7_dblog": 0.621875,
69
+ "step": 50
70
  }
71
  ],
72
  "logging_steps": 10,