SamMikaelson commited on
Commit
6edc835
·
verified ·
1 Parent(s): 4d46fad

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c95fd793fc0337979ec6c4129a6080db38f5dbe61244bcc458cca5dc773ea2c
3
  size 264308896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48fc89abd90a21ef7b36e2b1f8fbb4476ac359132a337229fa6f31e81625e032
3
  size 264308896
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbf558869a3675275ba8ba849ee958ef7d45c6303085d6c7a7ab1e5d746ef26a
3
  size 136089907
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:682da4dd151c64857eabf99f9fbc7c7e767ac42a1b626f95f3f9abc1cabd7582
3
  size 136089907
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02b05853d6fc885b4edf189843dd1363333f7b7831d00142f0b849df78b46cdd
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88dc39fe998588d5850095831bc034e4ff1606c17e9a23c7c4eb6652f638f52f
3
  size 14645
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fc0fa4ae2cdc54c52e59725723f1bf62720aad412266cfb060da831da5d7b169
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473fb6db6e2b1e587cd461a781ffd1335528b38f4ac58fcad263180181c9f9f5
3
  size 1383
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f85084d35070cb6a6db912f297b8d1016ee4f73d52182deee9f152e602928a16
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9bfc4cf403bbe48b7708cd2f91056532b3ac5b91e064eba6cfc7c555597c4bde
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.6072067115940035,
6
  "eval_steps": 500,
7
- "global_step": 8830,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -24732,11 +24732,39 @@
24732
  "rewards/match_format_exactly/mean": 1.0,
24733
  "rewards/match_format_exactly/std": 0.0,
24734
  "step": 8830
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24735
  }
24736
  ],
24737
  "logging_steps": 10,
24738
  "max_steps": 12500,
24739
- "num_input_tokens_seen": 11005827,
24740
  "num_train_epochs": 1,
24741
  "save_steps": 10,
24742
  "stateful_callbacks": {
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.6078943749140421,
6
  "eval_steps": 500,
7
+ "global_step": 8840,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
24732
  "rewards/match_format_exactly/mean": 1.0,
24733
  "rewards/match_format_exactly/std": 0.0,
24734
  "step": 8830
24735
+ },
24736
+ {
24737
+ "completion_length": 23.075,
24738
+ "completions/clipped_ratio": 0.0,
24739
+ "completions/max_length": 57.9,
24740
+ "completions/max_terminated_length": 57.9,
24741
+ "completions/mean_length": 23.075,
24742
+ "completions/mean_terminated_length": 23.075,
24743
+ "completions/min_length": 9.7,
24744
+ "completions/min_terminated_length": 9.7,
24745
+ "epoch": 0.6078943749140421,
24746
+ "frac_reward_zero_std": 0.9,
24747
+ "grad_norm": 0.004883910529315472,
24748
+ "kl": 1.190100622177124,
24749
+ "learning_rate": 1.6284444444444448e-06,
24750
+ "loss": 0.0012,
24751
+ "num_tokens": 11018086.0,
24752
+ "reward": 5.95,
24753
+ "reward_std": 0.1,
24754
+ "rewards/check_coherence/mean": 1.475,
24755
+ "rewards/check_coherence/std": 0.05,
24756
+ "rewards/check_response_quality/mean": 2.4875,
24757
+ "rewards/check_response_quality/std": 0.025,
24758
+ "rewards/match_format_approximately/mean": 0.9875,
24759
+ "rewards/match_format_approximately/std": 0.025,
24760
+ "rewards/match_format_exactly/mean": 1.0,
24761
+ "rewards/match_format_exactly/std": 0.0,
24762
+ "step": 8840
24763
  }
24764
  ],
24765
  "logging_steps": 10,
24766
  "max_steps": 12500,
24767
+ "num_input_tokens_seen": 11018086,
24768
  "num_train_epochs": 1,
24769
  "save_steps": 10,
24770
  "stateful_callbacks": {