MatchaLwc committed on
Commit
5045b8e
·
verified ·
1 Parent(s): 46f8ec0

Model save

Browse files
README.md CHANGED
@@ -26,7 +26,7 @@ print(output["generated_text"])
26
 
27
  ## Training procedure
28
 
29
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/1105645918-bit/huggingface/runs/2trr7wkk)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
 
26
 
27
  ## Training procedure
28
 
29
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/1105645918-bit/huggingface/runs/jsx6zali)
30
 
31
 
32
  This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.057270023971796036,
4
- "train_runtime": 3066.7214,
5
  "train_samples": 336,
6
- "train_samples_per_second": 0.548,
7
  "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.017851791344583035,
4
+ "train_runtime": 640.8452,
5
  "train_samples": 336,
6
+ "train_samples_per_second": 0.524,
7
  "train_steps_per_second": 0.003
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d695ba33451eb7a1bf201982146c72f5315781c4edb0880d441f804363bafbdf
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1dae0d24228e609375afccbaed20d92d1061c369f4ed2cb897ab2d19556fc86c
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:240bc2c3be7f25610723478c3fba8cfc7ffea89000dca5c83e33896010ac2bf6
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:245b96b78a3c03ccfb56e5c692e182cfc46966ca6242d62c58b138e10c101189
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c2d798bae01b93ef2439b1b1868076e8ce331883d5be9ea20543af984498df8
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a9b62b510dd7f0e881c5749fe446a29f44e5f4adaf9e1b11d8248831586d63
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b23d0800c8a6b0fc258da6ed43a1df4e253dcec06c0b9ff79c4c6bb56286b7d
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f30aac2af48ddfb58cb3c4043625e64fb40d0aa3fc8dcd18e5db0b119e19734
3
  size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "total_flos": 0.0,
3
- "train_loss": 0.057270023971796036,
4
- "train_runtime": 3066.7214,
5
  "train_samples": 336,
6
- "train_samples_per_second": 0.548,
7
  "train_steps_per_second": 0.003
8
  }
 
1
  {
2
  "total_flos": 0.0,
3
+ "train_loss": -0.017851791344583035,
4
+ "train_runtime": 640.8452,
5
  "train_samples": 336,
6
+ "train_samples_per_second": 0.524,
7
  "train_steps_per_second": 0.003
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.761904761904762,
5
  "eval_steps": 500,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -12,7 +12,7 @@
12
  "clip_ratio": 0.0,
13
  "completion_length": 999.6105346679688,
14
  "epoch": 0.38095238095238093,
15
- "grad_norm": 154.4364013671875,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
  "loss": -0.019,
@@ -24,46 +24,25 @@
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
- "completion_length": 1006.7606468200684,
28
- "epoch": 2.380952380952381,
29
- "grad_norm": 8.609855651855469,
30
- "kl": 1.3177490234375,
31
- "learning_rate": 1.7604722665003958e-06,
32
- "loss": 0.0393,
33
- "reward": 0.7695312835276127,
34
- "reward_std": 0.3738137981854379,
35
- "rewards/accuracy_reward": 0.04101562706637196,
36
- "rewards/format_reward": 0.7285156585276127,
37
- "step": 5
38
- },
39
- {
40
- "clip_ratio": 0.0,
41
- "completion_length": 1006.158299255371,
42
- "epoch": 4.761904761904762,
43
- "grad_norm": 0.9559445381164551,
44
- "kl": 2.4314453125,
45
- "learning_rate": 0.0,
46
- "loss": 0.0869,
47
- "reward": 0.7937500372529029,
48
- "reward_std": 0.36111804023385047,
49
- "rewards/accuracy_reward": 0.03906250209547579,
50
- "rewards/format_reward": 0.7546875327825546,
51
- "step": 10
52
- },
53
- {
54
- "epoch": 4.761904761904762,
55
- "step": 10,
56
  "total_flos": 0.0,
57
- "train_loss": 0.057270023971796036,
58
- "train_runtime": 3066.7214,
59
- "train_samples_per_second": 0.548,
60
  "train_steps_per_second": 0.003
61
  }
62
  ],
63
  "logging_steps": 5,
64
- "max_steps": 10,
65
  "num_input_tokens_seen": 0,
66
- "num_train_epochs": 5,
67
  "save_steps": 500,
68
  "stateful_callbacks": {
69
  "TrainerControl": {
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7619047619047619,
5
  "eval_steps": 500,
6
+ "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
12
  "clip_ratio": 0.0,
13
  "completion_length": 999.6105346679688,
14
  "epoch": 0.38095238095238093,
15
+ "grad_norm": 154.43353271484375,
16
  "kl": 0.0,
17
  "learning_rate": 3e-06,
18
  "loss": -0.019,
 
24
  },
25
  {
26
  "clip_ratio": 0.0,
27
+ "completion_length": 1004.9062957763672,
28
+ "epoch": 0.7619047619047619,
29
+ "kl": 0.0,
30
+ "reward": 0.7745536118745804,
31
+ "reward_std": 0.36208853125572205,
32
+ "rewards/accuracy_reward": 0.039062502793967724,
33
+ "rewards/format_reward": 0.7354911044239998,
34
+ "step": 2,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  "total_flos": 0.0,
36
+ "train_loss": -0.017851791344583035,
37
+ "train_runtime": 640.8452,
38
+ "train_samples_per_second": 0.524,
39
  "train_steps_per_second": 0.003
40
  }
41
  ],
42
  "logging_steps": 5,
43
+ "max_steps": 2,
44
  "num_input_tokens_seen": 0,
45
+ "num_train_epochs": 1,
46
  "save_steps": 500,
47
  "stateful_callbacks": {
48
  "TrainerControl": {
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2208791588500d187944b80bb13411e7e17df3477815ff62483411ddd5abe7a7
3
  size 8120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27eaedb182a0597cd6ecc7b8c1cb288486f47a0bf64ad9b966c57dee92a09fe
3
  size 8120