Model save

Browse files

Files changed (9) hide show

README.md +1 -1
all_results.json +3 -3
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
train_results.json +3 -3
trainer_state.json +16 -37
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -26,7 +26,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/1105645918-bit/huggingface/runs/2trr7wkk)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/1105645918-bit/huggingface/runs/jsx6zali)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.057270023971796036,
-    "train_runtime": 3066.7214,
     "train_samples": 336,
-    "train_samples_per_second": 0.548,
     "train_steps_per_second": 0.003
 }

 {
     "total_flos": 0.0,
+    "train_loss": -0.017851791344583035,
+    "train_runtime": 640.8452,
     "train_samples": 336,
+    "train_samples_per_second": 0.524,
     "train_steps_per_second": 0.003
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d695ba33451eb7a1bf201982146c72f5315781c4edb0880d441f804363bafbdf
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:1dae0d24228e609375afccbaed20d92d1061c369f4ed2cb897ab2d19556fc86c
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:240bc2c3be7f25610723478c3fba8cfc7ffea89000dca5c83e33896010ac2bf6
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:245b96b78a3c03ccfb56e5c692e182cfc46966ca6242d62c58b138e10c101189
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c2d798bae01b93ef2439b1b1868076e8ce331883d5be9ea20543af984498df8
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:d9a9b62b510dd7f0e881c5749fe446a29f44e5f4adaf9e1b11d8248831586d63
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b23d0800c8a6b0fc258da6ed43a1df4e253dcec06c0b9ff79c4c6bb56286b7d
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:4f30aac2af48ddfb58cb3c4043625e64fb40d0aa3fc8dcd18e5db0b119e19734
 size 1089994880

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.057270023971796036,
-    "train_runtime": 3066.7214,
     "train_samples": 336,
-    "train_samples_per_second": 0.548,
     "train_steps_per_second": 0.003
 }

 {
     "total_flos": 0.0,
+    "train_loss": -0.017851791344583035,
+    "train_runtime": 640.8452,
     "train_samples": 336,
+    "train_samples_per_second": 0.524,
     "train_steps_per_second": 0.003
 }

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 4.761904761904762,
   "eval_steps": 500,
-  "global_step": 10,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -12,7 +12,7 @@
       "clip_ratio": 0.0,
       "completion_length": 999.6105346679688,
       "epoch": 0.38095238095238093,
-      "grad_norm": 154.4364013671875,
       "kl": 0.0,
       "learning_rate": 3e-06,
       "loss": -0.019,
@@ -24,46 +24,25 @@
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 1006.7606468200684,
-      "epoch": 2.380952380952381,
-      "grad_norm": 8.609855651855469,
-      "kl": 1.3177490234375,
-      "learning_rate": 1.7604722665003958e-06,
-      "loss": 0.0393,
-      "reward": 0.7695312835276127,
-      "reward_std": 0.3738137981854379,
-      "rewards/accuracy_reward": 0.04101562706637196,
-      "rewards/format_reward": 0.7285156585276127,
-      "step": 5
-    },
-    {
-      "clip_ratio": 0.0,
-      "completion_length": 1006.158299255371,
-      "epoch": 4.761904761904762,
-      "grad_norm": 0.9559445381164551,
-      "kl": 2.4314453125,
-      "learning_rate": 0.0,
-      "loss": 0.0869,
-      "reward": 0.7937500372529029,
-      "reward_std": 0.36111804023385047,
-      "rewards/accuracy_reward": 0.03906250209547579,
-      "rewards/format_reward": 0.7546875327825546,
-      "step": 10
-    },
-    {
-      "epoch": 4.761904761904762,
-      "step": 10,
       "total_flos": 0.0,
-      "train_loss": 0.057270023971796036,
-      "train_runtime": 3066.7214,
-      "train_samples_per_second": 0.548,
       "train_steps_per_second": 0.003
     }
   ],
   "logging_steps": 5,
-  "max_steps": 10,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.7619047619047619,
   "eval_steps": 500,
+  "global_step": 2,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "clip_ratio": 0.0,
       "completion_length": 999.6105346679688,
       "epoch": 0.38095238095238093,
+      "grad_norm": 154.43353271484375,
       "kl": 0.0,
       "learning_rate": 3e-06,
       "loss": -0.019,
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 1004.9062957763672,
+      "epoch": 0.7619047619047619,
+      "kl": 0.0,
+      "reward": 0.7745536118745804,
+      "reward_std": 0.36208853125572205,
+      "rewards/accuracy_reward": 0.039062502793967724,
+      "rewards/format_reward": 0.7354911044239998,
+      "step": 2,
       "total_flos": 0.0,
+      "train_loss": -0.017851791344583035,
+      "train_runtime": 640.8452,
+      "train_samples_per_second": 0.524,
       "train_steps_per_second": 0.003
     }
   ],
   "logging_steps": 5,
+  "max_steps": 2,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2208791588500d187944b80bb13411e7e17df3477815ff62483411ddd5abe7a7
 size 8120

 version https://git-lfs.github.com/spec/v1
+oid sha256:b27eaedb182a0597cd6ecc7b8c1cb288486f47a0bf64ad9b966c57dee92a09fe
 size 8120