Model save

Browse files

Files changed (9) hide show

README.md +1 -1
all_results.json +3 -3
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
train_results.json +3 -3
trainer_state.json +91 -91
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/matrig/huggingface/runs/jxoq26bc)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/matrig/huggingface/runs/z89grvzv)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.043375617856609414,
-    "train_runtime": 7156.2523,
     "train_samples": 7500,
-    "train_samples_per_second": 1.048,
     "train_steps_per_second": 0.008
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.04243174159963583,
+    "train_runtime": 7171.9003,
     "train_samples": 7500,
+    "train_samples_per_second": 1.046,
     "train_steps_per_second": 0.008
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5060de9f04dc693ea35a217ef2e700926b654d040440c9278bbc215f5fa98822
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f72b17bf1ecbb9b43ff508a8d5089311498c87c0cf5a9d71fcc747da4d2d1f1
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0d650bf55087aa18c8fac6bc65547220baa9acfd1f74e683634c6d2a52cc3927
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:0eccca5bcc4023fe572ad637a8d23c61570c57645878aeeee1fb53c7b6011e8b
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba667f28f745f2877bc5e0d58006a156f7da7e3b3b14e038efb64808548bfb32
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0d1171cc48dcf0e6d0cd989a475c8e52861013dc49581dc2e63c4b52066061e
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7d2ab63ce7a6aeb492b96a77c09633c61416cbafe27cf796509de18bd5e0612
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2098c98966562a74b60ac0115fb68ad529afc91cfad206b7e7d21bf0a3d04c9
 size 1089994880

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 0.043375617856609414,
-    "train_runtime": 7156.2523,
     "train_samples": 7500,
-    "train_samples_per_second": 1.048,
     "train_steps_per_second": 0.008
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.04243174159963583,
+    "train_runtime": 7171.9003,
     "train_samples": 7500,
+    "train_samples_per_second": 1.046,
     "train_steps_per_second": 0.008
 }

trainer_state.json CHANGED Viewed

@@ -10,186 +10,186 @@
   "log_history": [
     {
       "clip_ratio": 0.0,
-      "completion_length": 606.4085006713867,
       "epoch": 0.017057569296375266,
-      "grad_norm": 0.5296090245246887,
       "kl": 0.0,
       "learning_rate": 5e-07,
-      "loss": 0.0365,
-      "reward": 0.6216518133878708,
-      "reward_std": 0.3478058036416769,
-      "rewards/accuracy_reward": 0.6216518133878708,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 602.2664861679077,
       "epoch": 0.08528784648187633,
-      "grad_norm": 1.4470244646072388,
-      "kl": 0.0002715587615966797,
       "learning_rate": 2.5e-06,
-      "loss": 0.0273,
-      "reward": 0.6037946781143546,
-      "reward_std": 0.3562235124409199,
-      "rewards/accuracy_reward": 0.6037946781143546,
       "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 611.7169906616211,
       "epoch": 0.17057569296375266,
-      "grad_norm": 3.7916312217712402,
-      "kl": 0.01889810562133789,
       "learning_rate": 2.956412726139078e-06,
-      "loss": 0.0486,
-      "reward": 0.6729911074042321,
-      "reward_std": 0.3084342211484909,
-      "rewards/accuracy_reward": 0.6729911074042321,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 612.168775177002,
       "epoch": 0.255863539445629,
-      "grad_norm": 0.4115124046802521,
-      "kl": 0.03681182861328125,
       "learning_rate": 2.7836719084521715e-06,
-      "loss": 0.0717,
-      "reward": 0.7399553909897805,
-      "reward_std": 0.25419430434703827,
-      "rewards/accuracy_reward": 0.7399553909897805,
       "rewards/format_reward": 0.0,
       "step": 15
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 600.9047164916992,
       "epoch": 0.3411513859275053,
-      "grad_norm": 0.7677471041679382,
-      "kl": 0.005244255065917969,
       "learning_rate": 2.4946839873611927e-06,
-      "loss": 0.0661,
-      "reward": 0.7549107432365417,
-      "reward_std": 0.220241367444396,
-      "rewards/accuracy_reward": 0.7549107432365417,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 598.3078430175781,
       "epoch": 0.42643923240938164,
-      "grad_norm": 0.16668860614299774,
-      "kl": 0.004644012451171875,
       "learning_rate": 2.1156192081791355e-06,
-      "loss": 0.0425,
-      "reward": 0.7542411059141159,
-      "reward_std": 0.18852919656783343,
-      "rewards/accuracy_reward": 0.7542411059141159,
       "rewards/format_reward": 0.0,
       "step": 25
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 597.7076141357422,
       "epoch": 0.511727078891258,
-      "grad_norm": 0.31668710708618164,
-      "kl": 0.01171703338623047,
       "learning_rate": 1.6808050203829845e-06,
-      "loss": 0.037,
-      "reward": 0.7459821775555611,
-      "reward_std": 0.1830730192363262,
-      "rewards/accuracy_reward": 0.7459821775555611,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 593.6857360839844,
       "epoch": 0.5970149253731343,
-      "grad_norm": 0.18625693023204803,
-      "kl": 0.005298995971679687,
       "learning_rate": 1.2296174432791415e-06,
-      "loss": 0.0354,
-      "reward": 0.735044676065445,
-      "reward_std": 0.19286190588027238,
-      "rewards/accuracy_reward": 0.735044676065445,
       "rewards/format_reward": 0.0,
       "step": 35
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 582.8056091308594,
       "epoch": 0.6823027718550106,
-      "grad_norm": 0.24103553593158722,
-      "kl": 0.006205368041992188,
       "learning_rate": 8.029152419343472e-07,
-      "loss": 0.0354,
-      "reward": 0.7558036059141159,
-      "reward_std": 0.1935162746347487,
-      "rewards/accuracy_reward": 0.7558036059141159,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 596.4489128112793,
       "epoch": 0.767590618336887,
-      "grad_norm": 1.4750374555587769,
-      "kl": 0.009486007690429687,
       "learning_rate": 4.3933982822017883e-07,
-      "loss": 0.0303,
-      "reward": 0.742857177555561,
-      "reward_std": 0.2004485998302698,
-      "rewards/accuracy_reward": 0.742857177555561,
       "rewards/format_reward": 0.0,
       "step": 45
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 601.0569496154785,
       "epoch": 0.8528784648187633,
-      "grad_norm": 0.2125380039215088,
-      "kl": 0.0059661865234375,
       "learning_rate": 1.718159615201853e-07,
-      "loss": 0.041,
-      "reward": 0.7366071745753289,
-      "reward_std": 0.2024520305916667,
-      "rewards/accuracy_reward": 0.7366071745753289,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 589.6513717651367,
       "epoch": 0.9381663113006397,
-      "grad_norm": 0.255264014005661,
-      "kl": 0.0066814422607421875,
       "learning_rate": 2.4570139579284723e-08,
-      "loss": 0.0317,
-      "reward": 0.7718750342726708,
-      "reward_std": 0.20143432933837174,
-      "rewards/accuracy_reward": 0.7718750342726708,
       "rewards/format_reward": 0.0,
       "step": 55
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 589.5065752665201,
       "epoch": 0.9893390191897654,
-      "kl": 0.007272084554036458,
-      "reward": 0.75632444024086,
-      "reward_std": 0.1969691620518764,
-      "rewards/accuracy_reward": 0.75632444024086,
       "rewards/format_reward": 0.0,
       "step": 58,
       "total_flos": 0.0,
-      "train_loss": 0.043375617856609414,
-      "train_runtime": 7156.2523,
-      "train_samples_per_second": 1.048,
       "train_steps_per_second": 0.008
     }
   ],

   "log_history": [
     {
       "clip_ratio": 0.0,
+      "completion_length": 605.6372985839844,
       "epoch": 0.017057569296375266,
+      "grad_norm": 0.5198791027069092,
       "kl": 0.0,
       "learning_rate": 5e-07,
+      "loss": 0.0297,
+      "reward": 0.6272321715950966,
+      "reward_std": 0.3432547841221094,
+      "rewards/accuracy_reward": 0.6272321715950966,
       "rewards/format_reward": 0.0,
       "step": 1
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 601.5561113357544,
       "epoch": 0.08528784648187633,
+      "grad_norm": 0.3226652443408966,
+      "kl": 0.0007017850875854492,
       "learning_rate": 2.5e-06,
+      "loss": 0.0077,
+      "reward": 0.603794670663774,
+      "reward_std": 0.35096015920862556,
+      "rewards/accuracy_reward": 0.603794670663774,
       "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 608.2205635070801,
       "epoch": 0.17057569296375266,
+      "grad_norm": 0.2746102213859558,
+      "kl": 0.0061968803405761715,
       "learning_rate": 2.956412726139078e-06,
+      "loss": 0.0503,
+      "reward": 0.6727678865194321,
+      "reward_std": 0.3072200361639261,
+      "rewards/accuracy_reward": 0.6727678865194321,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 600.5703376770019,
       "epoch": 0.255863539445629,
+      "grad_norm": 0.15307798981666565,
+      "kl": 0.005248260498046875,
       "learning_rate": 2.7836719084521715e-06,
+      "loss": 0.0737,
+      "reward": 0.758705398440361,
+      "reward_std": 0.22059124447405337,
+      "rewards/accuracy_reward": 0.758705398440361,
       "rewards/format_reward": 0.0,
       "step": 15
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 589.4640838623047,
       "epoch": 0.3411513859275053,
+      "grad_norm": 0.13478825986385345,
+      "kl": 0.004410362243652344,
       "learning_rate": 2.4946839873611927e-06,
+      "loss": 0.0521,
+      "reward": 0.7656250417232513,
+      "reward_std": 0.19759314637631178,
+      "rewards/accuracy_reward": 0.7656250417232513,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 593.369002532959,
       "epoch": 0.42643923240938164,
+      "grad_norm": 0.13584953546524048,
+      "kl": 0.005138778686523437,
       "learning_rate": 2.1156192081791355e-06,
+      "loss": 0.0484,
+      "reward": 0.7676339626312256,
+      "reward_std": 0.17082388242706656,
+      "rewards/accuracy_reward": 0.7676339626312256,
       "rewards/format_reward": 0.0,
       "step": 25
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 598.1558288574219,
       "epoch": 0.511727078891258,
+      "grad_norm": 0.09639401733875275,
+      "kl": 0.003318023681640625,
       "learning_rate": 1.6808050203829845e-06,
+      "loss": 0.0395,
+      "reward": 0.7537946760654449,
+      "reward_std": 0.17525787679478527,
+      "rewards/accuracy_reward": 0.7537946760654449,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 593.4710098266602,
       "epoch": 0.5970149253731343,
+      "grad_norm": 0.10184694081544876,
+      "kl": 0.0056324005126953125,
       "learning_rate": 1.2296174432791415e-06,
+      "loss": 0.0295,
+      "reward": 0.7473214656114578,
+      "reward_std": 0.1758826152421534,
+      "rewards/accuracy_reward": 0.7473214656114578,
       "rewards/format_reward": 0.0,
       "step": 35
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 579.4178863525391,
       "epoch": 0.6823027718550106,
+      "grad_norm": 0.0822824090719223,
+      "kl": 0.005002212524414062,
       "learning_rate": 8.029152419343472e-07,
+      "loss": 0.0407,
+      "reward": 0.7607143178582192,
+      "reward_std": 0.17344756741076708,
+      "rewards/accuracy_reward": 0.7607143178582192,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 592.2201156616211,
       "epoch": 0.767590618336887,
+      "grad_norm": 0.08887068182229996,
+      "kl": 0.004409027099609375,
       "learning_rate": 4.3933982822017883e-07,
+      "loss": 0.0351,
+      "reward": 0.7473214611411094,
+      "reward_std": 0.18249810487031937,
+      "rewards/accuracy_reward": 0.7473214611411094,
       "rewards/format_reward": 0.0,
       "step": 45
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 599.4567222595215,
       "epoch": 0.8528784648187633,
+      "grad_norm": 0.09352638572454453,
+      "kl": 0.004033279418945312,
       "learning_rate": 1.718159615201853e-07,
+      "loss": 0.0373,
+      "reward": 0.747544676065445,
+      "reward_std": 0.18660746775567533,
+      "rewards/accuracy_reward": 0.747544676065445,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 588.4038238525391,
       "epoch": 0.9381663113006397,
+      "grad_norm": 0.512617290019989,
+      "kl": 0.004796600341796875,
       "learning_rate": 2.4570139579284723e-08,
+      "loss": 0.0337,
+      "reward": 0.7707589581608772,
+      "reward_std": 0.18231952451169492,
+      "rewards/accuracy_reward": 0.7707589581608772,
       "rewards/format_reward": 0.0,
       "step": 55
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 578.9475390116373,
       "epoch": 0.9893390191897654,
+      "kl": 0.0038859049479166665,
+      "reward": 0.7656250298023224,
+      "reward_std": 0.1823850224415461,
+      "rewards/accuracy_reward": 0.7656250298023224,
       "rewards/format_reward": 0.0,
       "step": 58,
       "total_flos": 0.0,
+      "train_loss": 0.04243174159963583,
+      "train_runtime": 7171.9003,
+      "train_samples_per_second": 1.046,
       "train_steps_per_second": 0.008
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1296abcf48c305f93ea57b6b1d462ae5bda4a1337b41a43d9c9f5087aafa270f
 size 7992

 version https://git-lfs.github.com/spec/v1
+oid sha256:311ce107a29421d5d273e4ae679537cf3812ea1e8f575fb7725ecc28ae41b9e1
 size 7992