Model save

Browse files

Files changed (9) hide show

README.md +1 -1
all_results.json +3 -3
model-00001-of-00004.safetensors +1 -1
model-00002-of-00004.safetensors +1 -1
model-00003-of-00004.safetensors +1 -1
model-00004-of-00004.safetensors +1 -1
train_results.json +3 -3
trainer_state.json +86 -86
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/xu-chenhui-university-at-buffalo/huggingface/runs/6vym73va)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/xu-chenhui-university-at-buffalo/huggingface/runs/kngvriz9)
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 4.388007793262409,
-    "train_runtime": 12362.3849,
     "train_samples": 7500,
-    "train_samples_per_second": 0.607,
     "train_steps_per_second": 0.005
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.1775998589502455,
+    "train_runtime": 12344.1744,
     "train_samples": 7500,
+    "train_samples_per_second": 0.608,
     "train_steps_per_second": 0.005
 }

model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13c9b80e402dee3f491c30d70242b8d5a35474d993c3344f25eb00d47abcd75f
 size 4877660776

 version https://git-lfs.github.com/spec/v1
+oid sha256:e142400035f4987d6f51fdee88a9392b4bc9b4c81365d4a200cd51943231c557
 size 4877660776

model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9e103756e1b1e5795c03b9792af7eaa911ee339b9771e06f1f88a7b7d680fe56
 size 4932751008

 version https://git-lfs.github.com/spec/v1
+oid sha256:85c1946ad4f5cfc6524d240d49f8efc23fb6d34716bf51c9455158663d2b9c0c
 size 4932751008

model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:50f0495772f3bc2af5d23d70d6fda1f2446925f69440b84e5b306ddcd03fc170
 size 4330865200

 version https://git-lfs.github.com/spec/v1
+oid sha256:ff4acc930ccb23d9a2ff03a77d8533cb803cf50654e36d748623845308eefd60
 size 4330865200

model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0333313ee66af909e14a590df9353448e619bd785ff71ddc7baaebe5c17a9fda
 size 1089994880

 version https://git-lfs.github.com/spec/v1
+oid sha256:4cc28e0df4f7aa580096a8cd5d178ffbca499490d9e8c70c6005fc668be6dbf0
 size 1089994880

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": 4.388007793262409,
-    "train_runtime": 12362.3849,
     "train_samples": 7500,
-    "train_samples_per_second": 0.607,
     "train_steps_per_second": 0.005
 }

 {
     "total_flos": 0.0,
+    "train_loss": 0.1775998589502455,
+    "train_runtime": 12344.1744,
     "train_samples": 7500,
+    "train_samples_per_second": 0.608,
     "train_steps_per_second": 0.005
 }

trainer_state.json CHANGED Viewed

@@ -12,7 +12,7 @@
       "clip_ratio": 0.0,
       "completion_length": 594.6719055175781,
       "epoch": 0.017057569296375266,
-      "grad_norm": 0.3890414834022522,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0015,
@@ -24,172 +24,172 @@
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 595.9648714065552,
       "epoch": 0.08528784648187633,
-      "grad_norm": 0.4401928186416626,
-      "kl": 0.00044229626655578613,
       "learning_rate": 2.5e-06,
-      "loss": 0.0173,
-      "reward": 0.6018415447324514,
-      "reward_std": 0.34527207911014557,
-      "rewards/accuracy_reward": 0.6018415447324514,
       "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 615.8573936462402,
       "epoch": 0.17057569296375266,
-      "grad_norm": 30.63307762145996,
-      "kl": 984.0092399597168,
       "learning_rate": 2.956412726139078e-06,
-      "loss": 50.2875,
-      "reward": 0.6660714596509933,
-      "reward_std": 0.3170145872980356,
-      "rewards/accuracy_reward": 0.6660714596509933,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 607.8788246154785,
       "epoch": 0.255863539445629,
-      "grad_norm": 0.2509593069553375,
-      "kl": 3.2091552734375,
       "learning_rate": 2.7836719084521715e-06,
-      "loss": 0.2167,
-      "reward": 0.7511161014437675,
-      "reward_std": 0.22802229821681977,
-      "rewards/accuracy_reward": 0.7511161014437675,
       "rewards/format_reward": 0.0,
       "step": 15
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 598.6384201049805,
       "epoch": 0.3411513859275053,
-      "grad_norm": 0.41024672985076904,
-      "kl": 0.00501861572265625,
       "learning_rate": 2.4946839873611927e-06,
-      "loss": 0.0701,
-      "reward": 0.7580357491970062,
-      "reward_std": 0.2117175567895174,
-      "rewards/accuracy_reward": 0.7580357491970062,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 600.18842086792,
       "epoch": 0.42643923240938164,
-      "grad_norm": 0.7129307985305786,
-      "kl": 0.01684722900390625,
       "learning_rate": 2.1156192081791355e-06,
-      "loss": 0.05,
-      "reward": 0.7569196775555611,
-      "reward_std": 0.19429435413330792,
-      "rewards/accuracy_reward": 0.7569196775555611,
       "rewards/format_reward": 0.0,
       "step": 25
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 600.5020393371582,
       "epoch": 0.511727078891258,
-      "grad_norm": 0.0800187885761261,
-      "kl": 0.003276824951171875,
       "learning_rate": 1.6808050203829845e-06,
-      "loss": 0.0427,
-      "reward": 0.7566964626312256,
-      "reward_std": 0.17697864044457673,
-      "rewards/accuracy_reward": 0.7566964626312256,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 596.0426574707031,
       "epoch": 0.5970149253731343,
-      "grad_norm": 0.23726417124271393,
-      "kl": 0.00370025634765625,
       "learning_rate": 1.2296174432791415e-06,
-      "loss": 0.0363,
-      "reward": 0.7477678917348385,
-      "reward_std": 0.18066087178885937,
-      "rewards/accuracy_reward": 0.7477678917348385,
       "rewards/format_reward": 0.0,
       "step": 35
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 582.9788246154785,
       "epoch": 0.6823027718550106,
-      "grad_norm": 0.12389203161001205,
-      "kl": 0.004132461547851562,
       "learning_rate": 8.029152419343472e-07,
-      "loss": 0.0357,
-      "reward": 0.7654018223285675,
-      "reward_std": 0.17269687270745634,
-      "rewards/accuracy_reward": 0.7654018223285675,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 602.0515853881836,
       "epoch": 0.767590618336887,
-      "grad_norm": 0.19584427773952484,
-      "kl": 0.0103118896484375,
       "learning_rate": 4.3933982822017883e-07,
-      "loss": 0.0363,
-      "reward": 0.7444196760654449,
-      "reward_std": 0.19971248973160982,
-      "rewards/accuracy_reward": 0.7444196760654449,
       "rewards/format_reward": 0.0,
       "step": 45
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 607.6902076721192,
       "epoch": 0.8528784648187633,
-      "grad_norm": 0.12652064859867096,
-      "kl": 0.014777755737304688,
       "learning_rate": 1.718159615201853e-07,
-      "loss": 0.0375,
-      "reward": 0.7363839626312256,
-      "reward_std": 0.19309807270765306,
-      "rewards/accuracy_reward": 0.7363839626312256,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 596.0453399658203,
       "epoch": 0.9381663113006397,
-      "grad_norm": 0.10281035304069519,
-      "kl": 0.010649490356445312,
       "learning_rate": 2.4570139579284723e-08,
-      "loss": 0.039,
-      "reward": 0.7823661074042321,
-      "reward_std": 0.18417297434061766,
-      "rewards/accuracy_reward": 0.7823661074042321,
       "rewards/format_reward": 0.0,
       "step": 55
     },
     {
       "clip_ratio": 0.0,
-      "completion_length": 590.6122868855795,
       "epoch": 0.9893390191897654,
-      "kl": 0.014527638753255209,
-      "reward": 0.7656250397364298,
-      "reward_std": 0.1785029562500616,
-      "rewards/accuracy_reward": 0.7656250397364298,
       "rewards/format_reward": 0.0,
       "step": 58,
       "total_flos": 0.0,
-      "train_loss": 4.388007793262409,
-      "train_runtime": 12362.3849,
-      "train_samples_per_second": 0.607,
       "train_steps_per_second": 0.005
     }
   ],

       "clip_ratio": 0.0,
       "completion_length": 594.6719055175781,
       "epoch": 0.017057569296375266,
+      "grad_norm": 0.3901163935661316,
       "kl": 0.0,
       "learning_rate": 5e-07,
       "loss": -0.0015,
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 609.8289909362793,
       "epoch": 0.08528784648187633,
+      "grad_norm": 0.5891452431678772,
+      "kl": 0.0003406107425689697,
       "learning_rate": 2.5e-06,
+      "loss": 0.028,
+      "reward": 0.5943080652505159,
+      "reward_std": 0.35135408770293,
+      "rewards/accuracy_reward": 0.5943080652505159,
       "rewards/format_reward": 0.0,
       "step": 5
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 622.6977935791016,
       "epoch": 0.17057569296375266,
+      "grad_norm": 4.026884078979492,
+      "kl": 0.009809780120849609,
       "learning_rate": 2.956412726139078e-06,
+      "loss": 0.0682,
+      "reward": 0.6770089581608772,
+      "reward_std": 0.3084997434169054,
+      "rewards/accuracy_reward": 0.6770089581608772,
       "rewards/format_reward": 0.0,
       "step": 10
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 604.310961151123,
       "epoch": 0.255863539445629,
+      "grad_norm": 0.44674399495124817,
+      "kl": 30.434170150756835,
       "learning_rate": 2.7836719084521715e-06,
+      "loss": 1.5901,
+      "reward": 0.763392886519432,
+      "reward_std": 0.21235014032572508,
+      "rewards/accuracy_reward": 0.763392886519432,
       "rewards/format_reward": 0.0,
       "step": 15
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 595.2562767028809,
       "epoch": 0.3411513859275053,
+      "grad_norm": 0.13691851496696472,
+      "kl": 0.004651832580566406,
       "learning_rate": 2.4946839873611927e-06,
+      "loss": 0.0554,
+      "reward": 0.7613839641213417,
+      "reward_std": 0.21757735572755338,
+      "rewards/accuracy_reward": 0.7613839641213417,
       "rewards/format_reward": 0.0,
       "step": 20
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 601.6230163574219,
       "epoch": 0.42643923240938164,
+      "grad_norm": 0.20785608887672424,
+      "kl": 0.02872943878173828,
       "learning_rate": 2.1156192081791355e-06,
+      "loss": 0.0575,
+      "reward": 0.7502232536673545,
+      "reward_std": 0.2002022891305387,
+      "rewards/accuracy_reward": 0.7502232536673545,
       "rewards/format_reward": 0.0,
       "step": 25
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 603.0156471252442,
       "epoch": 0.511727078891258,
+      "grad_norm": 0.1560502052307129,
+      "kl": 0.00396270751953125,
       "learning_rate": 1.6808050203829845e-06,
+      "loss": 0.0352,
+      "reward": 0.7459821790456772,
+      "reward_std": 0.18388189654797316,
+      "rewards/accuracy_reward": 0.7459821790456772,
       "rewards/format_reward": 0.0,
       "step": 30
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 601.2908729553222,
       "epoch": 0.5970149253731343,
+      "grad_norm": 0.11733976751565933,
+      "kl": 0.04864578247070313,
       "learning_rate": 1.2296174432791415e-06,
+      "loss": 0.0487,
+      "reward": 0.7290178924798966,
+      "reward_std": 0.19413371523842216,
+      "rewards/accuracy_reward": 0.7290178924798966,
       "rewards/format_reward": 0.0,
       "step": 35
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 581.8761444091797,
       "epoch": 0.6823027718550106,
+      "grad_norm": 0.21926052868366241,
+      "kl": 0.005985260009765625,
       "learning_rate": 8.029152419343472e-07,
+      "loss": 0.049,
+      "reward": 0.7500000327825547,
+      "reward_std": 0.19939510114490985,
+      "rewards/accuracy_reward": 0.7500000327825547,
       "rewards/format_reward": 0.0,
       "step": 40
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 601.2828376770019,
       "epoch": 0.767590618336887,
+      "grad_norm": 0.22137849032878876,
+      "kl": 0.004445266723632812,
       "learning_rate": 4.3933982822017883e-07,
+      "loss": 0.043,
+      "reward": 0.7392857477068902,
+      "reward_std": 0.21266973707824946,
+      "rewards/accuracy_reward": 0.7392857477068902,
       "rewards/format_reward": 0.0,
       "step": 45
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 607.684846496582,
       "epoch": 0.8528784648187633,
+      "grad_norm": 0.12936115264892578,
+      "kl": 0.010895919799804688,
       "learning_rate": 1.718159615201853e-07,
+      "loss": 0.0298,
+      "reward": 0.7388393193483352,
+      "reward_std": 0.19434305084869266,
+      "rewards/accuracy_reward": 0.7388393193483352,
       "rewards/format_reward": 0.0,
       "step": 50
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 595.9553848266602,
       "epoch": 0.9381663113006397,
+      "grad_norm": 0.17488008737564087,
+      "kl": 0.0073909759521484375,
       "learning_rate": 2.4570139579284723e-08,
+      "loss": 0.0423,
+      "reward": 0.7638393193483353,
+      "reward_std": 0.2086773581802845,
+      "rewards/accuracy_reward": 0.7638393193483353,
       "rewards/format_reward": 0.0,
       "step": 55
     },
     {
       "clip_ratio": 0.0,
+      "completion_length": 583.5217819213867,
       "epoch": 0.9893390191897654,
+      "kl": 0.0069802602132161455,
+      "reward": 0.7615327710906664,
+      "reward_std": 0.1866085703174273,
+      "rewards/accuracy_reward": 0.7615327710906664,
       "rewards/format_reward": 0.0,
       "step": 58,
       "total_flos": 0.0,
+      "train_loss": 0.1775998589502455,
+      "train_runtime": 12344.1744,
+      "train_samples_per_second": 0.608,
       "train_steps_per_second": 0.005
     }
   ],

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40548b7ba1945e131c4fd6018bea3574757ea071c3c3d41188a11baf07d15fca
 size 7992

 version https://git-lfs.github.com/spec/v1
+oid sha256:5a0d090777e2139e561f164b5f8a1981f106c6db4ec7ebb318aac6848731f2fe
 size 7992