Blancy committed on
Commit
1977f3d
·
verified ·
1 Parent(s): 9308579

Model save

Browse files
Files changed (4) hide show
  1. README.md +1 -3
  2. all_results.json +5 -5
  3. train_results.json +5 -5
  4. trainer_state.json +860 -69
README.md CHANGED
@@ -1,10 +1,8 @@
1
  ---
2
- datasets: Blancy/verifiable-coding-problems-SFT
3
  library_name: transformers
4
  model_name: Qwen3-0.6B-Open-R1-Distill
5
  tags:
6
  - generated_from_trainer
7
- - open-r1
8
  - trl
9
  - sft
10
  licence: license
@@ -12,7 +10,7 @@ licence: license
12
 
13
  # Model Card for Qwen3-0.6B-Open-R1-Distill
14
 
15
- This model is a fine-tuned version of [None](https://huggingface.co/None) on the [Blancy/verifiable-coding-problems-SFT](https://huggingface.co/datasets/Blancy/verifiable-coding-problems-SFT) dataset.
16
  It has been trained using [TRL](https://github.com/huggingface/trl).
17
 
18
  ## Quick start
 
1
  ---
 
2
  library_name: transformers
3
  model_name: Qwen3-0.6B-Open-R1-Distill
4
  tags:
5
  - generated_from_trainer
 
6
  - trl
7
  - sft
8
  licence: license
 
10
 
11
  # Model Card for Qwen3-0.6B-Open-R1-Distill
12
 
13
+ This model is a fine-tuned version of [None](https://huggingface.co/None).
14
  It has been trained using [TRL](https://github.com/huggingface/trl).
15
 
16
  ## Quick start
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 1.0840722057776333e+17,
3
- "train_loss": 0.09704786779904606,
4
- "train_runtime": 144.1296,
5
  "train_samples": 1086,
6
- "train_samples_per_second": 86.859,
7
- "train_steps_per_second": 0.687
8
  }
 
1
  {
2
+ "total_flos": 7.227148038517555e+17,
3
+ "train_loss": 0.08479578431808588,
4
+ "train_runtime": 842.3759,
5
  "train_samples": 1086,
6
+ "train_samples_per_second": 99.077,
7
+ "train_steps_per_second": 0.783
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 1.0840722057776333e+17,
3
- "train_loss": 0.09704786779904606,
4
- "train_runtime": 144.1296,
5
  "train_samples": 1086,
6
- "train_samples_per_second": 86.859,
7
- "train_steps_per_second": 0.687
8
  }
 
1
  {
2
+ "total_flos": 7.227148038517555e+17,
3
+ "train_loss": 0.08479578431808588,
4
+ "train_runtime": 842.3759,
5
  "train_samples": 1086,
6
+ "train_samples_per_second": 99.077,
7
+ "train_steps_per_second": 0.783
8
  }
trainer_state.json CHANGED
@@ -2,160 +2,951 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 3.0,
6
  "eval_steps": 500,
7
- "global_step": 99,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.15151515151515152,
14
- "grad_norm": 0.77734375,
15
- "learning_rate": 2.4e-05,
16
- "loss": 0.1367,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.30303030303030304,
21
- "grad_norm": 0.44140625,
22
- "learning_rate": 2.9879546090089533e-05,
23
- "loss": 0.1117,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.45454545454545453,
28
- "grad_norm": 0.28515625,
29
- "learning_rate": 2.9393883712293316e-05,
30
- "loss": 0.1015,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.6060606060606061,
35
- "grad_norm": 0.244140625,
36
- "learning_rate": 2.8549004284108398e-05,
37
- "loss": 0.099,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.7575757575757576,
42
- "grad_norm": 0.228515625,
43
- "learning_rate": 2.7368445717222102e-05,
44
- "loss": 0.0965,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.9090909090909091,
49
- "grad_norm": 0.220703125,
50
- "learning_rate": 2.5885097773607675e-05,
51
- "loss": 0.0973,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.0606060606060606,
56
- "grad_norm": 0.2255859375,
57
- "learning_rate": 2.4140285773463036e-05,
58
- "loss": 0.0936,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.2121212121212122,
63
- "grad_norm": 0.2138671875,
64
- "learning_rate": 2.2182619292782524e-05,
65
- "loss": 0.0961,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.3636363636363638,
70
- "grad_norm": 0.2041015625,
71
- "learning_rate": 2.0066637925262362e-05,
72
- "loss": 0.0931,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.5151515151515151,
77
- "grad_norm": 0.2177734375,
78
- "learning_rate": 1.7851291836925332e-05,
79
- "loss": 0.0936,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 1.6666666666666665,
84
- "grad_norm": 0.2119140625,
85
- "learning_rate": 1.559829944444086e-05,
86
- "loss": 0.0932,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 1.8181818181818183,
91
- "grad_norm": 0.2099609375,
92
- "learning_rate": 1.3370427971388369e-05,
93
- "loss": 0.0933,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 1.9696969696969697,
98
- "grad_norm": 0.2275390625,
99
- "learning_rate": 1.1229744785292821e-05,
100
- "loss": 0.0924,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.121212121212121,
105
- "grad_norm": 0.197265625,
106
- "learning_rate": 9.235888232294472e-06,
107
- "loss": 0.092,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.2727272727272725,
112
- "grad_norm": 0.20703125,
113
- "learning_rate": 7.444406143120487e-06,
114
- "loss": 0.0904,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 2.4242424242424243,
119
- "grad_norm": 0.220703125,
120
- "learning_rate": 5.9052082987380775e-06,
121
- "loss": 0.0926,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 2.5757575757575757,
126
- "grad_norm": 0.2080078125,
127
- "learning_rate": 4.6611759692099345e-06,
128
- "loss": 0.0925,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 2.7272727272727275,
133
- "grad_norm": 0.2099609375,
134
- "learning_rate": 3.746967263293098e-06,
135
- "loss": 0.0932,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 2.878787878787879,
140
- "grad_norm": 0.2109375,
141
- "learning_rate": 3.188051571134615e-06,
142
- "loss": 0.0916,
143
  "step": 95
144
  },
145
  {
146
- "epoch": 3.0,
147
- "step": 99,
148
- "total_flos": 1.0840722057776333e+17,
149
- "train_loss": 0.09704786779904606,
150
- "train_runtime": 144.1296,
151
- "train_samples_per_second": 86.859,
152
- "train_steps_per_second": 0.687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  }
154
  ],
155
  "logging_steps": 5,
156
- "max_steps": 99,
157
  "num_input_tokens_seen": 0,
158
- "num_train_epochs": 3,
159
  "save_steps": 500,
160
  "stateful_callbacks": {
161
  "TrainerControl": {
@@ -169,7 +960,7 @@
169
  "attributes": {}
170
  }
171
  },
172
- "total_flos": 1.0840722057776333e+17,
173
  "train_batch_size": 128,
174
  "trial_name": null,
175
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 20.0,
6
  "eval_steps": 500,
7
+ "global_step": 660,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0.15151515151515152,
14
+ "grad_norm": 1.984375,
15
+ "learning_rate": 3.6363636363636366e-06,
16
+ "loss": 0.1441,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.30303030303030304,
21
+ "grad_norm": 1.40625,
22
+ "learning_rate": 8.181818181818181e-06,
23
+ "loss": 0.1383,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 0.45454545454545453,
28
+ "grad_norm": 0.6484375,
29
+ "learning_rate": 1.2727272727272728e-05,
30
+ "loss": 0.1213,
31
  "step": 15
32
  },
33
  {
34
  "epoch": 0.6060606060606061,
35
+ "grad_norm": 0.423828125,
36
+ "learning_rate": 1.7272727272727274e-05,
37
+ "loss": 0.1133,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.7575757575757576,
42
+ "grad_norm": 0.392578125,
43
+ "learning_rate": 2.1818181818181818e-05,
44
+ "loss": 0.1085,
45
  "step": 25
46
  },
47
  {
48
  "epoch": 0.9090909090909091,
49
+ "grad_norm": 0.28125,
50
+ "learning_rate": 2.6363636363636365e-05,
51
+ "loss": 0.1055,
52
  "step": 30
53
  },
54
  {
55
  "epoch": 1.0606060606060606,
56
+ "grad_norm": 0.2353515625,
57
+ "learning_rate": 2.9999830539872836e-05,
58
+ "loss": 0.0987,
59
  "step": 35
60
  },
61
  {
62
  "epoch": 1.2121212121212122,
63
+ "grad_norm": 0.2412109375,
64
+ "learning_rate": 2.9993899882114902e-05,
65
+ "loss": 0.1005,
66
  "step": 40
67
  },
68
  {
69
  "epoch": 1.3636363636363638,
70
+ "grad_norm": 0.21484375,
71
+ "learning_rate": 2.997950047184977e-05,
72
+ "loss": 0.0954,
73
  "step": 45
74
  },
75
  {
76
  "epoch": 1.5151515151515151,
77
+ "grad_norm": 0.2333984375,
78
+ "learning_rate": 2.9956641346126986e-05,
79
+ "loss": 0.0955,
80
  "step": 50
81
  },
82
  {
83
  "epoch": 1.6666666666666665,
84
+ "grad_norm": 0.224609375,
85
+ "learning_rate": 2.9925336851301575e-05,
86
+ "loss": 0.0946,
87
  "step": 55
88
  },
89
  {
90
  "epoch": 1.8181818181818183,
91
+ "grad_norm": 0.224609375,
92
+ "learning_rate": 2.9885606634030267e-05,
93
+ "loss": 0.0941,
94
  "step": 60
95
  },
96
  {
97
  "epoch": 1.9696969696969697,
98
+ "grad_norm": 0.2265625,
99
+ "learning_rate": 2.98374756289413e-05,
100
+ "loss": 0.0926,
101
  "step": 65
102
  },
103
  {
104
  "epoch": 2.121212121212121,
105
+ "grad_norm": 0.20703125,
106
+ "learning_rate": 2.9780974042985506e-05,
107
+ "loss": 0.0913,
108
  "step": 70
109
  },
110
  {
111
  "epoch": 2.2727272727272725,
112
+ "grad_norm": 0.2119140625,
113
+ "learning_rate": 2.971613733647841e-05,
114
+ "loss": 0.0898,
115
  "step": 75
116
  },
117
  {
118
  "epoch": 2.4242424242424243,
119
+ "grad_norm": 0.2236328125,
120
+ "learning_rate": 2.9643006200845458e-05,
121
+ "loss": 0.0914,
122
  "step": 80
123
  },
124
  {
125
  "epoch": 2.5757575757575757,
126
+ "grad_norm": 0.2177734375,
127
+ "learning_rate": 2.9561626533084068e-05,
128
+ "loss": 0.0912,
129
  "step": 85
130
  },
131
  {
132
  "epoch": 2.7272727272727275,
133
+ "grad_norm": 0.2138671875,
134
+ "learning_rate": 2.9472049406958788e-05,
135
+ "loss": 0.0906,
136
  "step": 90
137
  },
138
  {
139
  "epoch": 2.878787878787879,
140
+ "grad_norm": 0.2197265625,
141
+ "learning_rate": 2.937433104094746e-05,
142
+ "loss": 0.09,
143
  "step": 95
144
  },
145
  {
146
+ "epoch": 3.0303030303030303,
147
+ "grad_norm": 0.2197265625,
148
+ "learning_rate": 2.9268532762958568e-05,
149
+ "loss": 0.0873,
150
+ "step": 100
151
+ },
152
+ {
153
+ "epoch": 3.1818181818181817,
154
+ "grad_norm": 0.2216796875,
155
+ "learning_rate": 2.915472097184196e-05,
156
+ "loss": 0.0893,
157
+ "step": 105
158
+ },
159
+ {
160
+ "epoch": 3.3333333333333335,
161
+ "grad_norm": 0.212890625,
162
+ "learning_rate": 2.903296709571698e-05,
163
+ "loss": 0.0877,
164
+ "step": 110
165
+ },
166
+ {
167
+ "epoch": 3.484848484848485,
168
+ "grad_norm": 0.20703125,
169
+ "learning_rate": 2.8903347547144327e-05,
170
+ "loss": 0.0881,
171
+ "step": 115
172
+ },
173
+ {
174
+ "epoch": 3.6363636363636362,
175
+ "grad_norm": 0.21875,
176
+ "learning_rate": 2.876594367516961e-05,
177
+ "loss": 0.0885,
178
+ "step": 120
179
+ },
180
+ {
181
+ "epoch": 3.787878787878788,
182
+ "grad_norm": 0.21875,
183
+ "learning_rate": 2.8620841714268804e-05,
184
+ "loss": 0.0886,
185
+ "step": 125
186
+ },
187
+ {
188
+ "epoch": 3.9393939393939394,
189
+ "grad_norm": 0.2158203125,
190
+ "learning_rate": 2.846813273022764e-05,
191
+ "loss": 0.0867,
192
+ "step": 130
193
+ },
194
+ {
195
+ "epoch": 4.090909090909091,
196
+ "grad_norm": 0.2041015625,
197
+ "learning_rate": 2.83079125629888e-05,
198
+ "loss": 0.0877,
199
+ "step": 135
200
+ },
201
+ {
202
+ "epoch": 4.242424242424242,
203
+ "grad_norm": 0.2294921875,
204
+ "learning_rate": 2.8140281766502957e-05,
205
+ "loss": 0.0877,
206
+ "step": 140
207
+ },
208
+ {
209
+ "epoch": 4.393939393939394,
210
+ "grad_norm": 0.2216796875,
211
+ "learning_rate": 2.7965345545621217e-05,
212
+ "loss": 0.0875,
213
+ "step": 145
214
+ },
215
+ {
216
+ "epoch": 4.545454545454545,
217
+ "grad_norm": 0.228515625,
218
+ "learning_rate": 2.7783213690068737e-05,
219
+ "loss": 0.0861,
220
+ "step": 150
221
+ },
222
+ {
223
+ "epoch": 4.696969696969697,
224
+ "grad_norm": 0.2158203125,
225
+ "learning_rate": 2.7594000505540807e-05,
226
+ "loss": 0.0896,
227
+ "step": 155
228
+ },
229
+ {
230
+ "epoch": 4.848484848484849,
231
+ "grad_norm": 0.224609375,
232
+ "learning_rate": 2.7397824741964805e-05,
233
+ "loss": 0.0834,
234
+ "step": 160
235
+ },
236
+ {
237
+ "epoch": 5.0,
238
+ "grad_norm": 0.2890625,
239
+ "learning_rate": 2.7194809518972856e-05,
240
+ "loss": 0.0865,
241
+ "step": 165
242
+ },
243
+ {
244
+ "epoch": 5.151515151515151,
245
+ "grad_norm": 0.2177734375,
246
+ "learning_rate": 2.6985082248632174e-05,
247
+ "loss": 0.0828,
248
+ "step": 170
249
+ },
250
+ {
251
+ "epoch": 5.303030303030303,
252
+ "grad_norm": 0.2197265625,
253
+ "learning_rate": 2.676877455548141e-05,
254
+ "loss": 0.0852,
255
+ "step": 175
256
+ },
257
+ {
258
+ "epoch": 5.454545454545454,
259
+ "grad_norm": 0.2109375,
260
+ "learning_rate": 2.6546022193923274e-05,
261
+ "loss": 0.086,
262
+ "step": 180
263
+ },
264
+ {
265
+ "epoch": 5.606060606060606,
266
+ "grad_norm": 0.22265625,
267
+ "learning_rate": 2.631696496302526e-05,
268
+ "loss": 0.0869,
269
+ "step": 185
270
+ },
271
+ {
272
+ "epoch": 5.757575757575758,
273
+ "grad_norm": 0.2158203125,
274
+ "learning_rate": 2.6081746618781953e-05,
275
+ "loss": 0.0862,
276
+ "step": 190
277
+ },
278
+ {
279
+ "epoch": 5.909090909090909,
280
+ "grad_norm": 0.23046875,
281
+ "learning_rate": 2.584051478389399e-05,
282
+ "loss": 0.0847,
283
+ "step": 195
284
+ },
285
+ {
286
+ "epoch": 6.0606060606060606,
287
+ "grad_norm": 0.212890625,
288
+ "learning_rate": 2.559342085512022e-05,
289
+ "loss": 0.0853,
290
+ "step": 200
291
+ },
292
+ {
293
+ "epoch": 6.212121212121212,
294
+ "grad_norm": 0.2265625,
295
+ "learning_rate": 2.5340619908261352e-05,
296
+ "loss": 0.0877,
297
+ "step": 205
298
+ },
299
+ {
300
+ "epoch": 6.363636363636363,
301
+ "grad_norm": 0.2197265625,
302
+ "learning_rate": 2.508227060083457e-05,
303
+ "loss": 0.0807,
304
+ "step": 210
305
+ },
306
+ {
307
+ "epoch": 6.515151515151516,
308
+ "grad_norm": 0.2236328125,
309
+ "learning_rate": 2.4818535072500327e-05,
310
+ "loss": 0.0793,
311
+ "step": 215
312
+ },
313
+ {
314
+ "epoch": 6.666666666666667,
315
+ "grad_norm": 0.224609375,
316
+ "learning_rate": 2.4549578843303708e-05,
317
+ "loss": 0.0821,
318
+ "step": 220
319
+ },
320
+ {
321
+ "epoch": 6.818181818181818,
322
+ "grad_norm": 0.2412109375,
323
+ "learning_rate": 2.427557070979427e-05,
324
+ "loss": 0.0832,
325
+ "step": 225
326
+ },
327
+ {
328
+ "epoch": 6.96969696969697,
329
+ "grad_norm": 0.220703125,
330
+ "learning_rate": 2.399668263908961e-05,
331
+ "loss": 0.0844,
332
+ "step": 230
333
+ },
334
+ {
335
+ "epoch": 7.121212121212121,
336
+ "grad_norm": 0.2236328125,
337
+ "learning_rate": 2.3713089660948985e-05,
338
+ "loss": 0.0846,
339
+ "step": 235
340
+ },
341
+ {
342
+ "epoch": 7.2727272727272725,
343
+ "grad_norm": 0.208984375,
344
+ "learning_rate": 2.342496975792494e-05,
345
+ "loss": 0.0824,
346
+ "step": 240
347
+ },
348
+ {
349
+ "epoch": 7.424242424242424,
350
+ "grad_norm": 0.1953125,
351
+ "learning_rate": 2.313250375366167e-05,
352
+ "loss": 0.0796,
353
+ "step": 245
354
+ },
355
+ {
356
+ "epoch": 7.575757575757576,
357
+ "grad_norm": 0.21484375,
358
+ "learning_rate": 2.283587519941036e-05,
359
+ "loss": 0.0846,
360
+ "step": 250
361
+ },
362
+ {
363
+ "epoch": 7.7272727272727275,
364
+ "grad_norm": 0.2275390625,
365
+ "learning_rate": 2.253527025883271e-05,
366
+ "loss": 0.0814,
367
+ "step": 255
368
+ },
369
+ {
370
+ "epoch": 7.878787878787879,
371
+ "grad_norm": 0.228515625,
372
+ "learning_rate": 2.2230877591164858e-05,
373
+ "loss": 0.0846,
374
+ "step": 260
375
+ },
376
+ {
377
+ "epoch": 8.030303030303031,
378
+ "grad_norm": 0.21875,
379
+ "learning_rate": 2.192288823281509e-05,
380
+ "loss": 0.0809,
381
+ "step": 265
382
+ },
383
+ {
384
+ "epoch": 8.181818181818182,
385
+ "grad_norm": 0.2373046875,
386
+ "learning_rate": 2.1611495477469712e-05,
387
+ "loss": 0.0819,
388
+ "step": 270
389
+ },
390
+ {
391
+ "epoch": 8.333333333333334,
392
+ "grad_norm": 0.2197265625,
393
+ "learning_rate": 2.1296894754782155e-05,
394
+ "loss": 0.0851,
395
+ "step": 275
396
+ },
397
+ {
398
+ "epoch": 8.484848484848484,
399
+ "grad_norm": 0.22265625,
400
+ "learning_rate": 2.0979283507721653e-05,
401
+ "loss": 0.0821,
402
+ "step": 280
403
+ },
404
+ {
405
+ "epoch": 8.636363636363637,
406
+ "grad_norm": 0.2119140625,
407
+ "learning_rate": 2.0658861068658254e-05,
408
+ "loss": 0.0788,
409
+ "step": 285
410
+ },
411
+ {
412
+ "epoch": 8.787878787878787,
413
+ "grad_norm": 0.2216796875,
414
+ "learning_rate": 2.0335828534262148e-05,
415
+ "loss": 0.0819,
416
+ "step": 290
417
+ },
418
+ {
419
+ "epoch": 8.93939393939394,
420
+ "grad_norm": 0.2353515625,
421
+ "learning_rate": 2.001038863929568e-05,
422
+ "loss": 0.0839,
423
+ "step": 295
424
+ },
425
+ {
426
+ "epoch": 9.090909090909092,
427
+ "grad_norm": 0.2412109375,
428
+ "learning_rate": 1.9682745629377267e-05,
429
+ "loss": 0.0826,
430
+ "step": 300
431
+ },
432
+ {
433
+ "epoch": 9.242424242424242,
434
+ "grad_norm": 0.220703125,
435
+ "learning_rate": 1.9353105132797175e-05,
436
+ "loss": 0.0796,
437
+ "step": 305
438
+ },
439
+ {
440
+ "epoch": 9.393939393939394,
441
+ "grad_norm": 0.216796875,
442
+ "learning_rate": 1.902167403146548e-05,
443
+ "loss": 0.0803,
444
+ "step": 310
445
+ },
446
+ {
447
+ "epoch": 9.545454545454545,
448
+ "grad_norm": 0.2314453125,
449
+ "learning_rate": 1.8688660331073253e-05,
450
+ "loss": 0.0812,
451
+ "step": 315
452
+ },
453
+ {
454
+ "epoch": 9.696969696969697,
455
+ "grad_norm": 0.2158203125,
456
+ "learning_rate": 1.8354273030548512e-05,
457
+ "loss": 0.0792,
458
+ "step": 320
459
+ },
460
+ {
461
+ "epoch": 9.848484848484848,
462
+ "grad_norm": 0.22265625,
463
+ "learning_rate": 1.801872199088878e-05,
464
+ "loss": 0.0783,
465
+ "step": 325
466
+ },
467
+ {
468
+ "epoch": 10.0,
469
+ "grad_norm": 0.271484375,
470
+ "learning_rate": 1.7682217803452616e-05,
471
+ "loss": 0.0825,
472
+ "step": 330
473
+ },
474
+ {
475
+ "epoch": 10.151515151515152,
476
+ "grad_norm": 0.216796875,
477
+ "learning_rate": 1.7344971657792768e-05,
478
+ "loss": 0.0846,
479
+ "step": 335
480
+ },
481
+ {
482
+ "epoch": 10.303030303030303,
483
+ "grad_norm": 0.21484375,
484
+ "learning_rate": 1.7007195209113934e-05,
485
+ "loss": 0.0809,
486
+ "step": 340
487
+ },
488
+ {
489
+ "epoch": 10.454545454545455,
490
+ "grad_norm": 0.2236328125,
491
+ "learning_rate": 1.666910044543822e-05,
492
+ "loss": 0.0823,
493
+ "step": 345
494
+ },
495
+ {
496
+ "epoch": 10.606060606060606,
497
+ "grad_norm": 0.21484375,
498
+ "learning_rate": 1.6330899554561785e-05,
499
+ "loss": 0.0814,
500
+ "step": 350
501
+ },
502
+ {
503
+ "epoch": 10.757575757575758,
504
+ "grad_norm": 0.2197265625,
505
+ "learning_rate": 1.5992804790886075e-05,
506
+ "loss": 0.0795,
507
+ "step": 355
508
+ },
509
+ {
510
+ "epoch": 10.909090909090908,
511
+ "grad_norm": 0.2177734375,
512
+ "learning_rate": 1.5655028342207235e-05,
513
+ "loss": 0.0818,
514
+ "step": 360
515
+ },
516
+ {
517
+ "epoch": 11.06060606060606,
518
+ "grad_norm": 0.228515625,
519
+ "learning_rate": 1.5317782196547387e-05,
520
+ "loss": 0.0817,
521
+ "step": 365
522
+ },
523
+ {
524
+ "epoch": 11.212121212121213,
525
+ "grad_norm": 0.2373046875,
526
+ "learning_rate": 1.4981278009111222e-05,
527
+ "loss": 0.0819,
528
+ "step": 370
529
+ },
530
+ {
531
+ "epoch": 11.363636363636363,
532
+ "grad_norm": 0.2294921875,
533
+ "learning_rate": 1.4645726969451489e-05,
534
+ "loss": 0.0778,
535
+ "step": 375
536
+ },
537
+ {
538
+ "epoch": 11.515151515151516,
539
+ "grad_norm": 0.2197265625,
540
+ "learning_rate": 1.4311339668926748e-05,
541
+ "loss": 0.0796,
542
+ "step": 380
543
+ },
544
+ {
545
+ "epoch": 11.666666666666666,
546
+ "grad_norm": 0.23828125,
547
+ "learning_rate": 1.397832596853452e-05,
548
+ "loss": 0.0823,
549
+ "step": 385
550
+ },
551
+ {
552
+ "epoch": 11.818181818181818,
553
+ "grad_norm": 0.228515625,
554
+ "learning_rate": 1.3646894867202821e-05,
555
+ "loss": 0.0794,
556
+ "step": 390
557
+ },
558
+ {
559
+ "epoch": 11.969696969696969,
560
+ "grad_norm": 0.2314453125,
561
+ "learning_rate": 1.3317254370622732e-05,
562
+ "loss": 0.0834,
563
+ "step": 395
564
+ },
565
+ {
566
+ "epoch": 12.121212121212121,
567
+ "grad_norm": 0.22265625,
568
+ "learning_rate": 1.298961136070432e-05,
569
+ "loss": 0.0811,
570
+ "step": 400
571
+ },
572
+ {
573
+ "epoch": 12.272727272727273,
574
+ "grad_norm": 0.2275390625,
575
+ "learning_rate": 1.266417146573785e-05,
576
+ "loss": 0.078,
577
+ "step": 405
578
+ },
579
+ {
580
+ "epoch": 12.424242424242424,
581
+ "grad_norm": 0.2216796875,
582
+ "learning_rate": 1.2341138931341752e-05,
583
+ "loss": 0.0814,
584
+ "step": 410
585
+ },
586
+ {
587
+ "epoch": 12.575757575757576,
588
+ "grad_norm": 0.2373046875,
589
+ "learning_rate": 1.2020716492278353e-05,
590
+ "loss": 0.0811,
591
+ "step": 415
592
+ },
593
+ {
594
+ "epoch": 12.727272727272727,
595
+ "grad_norm": 0.212890625,
596
+ "learning_rate": 1.1703105245217848e-05,
597
+ "loss": 0.0787,
598
+ "step": 420
599
+ },
600
+ {
601
+ "epoch": 12.878787878787879,
602
+ "grad_norm": 0.224609375,
603
+ "learning_rate": 1.1388504522530296e-05,
604
+ "loss": 0.0792,
605
+ "step": 425
606
+ },
607
+ {
608
+ "epoch": 13.030303030303031,
609
+ "grad_norm": 0.2119140625,
610
+ "learning_rate": 1.1077111767184916e-05,
611
+ "loss": 0.0816,
612
+ "step": 430
613
+ },
614
+ {
615
+ "epoch": 13.181818181818182,
616
+ "grad_norm": 0.234375,
617
+ "learning_rate": 1.0769122408835148e-05,
618
+ "loss": 0.0786,
619
+ "step": 435
620
+ },
621
+ {
622
+ "epoch": 13.333333333333334,
623
+ "grad_norm": 0.216796875,
624
+ "learning_rate": 1.0464729741167291e-05,
625
+ "loss": 0.0825,
626
+ "step": 440
627
+ },
628
+ {
629
+ "epoch": 13.484848484848484,
630
+ "grad_norm": 0.2119140625,
631
+ "learning_rate": 1.016412480058964e-05,
632
+ "loss": 0.0807,
633
+ "step": 445
634
+ },
635
+ {
636
+ "epoch": 13.636363636363637,
637
+ "grad_norm": 0.2255859375,
638
+ "learning_rate": 9.86749624633833e-06,
639
+ "loss": 0.0832,
640
+ "step": 450
641
+ },
642
+ {
643
+ "epoch": 13.787878787878787,
644
+ "grad_norm": 0.2294921875,
645
+ "learning_rate": 9.575030242075062e-06,
646
+ "loss": 0.0778,
647
+ "step": 455
648
+ },
649
+ {
650
+ "epoch": 13.93939393939394,
651
+ "grad_norm": 0.4296875,
652
+ "learning_rate": 9.286910339051015e-06,
653
+ "loss": 0.0846,
654
+ "step": 460
655
+ },
656
+ {
657
+ "epoch": 14.090909090909092,
658
+ "grad_norm": 0.216796875,
659
+ "learning_rate": 9.003317360910392e-06,
660
+ "loss": 0.0806,
661
+ "step": 465
662
+ },
663
+ {
664
+ "epoch": 14.242424242424242,
665
+ "grad_norm": 0.220703125,
666
+ "learning_rate": 8.724429290205732e-06,
667
+ "loss": 0.0787,
668
+ "step": 470
669
+ },
670
+ {
671
+ "epoch": 14.393939393939394,
672
+ "grad_norm": 0.2197265625,
673
+ "learning_rate": 8.450421156696298e-06,
674
+ "loss": 0.0817,
675
+ "step": 475
676
+ },
677
+ {
678
+ "epoch": 14.545454545454545,
679
+ "grad_norm": 0.2158203125,
680
+ "learning_rate": 8.181464927499674e-06,
681
+ "loss": 0.0776,
682
+ "step": 480
683
+ },
684
+ {
685
+ "epoch": 14.696969696969697,
686
+ "grad_norm": 0.228515625,
687
+ "learning_rate": 7.917729399165435e-06,
688
+ "loss": 0.0801,
689
+ "step": 485
690
+ },
691
+ {
692
+ "epoch": 14.848484848484848,
693
+ "grad_norm": 0.2314453125,
694
+ "learning_rate": 7.659380091738652e-06,
695
+ "loss": 0.0781,
696
+ "step": 490
697
+ },
698
+ {
699
+ "epoch": 15.0,
700
+ "grad_norm": 0.31640625,
701
+ "learning_rate": 7.406579144879779e-06,
702
+ "loss": 0.0786,
703
+ "step": 495
704
+ },
705
+ {
706
+ "epoch": 15.151515151515152,
707
+ "grad_norm": 0.2275390625,
708
+ "learning_rate": 7.159485216106013e-06,
709
+ "loss": 0.0807,
710
+ "step": 500
711
+ },
712
+ {
713
+ "epoch": 15.303030303030303,
714
+ "grad_norm": 0.232421875,
715
+ "learning_rate": 6.918253381218046e-06,
716
+ "loss": 0.0767,
717
+ "step": 505
718
+ },
719
+ {
720
+ "epoch": 15.454545454545455,
721
+ "grad_norm": 0.234375,
722
+ "learning_rate": 6.683035036974742e-06,
723
+ "loss": 0.0787,
724
+ "step": 510
725
+ },
726
+ {
727
+ "epoch": 15.606060606060606,
728
+ "grad_norm": 0.2373046875,
729
+ "learning_rate": 6.45397780607673e-06,
730
+ "loss": 0.0763,
731
+ "step": 515
732
+ },
733
+ {
734
+ "epoch": 15.757575757575758,
735
+ "grad_norm": 0.2216796875,
736
+ "learning_rate": 6.23122544451859e-06,
737
+ "loss": 0.081,
738
+ "step": 520
739
+ },
740
+ {
741
+ "epoch": 15.909090909090908,
742
+ "grad_norm": 0.234375,
743
+ "learning_rate": 6.014917751367825e-06,
744
+ "loss": 0.0794,
745
+ "step": 525
746
+ },
747
+ {
748
+ "epoch": 16.060606060606062,
749
+ "grad_norm": 0.2236328125,
750
+ "learning_rate": 5.80519048102715e-06,
751
+ "loss": 0.0787,
752
+ "step": 530
753
+ },
754
+ {
755
+ "epoch": 16.21212121212121,
756
+ "grad_norm": 0.2314453125,
757
+ "learning_rate": 5.602175258035204e-06,
758
+ "loss": 0.077,
759
+ "step": 535
760
+ },
761
+ {
762
+ "epoch": 16.363636363636363,
763
+ "grad_norm": 0.2412109375,
764
+ "learning_rate": 5.4059994944591914e-06,
765
+ "loss": 0.0798,
766
+ "step": 540
767
+ },
768
+ {
769
+ "epoch": 16.515151515151516,
770
+ "grad_norm": 0.21875,
771
+ "learning_rate": 5.2167863099312636e-06,
772
+ "loss": 0.0794,
773
+ "step": 545
774
+ },
775
+ {
776
+ "epoch": 16.666666666666668,
777
+ "grad_norm": 0.2373046875,
778
+ "learning_rate": 5.034654454378783e-06,
779
+ "loss": 0.0793,
780
+ "step": 550
781
+ },
782
+ {
783
+ "epoch": 16.818181818181817,
784
+ "grad_norm": 0.2353515625,
785
+ "learning_rate": 4.859718233497048e-06,
786
+ "loss": 0.0801,
787
+ "step": 555
788
+ },
789
+ {
790
+ "epoch": 16.96969696969697,
791
+ "grad_norm": 0.2177734375,
792
+ "learning_rate": 4.692087437011203e-06,
793
+ "loss": 0.0791,
794
+ "step": 560
795
+ },
796
+ {
797
+ "epoch": 17.12121212121212,
798
+ "grad_norm": 0.2255859375,
799
+ "learning_rate": 4.5318672697723665e-06,
800
+ "loss": 0.081,
801
+ "step": 565
802
+ },
803
+ {
804
+ "epoch": 17.272727272727273,
805
+ "grad_norm": 0.2265625,
806
+ "learning_rate": 4.3791582857311975e-06,
807
+ "loss": 0.0792,
808
+ "step": 570
809
+ },
810
+ {
811
+ "epoch": 17.424242424242426,
812
+ "grad_norm": 0.2119140625,
813
+ "learning_rate": 4.2340563248303915e-06,
814
+ "loss": 0.0805,
815
+ "step": 575
816
+ },
817
+ {
818
+ "epoch": 17.575757575757574,
819
+ "grad_norm": 0.2197265625,
820
+ "learning_rate": 4.096652452855675e-06,
821
+ "loss": 0.0797,
822
+ "step": 580
823
+ },
824
+ {
825
+ "epoch": 17.727272727272727,
826
+ "grad_norm": 0.212890625,
827
+ "learning_rate": 3.967032904283021e-06,
828
+ "loss": 0.0809,
829
+ "step": 585
830
+ },
831
+ {
832
+ "epoch": 17.87878787878788,
833
+ "grad_norm": 0.236328125,
834
+ "learning_rate": 3.8452790281580445e-06,
835
+ "loss": 0.0803,
836
+ "step": 590
837
+ },
838
+ {
839
+ "epoch": 18.03030303030303,
840
+ "grad_norm": 0.23828125,
841
+ "learning_rate": 3.731467237041433e-06,
842
+ "loss": 0.0804,
843
+ "step": 595
844
+ },
845
+ {
846
+ "epoch": 18.181818181818183,
847
+ "grad_norm": 0.2236328125,
848
+ "learning_rate": 3.6256689590525444e-06,
849
+ "loss": 0.0824,
850
+ "step": 600
851
+ },
852
+ {
853
+ "epoch": 18.333333333333332,
854
+ "grad_norm": 0.224609375,
855
+ "learning_rate": 3.5279505930412164e-06,
856
+ "loss": 0.0809,
857
+ "step": 605
858
+ },
859
+ {
860
+ "epoch": 18.484848484848484,
861
+ "grad_norm": 0.22265625,
862
+ "learning_rate": 3.4383734669159366e-06,
863
+ "loss": 0.0811,
864
+ "step": 610
865
+ },
866
+ {
867
+ "epoch": 18.636363636363637,
868
+ "grad_norm": 0.2265625,
869
+ "learning_rate": 3.356993799154545e-06,
870
+ "loss": 0.0786,
871
+ "step": 615
872
+ },
873
+ {
874
+ "epoch": 18.78787878787879,
875
+ "grad_norm": 0.2158203125,
876
+ "learning_rate": 3.2838626635215874e-06,
877
+ "loss": 0.0811,
878
+ "step": 620
879
+ },
880
+ {
881
+ "epoch": 18.939393939393938,
882
+ "grad_norm": 0.2216796875,
883
+ "learning_rate": 3.2190259570144957e-06,
884
+ "loss": 0.0834,
885
+ "step": 625
886
+ },
887
+ {
888
+ "epoch": 19.09090909090909,
889
+ "grad_norm": 0.2255859375,
890
+ "learning_rate": 3.162524371058697e-06,
891
+ "loss": 0.0804,
892
+ "step": 630
893
+ },
894
+ {
895
+ "epoch": 19.242424242424242,
896
+ "grad_norm": 0.220703125,
897
+ "learning_rate": 3.1143933659697377e-06,
898
+ "loss": 0.0777,
899
+ "step": 635
900
+ },
901
+ {
902
+ "epoch": 19.393939393939394,
903
+ "grad_norm": 0.2333984375,
904
+ "learning_rate": 3.0746631486984266e-06,
905
+ "loss": 0.081,
906
+ "step": 640
907
+ },
908
+ {
909
+ "epoch": 19.545454545454547,
910
+ "grad_norm": 0.2294921875,
911
+ "learning_rate": 3.043358653873013e-06,
912
+ "loss": 0.0776,
913
+ "step": 645
914
+ },
915
+ {
916
+ "epoch": 19.696969696969695,
917
+ "grad_norm": 0.22265625,
918
+ "learning_rate": 3.020499528150232e-06,
919
+ "loss": 0.077,
920
+ "step": 650
921
+ },
922
+ {
923
+ "epoch": 19.848484848484848,
924
+ "grad_norm": 0.220703125,
925
+ "learning_rate": 3.006100117885101e-06,
926
+ "loss": 0.0814,
927
+ "step": 655
928
+ },
929
+ {
930
+ "epoch": 20.0,
931
+ "grad_norm": 0.298828125,
932
+ "learning_rate": 3.000169460127164e-06,
933
+ "loss": 0.0805,
934
+ "step": 660
935
+ },
936
+ {
937
+ "epoch": 20.0,
938
+ "step": 660,
939
+ "total_flos": 7.227148038517555e+17,
940
+ "train_loss": 0.08479578431808588,
941
+ "train_runtime": 842.3759,
942
+ "train_samples_per_second": 99.077,
943
+ "train_steps_per_second": 0.783
944
  }
945
  ],
946
  "logging_steps": 5,
947
+ "max_steps": 660,
948
  "num_input_tokens_seen": 0,
949
+ "num_train_epochs": 20,
950
  "save_steps": 500,
951
  "stateful_callbacks": {
952
  "TrainerControl": {
 
960
  "attributes": {}
961
  }
962
  },
963
+ "total_flos": 7.227148038517555e+17,
964
  "train_batch_size": 128,
965
  "trial_name": null,
966
  "trial_params": null