Model save

Browse files

Files changed (4) hide show

README.md +6 -7
all_results.json +6 -11
train_results.json +6 -6
trainer_state.json +407 -344

README.md CHANGED Viewed

@@ -2,13 +2,12 @@
 license: llama3
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B
 datasets:
-- preference-data
 model-index:
 - name: downstream_0.1p_seed42_level2_syntax
   results: []
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 # downstream_0.1p_seed42_level2_syntax
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the preference-data dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.0375
 ## Model description
@@ -56,9 +55,9 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 1.1119        | 0.9994 | 408  | 1.0375          |
 ### Framework versions

 license: llama3
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
 base_model: meta-llama/Meta-Llama-3-8B
 datasets:
+- generator
 model-index:
 - name: downstream_0.1p_seed42_level2_syntax
   results: []
 # downstream_0.1p_seed42_level2_syntax
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.0367
 ## Model description
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.094         | 1.0   | 454  | 1.0367          |
 ### Framework versions

all_results.json CHANGED Viewed

@@ -1,14 +1,9 @@
 {
-    "epoch": 0.9993876301285977,
-    "eval_loss": 1.037530541419983,
-    "eval_runtime": 2.3375,
-    "eval_samples": 10,
-    "eval_samples_per_second": 2.995,
-    "eval_steps_per_second": 0.856,
-    "total_flos": 1.2948113606049792e+16,
-    "train_loss": 1.1371603935372596,
-    "train_runtime": 15464.5644,
-    "train_samples": 90000,
-    "train_samples_per_second": 3.378,
     "train_steps_per_second": 0.026
 }

 {
+    "epoch": 1.0,
+    "total_flos": 1.4408836278386688e+16,
+    "train_loss": 1.131237539688396,
+    "train_runtime": 17315.3957,
+    "train_samples": 100000,
+    "train_samples_per_second": 3.356,
     "train_steps_per_second": 0.026
 }

train_results.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-    "epoch": 0.9993876301285977,
-    "total_flos": 1.2948113606049792e+16,
-    "train_loss": 1.1371603935372596,
-    "train_runtime": 15464.5644,
-    "train_samples": 90000,
-    "train_samples_per_second": 3.378,
     "train_steps_per_second": 0.026
 }

 {
+    "epoch": 1.0,
+    "total_flos": 1.4408836278386688e+16,
+    "train_loss": 1.131237539688396,
+    "train_runtime": 17315.3957,
+    "train_samples": 100000,
+    "train_samples_per_second": 3.356,
     "train_steps_per_second": 0.026
 }

trainer_state.json CHANGED Viewed

@@ -1,607 +1,670 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9993876301285977,
   "eval_steps": 500,
-  "global_step": 408,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.002449479485609308,
-      "grad_norm": 1.0681437691408622,
-      "learning_rate": 4.8780487804878055e-06,
-      "loss": 1.4232,
       "step": 1
     },
     {
-      "epoch": 0.01224739742804654,
-      "grad_norm": 1.136306986221139,
-      "learning_rate": 2.4390243902439026e-05,
-      "loss": 1.3933,
       "step": 5
     },
     {
-      "epoch": 0.02449479485609308,
-      "grad_norm": 0.245338434807422,
-      "learning_rate": 4.878048780487805e-05,
-      "loss": 1.3421,
       "step": 10
     },
     {
-      "epoch": 0.03674219228413962,
-      "grad_norm": 0.2375193035213865,
-      "learning_rate": 7.317073170731707e-05,
-      "loss": 1.3138,
       "step": 15
     },
     {
-      "epoch": 0.04898958971218616,
-      "grad_norm": 0.18791567927506073,
-      "learning_rate": 9.75609756097561e-05,
-      "loss": 1.2873,
       "step": 20
     },
     {
-      "epoch": 0.0612369871402327,
-      "grad_norm": 0.12816603940375418,
-      "learning_rate": 0.00012195121951219512,
-      "loss": 1.2549,
       "step": 25
     },
     {
-      "epoch": 0.07348438456827924,
-      "grad_norm": 0.11662240238108945,
-      "learning_rate": 0.00014634146341463414,
-      "loss": 1.2099,
       "step": 30
     },
     {
-      "epoch": 0.08573178199632578,
-      "grad_norm": 0.08250874631468892,
-      "learning_rate": 0.0001707317073170732,
-      "loss": 1.1902,
       "step": 35
     },
     {
-      "epoch": 0.09797917942437231,
-      "grad_norm": 0.09447143113709522,
-      "learning_rate": 0.0001951219512195122,
-      "loss": 1.2005,
       "step": 40
     },
     {
-      "epoch": 0.11022657685241886,
-      "grad_norm": 0.0824021009170569,
-      "learning_rate": 0.00019994138413588491,
-      "loss": 1.1864,
       "step": 45
     },
     {
-      "epoch": 0.1224739742804654,
-      "grad_norm": 0.0894227247413217,
-      "learning_rate": 0.0001997033749537941,
-      "loss": 1.1574,
       "step": 50
     },
     {
-      "epoch": 0.13472137170851195,
-      "grad_norm": 0.07854947144546601,
-      "learning_rate": 0.00019928274457498818,
-      "loss": 1.1595,
       "step": 55
     },
     {
-      "epoch": 0.14696876913655849,
-      "grad_norm": 0.08179447168807087,
-      "learning_rate": 0.00019868026344503306,
-      "loss": 1.1668,
       "step": 60
     },
     {
-      "epoch": 0.15921616656460502,
-      "grad_norm": 0.09018877740754977,
-      "learning_rate": 0.00019789703509552945,
-      "loss": 1.1517,
       "step": 65
     },
     {
-      "epoch": 0.17146356399265156,
-      "grad_norm": 0.08070743453648781,
-      "learning_rate": 0.00019693449412283435,
-      "loss": 1.1557,
       "step": 70
     },
     {
-      "epoch": 0.1837109614206981,
-      "grad_norm": 0.17006096808234306,
-      "learning_rate": 0.00019579440356038967,
-      "loss": 1.1265,
       "step": 75
     },
     {
-      "epoch": 0.19595835884874463,
-      "grad_norm": 0.07490774235953891,
-      "learning_rate": 0.00019447885164947088,
-      "loss": 1.1411,
       "step": 80
     },
     {
-      "epoch": 0.2082057562767912,
-      "grad_norm": 0.07560268552998486,
-      "learning_rate": 0.00019299024801426994,
-      "loss": 1.1346,
       "step": 85
     },
     {
-      "epoch": 0.22045315370483773,
-      "grad_norm": 0.06989244321896809,
-      "learning_rate": 0.00019133131924831917,
-      "loss": 1.1373,
       "step": 90
     },
     {
-      "epoch": 0.23270055113288426,
-      "grad_norm": 0.07297326140007601,
-      "learning_rate": 0.00018950510392033945,
-      "loss": 1.1262,
       "step": 95
     },
     {
-      "epoch": 0.2449479485609308,
-      "grad_norm": 0.08107861966515256,
-      "learning_rate": 0.00018751494700866087,
-      "loss": 1.1266,
       "step": 100
     },
     {
-      "epoch": 0.25719534598897736,
-      "grad_norm": 0.07525334503932822,
-      "learning_rate": 0.0001853644937744095,
-      "loss": 1.1337,
       "step": 105
     },
     {
-      "epoch": 0.2694427434170239,
-      "grad_norm": 0.07418001338537485,
-      "learning_rate": 0.00018305768308468293,
-      "loss": 1.1527,
       "step": 110
     },
     {
-      "epoch": 0.28169014084507044,
-      "grad_norm": 0.07966858826560685,
-      "learning_rate": 0.00018059874019794351,
-      "loss": 1.1275,
       "step": 115
     },
     {
-      "epoch": 0.29393753827311697,
-      "grad_norm": 0.06884328126643421,
-      "learning_rate": 0.00017799216902484466,
-      "loss": 1.1142,
       "step": 120
     },
     {
-      "epoch": 0.3061849357011635,
-      "grad_norm": 0.07638833793093423,
-      "learning_rate": 0.00017524274387866484,
-      "loss": 1.1489,
       "step": 125
     },
     {
-      "epoch": 0.31843233312921004,
-      "grad_norm": 0.07163478075363215,
-      "learning_rate": 0.00017235550073046028,
-      "loss": 1.1334,
       "step": 130
     },
     {
-      "epoch": 0.3306797305572566,
-      "grad_norm": 0.07584970266147063,
-      "learning_rate": 0.00016933572798495328,
-      "loss": 1.1394,
       "step": 135
     },
     {
-      "epoch": 0.3429271279853031,
-      "grad_norm": 0.0882549132067985,
-      "learning_rate": 0.00016618895679405165,
-      "loss": 1.1266,
       "step": 140
     },
     {
-      "epoch": 0.35517452541334965,
-      "grad_norm": 0.0738337228599522,
-      "learning_rate": 0.00016292095092574154,
-      "loss": 1.1356,
       "step": 145
     },
     {
-      "epoch": 0.3674219228413962,
-      "grad_norm": 0.07323403324052054,
-      "learning_rate": 0.00015953769620691022,
-      "loss": 1.1448,
       "step": 150
     },
     {
-      "epoch": 0.3796693202694427,
-      "grad_norm": 0.07258910733356848,
-      "learning_rate": 0.0001560453895594354,
-      "loss": 1.1255,
       "step": 155
     },
     {
-      "epoch": 0.39191671769748926,
-      "grad_norm": 0.08483741713706569,
-      "learning_rate": 0.00015245042764962417,
-      "loss": 1.1203,
       "step": 160
     },
     {
-      "epoch": 0.40416411512553585,
-      "grad_norm": 0.07393069983884801,
-      "learning_rate": 0.00014875939517179016,
-      "loss": 1.1305,
       "step": 165
     },
     {
-      "epoch": 0.4164115125535824,
-      "grad_norm": 0.07536661950821844,
-      "learning_rate": 0.00014497905278743083,
-      "loss": 1.1142,
       "step": 170
     },
     {
-      "epoch": 0.4286589099816289,
-      "grad_norm": 0.0774588990644394,
-      "learning_rate": 0.00014111632474209505,
-      "loss": 1.1014,
       "step": 175
     },
     {
-      "epoch": 0.44090630740967546,
-      "grad_norm": 0.0723327812244184,
-      "learning_rate": 0.0001371782861826226,
-      "loss": 1.1215,
       "step": 180
     },
     {
-      "epoch": 0.453153704837722,
-      "grad_norm": 0.07454342646966894,
-      "learning_rate": 0.00013317215019798638,
-      "loss": 1.1276,
       "step": 185
     },
     {
-      "epoch": 0.46540110226576853,
-      "grad_norm": 0.07195661618627822,
-      "learning_rate": 0.00012910525460747344,
-      "loss": 1.1083,
       "step": 190
     },
     {
-      "epoch": 0.47764849969381507,
-      "grad_norm": 0.07092309315305423,
-      "learning_rate": 0.00012498504852040434,
-      "loss": 1.1373,
       "step": 195
     },
     {
-      "epoch": 0.4898958971218616,
-      "grad_norm": 0.07301281736550075,
-      "learning_rate": 0.00012081907869200849,
-      "loss": 1.1312,
       "step": 200
     },
     {
-      "epoch": 0.5021432945499081,
-      "grad_norm": 0.07484347637628397,
-      "learning_rate": 0.00011661497570044738,
-      "loss": 1.1208,
       "step": 205
     },
     {
-      "epoch": 0.5143906919779547,
-      "grad_norm": 0.0724091132876655,
-      "learning_rate": 0.00011238043997030329,
-      "loss": 1.1309,
       "step": 210
     },
     {
-      "epoch": 0.5266380894060012,
-      "grad_norm": 0.2342422867496652,
-      "learning_rate": 0.00010812322766813461,
-      "loss": 1.1138,
       "step": 215
     },
     {
-      "epoch": 0.5388854868340478,
-      "grad_norm": 0.07212287103404749,
-      "learning_rate": 0.00010385113649593137,
-      "loss": 1.1192,
       "step": 220
     },
     {
-      "epoch": 0.5511328842620943,
-      "grad_norm": 0.07073394667449048,
-      "learning_rate": 9.957199140849278e-05,
-      "loss": 1.109,
       "step": 225
     },
     {
-      "epoch": 0.5633802816901409,
-      "grad_norm": 0.06964674186116192,
-      "learning_rate": 9.529363028088725e-05,
-      "loss": 1.115,
       "step": 230
     },
     {
-      "epoch": 0.5756276791181874,
-      "grad_norm": 0.07117704481271163,
-      "learning_rate": 9.102388955224703e-05,
-      "loss": 1.1099,
       "step": 235
     },
     {
-      "epoch": 0.5878750765462339,
-      "grad_norm": 0.07216465095526178,
-      "learning_rate": 8.677058987219295e-05,
-      "loss": 1.113,
       "step": 240
     },
     {
-      "epoch": 0.6001224739742804,
-      "grad_norm": 0.0725366248294856,
-      "learning_rate": 8.254152177618e-05,
-      "loss": 1.1047,
       "step": 245
     },
     {
-      "epoch": 0.612369871402327,
-      "grad_norm": 0.07979788000565378,
-      "learning_rate": 7.83444314160013e-05,
-      "loss": 1.1275,
       "step": 250
     },
     {
-      "epoch": 0.6246172688303735,
-      "grad_norm": 0.07038014346187686,
-      "learning_rate": 7.418700637158742e-05,
-      "loss": 1.0942,
       "step": 255
     },
     {
-      "epoch": 0.6368646662584201,
-      "grad_norm": 0.07043699403227373,
-      "learning_rate": 7.00768615700881e-05,
-      "loss": 1.1188,
       "step": 260
     },
     {
-      "epoch": 0.6491120636864667,
-      "grad_norm": 0.07317860829882807,
-      "learning_rate": 6.60215253380287e-05,
-      "loss": 1.1228,
       "step": 265
     },
     {
-      "epoch": 0.6613594611145132,
-      "grad_norm": 0.0736268694865736,
-      "learning_rate": 6.202842561208758e-05,
-      "loss": 1.1004,
       "step": 270
     },
     {
-      "epoch": 0.6736068585425597,
-      "grad_norm": 0.0681966580195897,
-      "learning_rate": 5.810487633375261e-05,
-      "loss": 1.0964,
       "step": 275
     },
     {
-      "epoch": 0.6858542559706062,
-      "grad_norm": 0.06988692587157964,
-      "learning_rate": 5.425806405277609e-05,
-      "loss": 1.1123,
       "step": 280
     },
     {
-      "epoch": 0.6981016533986528,
-      "grad_norm": 0.06961689512931302,
-      "learning_rate": 5.049503476396627e-05,
-      "loss": 1.1254,
       "step": 285
     },
     {
-      "epoch": 0.7103490508266993,
-      "grad_norm": 0.06848007555420067,
-      "learning_rate": 4.682268100142566e-05,
-      "loss": 1.1064,
       "step": 290
     },
     {
-      "epoch": 0.7225964482547459,
-      "grad_norm": 0.06848238221490942,
-      "learning_rate": 4.32477292138746e-05,
-      "loss": 1.1078,
       "step": 295
     },
     {
-      "epoch": 0.7348438456827924,
-      "grad_norm": 0.06932096702672658,
-      "learning_rate": 3.9776727444184744e-05,
-      "loss": 1.1359,
       "step": 300
     },
     {
-      "epoch": 0.747091243110839,
-      "grad_norm": 0.06964742874998163,
-      "learning_rate": 3.641603333568831e-05,
-      "loss": 1.1071,
       "step": 305
     },
     {
-      "epoch": 0.7593386405388854,
-      "grad_norm": 0.07515967784857266,
-      "learning_rate": 3.3171802487232086e-05,
-      "loss": 1.114,
       "step": 310
     },
     {
-      "epoch": 0.771586037966932,
-      "grad_norm": 0.07140996525669459,
-      "learning_rate": 3.0049977178305076e-05,
-      "loss": 1.1179,
       "step": 315
     },
     {
-      "epoch": 0.7838334353949785,
-      "grad_norm": 0.06922024794802567,
-      "learning_rate": 2.7056275484891304e-05,
-      "loss": 1.0962,
       "step": 320
     },
     {
-      "epoch": 0.7960808328230251,
-      "grad_norm": 0.07028157088055875,
-      "learning_rate": 2.419618080598417e-05,
-      "loss": 1.1361,
       "step": 325
     },
     {
-      "epoch": 0.8083282302510717,
-      "grad_norm": 0.07083633675990936,
-      "learning_rate": 2.1474931819945553e-05,
-      "loss": 1.1025,
       "step": 330
     },
     {
-      "epoch": 0.8205756276791182,
-      "grad_norm": 0.07118501791774294,
-      "learning_rate": 1.889751288910645e-05,
-      "loss": 1.0959,
       "step": 335
     },
     {
-      "epoch": 0.8328230251071648,
-      "grad_norm": 0.0724941459460009,
-      "learning_rate": 1.6468644930184095e-05,
-      "loss": 1.0963,
       "step": 340
     },
     {
-      "epoch": 0.8450704225352113,
-      "grad_norm": 0.07065248333558355,
-      "learning_rate": 1.4192776767238158e-05,
-      "loss": 1.1097,
       "step": 345
     },
     {
-      "epoch": 0.8573178199632578,
-      "grad_norm": 0.06638354595318986,
-      "learning_rate": 1.2074076983003958e-05,
-      "loss": 1.1086,
       "step": 350
     },
     {
-      "epoch": 0.8695652173913043,
-      "grad_norm": 0.0678250769481932,
-      "learning_rate": 1.0116426283528302e-05,
-      "loss": 1.1164,
       "step": 355
     },
     {
-      "epoch": 0.8818126148193509,
-      "grad_norm": 0.06908465334552778,
-      "learning_rate": 8.323410390093522e-06,
-      "loss": 1.1219,
       "step": 360
     },
     {
-      "epoch": 0.8940600122473974,
-      "grad_norm": 0.07002593669930346,
-      "learning_rate": 6.698313471448547e-06,
-      "loss": 1.1057,
       "step": 365
     },
     {
-      "epoch": 0.906307409675444,
-      "grad_norm": 0.06951335625337747,
-      "learning_rate": 5.244112128377476e-06,
-      "loss": 1.1156,
       "step": 370
     },
     {
-      "epoch": 0.9185548071034905,
-      "grad_norm": 0.07086629076783696,
-      "learning_rate": 3.963469941623288e-06,
-      "loss": 1.0996,
       "step": 375
     },
     {
-      "epoch": 0.9308022045315371,
-      "grad_norm": 0.07007458613323735,
-      "learning_rate": 2.858732593153246e-06,
-      "loss": 1.1211,
       "step": 380
     },
     {
-      "epoch": 0.9430496019595835,
-      "grad_norm": 0.0665201643250434,
-      "learning_rate": 1.9319235697021763e-06,
-      "loss": 1.1165,
       "step": 385
     },
     {
-      "epoch": 0.9552969993876301,
-      "grad_norm": 0.06858672635827863,
-      "learning_rate": 1.1847404564628185e-06,
-      "loss": 1.0881,
       "step": 390
     },
     {
-      "epoch": 0.9675443968156767,
-      "grad_norm": 0.07063191807948407,
-      "learning_rate": 6.185518277123214e-07,
-      "loss": 1.1031,
       "step": 395
     },
     {
-      "epoch": 0.9797917942437232,
-      "grad_norm": 0.06843622739420911,
-      "learning_rate": 2.343947400698432e-07,
-      "loss": 1.1103,
       "step": 400
     },
     {
-      "epoch": 0.9920391916717698,
-      "grad_norm": 0.0681228499191145,
-      "learning_rate": 3.2972832976918554e-08,
-      "loss": 1.1119,
       "step": 405
     },
     {
-      "epoch": 0.9993876301285977,
-      "eval_loss": 1.037530541419983,
-      "eval_runtime": 2.1401,
-      "eval_samples_per_second": 3.271,
-      "eval_steps_per_second": 0.935,
-      "step": 408
     },
     {
-      "epoch": 0.9993876301285977,
-      "step": 408,
-      "total_flos": 1.2948113606049792e+16,
-      "train_loss": 1.1371603935372596,
-      "train_runtime": 15464.5644,
-      "train_samples_per_second": 3.378,
       "train_steps_per_second": 0.026
     }
   ],
   "logging_steps": 5,
-  "max_steps": 408,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
@@ -617,7 +680,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2948113606049792e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 454,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0022026431718061676,
+      "grad_norm": 1.0637864900736664,
+      "learning_rate": 4.347826086956522e-06,
+      "loss": 1.4366,
       "step": 1
     },
     {
+      "epoch": 0.011013215859030838,
+      "grad_norm": 1.153707883328667,
+      "learning_rate": 2.173913043478261e-05,
+      "loss": 1.4048,
       "step": 5
     },
     {
+      "epoch": 0.022026431718061675,
+      "grad_norm": 0.24033826612761172,
+      "learning_rate": 4.347826086956522e-05,
+      "loss": 1.3638,
       "step": 10
     },
     {
+      "epoch": 0.03303964757709251,
+      "grad_norm": 0.23098755386890235,
+      "learning_rate": 6.521739130434783e-05,
+      "loss": 1.3035,
       "step": 15
     },
     {
+      "epoch": 0.04405286343612335,
+      "grad_norm": 0.18769938367180908,
+      "learning_rate": 8.695652173913044e-05,
+      "loss": 1.2767,
       "step": 20
     },
     {
+      "epoch": 0.05506607929515418,
+      "grad_norm": 0.14239545432122608,
+      "learning_rate": 0.00010869565217391305,
+      "loss": 1.2196,
       "step": 25
     },
     {
+      "epoch": 0.06607929515418502,
+      "grad_norm": 0.10372099126300886,
+      "learning_rate": 0.00013043478260869567,
+      "loss": 1.21,
       "step": 30
     },
     {
+      "epoch": 0.07709251101321586,
+      "grad_norm": 0.10109724419729917,
+      "learning_rate": 0.00015217391304347827,
+      "loss": 1.2161,
       "step": 35
     },
     {
+      "epoch": 0.0881057268722467,
+      "grad_norm": 0.08677348076375273,
+      "learning_rate": 0.00017391304347826088,
+      "loss": 1.2002,
       "step": 40
     },
     {
+      "epoch": 0.09911894273127753,
+      "grad_norm": 0.07988238174290498,
+      "learning_rate": 0.0001956521739130435,
+      "loss": 1.1907,
       "step": 45
     },
     {
+      "epoch": 0.11013215859030837,
+      "grad_norm": 0.08057978560906214,
+      "learning_rate": 0.0001999525719713366,
+      "loss": 1.1572,
       "step": 50
     },
     {
+      "epoch": 0.1211453744493392,
+      "grad_norm": 0.08125586602301926,
+      "learning_rate": 0.0001997599727063717,
+      "loss": 1.1575,
       "step": 55
     },
     {
+      "epoch": 0.13215859030837004,
+      "grad_norm": 0.08174265113795184,
+      "learning_rate": 0.00019941952317728147,
+      "loss": 1.1662,
       "step": 60
     },
     {
+      "epoch": 0.14317180616740088,
+      "grad_norm": 0.09240936715995939,
+      "learning_rate": 0.00019893172795069144,
+      "loss": 1.1561,
       "step": 65
     },
     {
+      "epoch": 0.15418502202643172,
+      "grad_norm": 0.07863084458052047,
+      "learning_rate": 0.0001982973099683902,
+      "loss": 1.137,
       "step": 70
     },
     {
+      "epoch": 0.16519823788546256,
+      "grad_norm": 0.08035585420548778,
+      "learning_rate": 0.00019751720947588602,
+      "loss": 1.1417,
       "step": 75
     },
     {
+      "epoch": 0.1762114537444934,
+      "grad_norm": 0.07156903107546937,
+      "learning_rate": 0.00019659258262890683,
+      "loss": 1.156,
       "step": 80
     },
     {
+      "epoch": 0.18722466960352424,
+      "grad_norm": 0.07056162758348125,
+      "learning_rate": 0.000195524799779908,
+      "loss": 1.135,
       "step": 85
     },
     {
+      "epoch": 0.19823788546255505,
+      "grad_norm": 0.07468007657011916,
+      "learning_rate": 0.00019431544344712776,
+      "loss": 1.1285,
       "step": 90
     },
     {
+      "epoch": 0.2092511013215859,
+      "grad_norm": 0.07250524328449914,
+      "learning_rate": 0.00019296630596920023,
+      "loss": 1.1335,
       "step": 95
     },
     {
+      "epoch": 0.22026431718061673,
+      "grad_norm": 0.06955764714773345,
+      "learning_rate": 0.0001914793868488021,
+      "loss": 1.1512,
       "step": 100
     },
     {
+      "epoch": 0.23127753303964757,
+      "grad_norm": 0.07221286385490396,
+      "learning_rate": 0.0001898568897892697,
+      "loss": 1.1306,
       "step": 105
     },
     {
+      "epoch": 0.2422907488986784,
+      "grad_norm": 0.07486140156540808,
+      "learning_rate": 0.00018810121942857845,
+      "loss": 1.1342,
       "step": 110
     },
     {
+      "epoch": 0.2533039647577093,
+      "grad_norm": 0.07131722083122453,
+      "learning_rate": 0.00018621497777552507,
+      "loss": 1.1307,
       "step": 115
     },
     {
+      "epoch": 0.2643171806167401,
+      "grad_norm": 0.07348336925535924,
+      "learning_rate": 0.00018420096035339452,
+      "loss": 1.1222,
       "step": 120
     },
     {
+      "epoch": 0.2753303964757709,
+      "grad_norm": 0.07501908796003587,
+      "learning_rate": 0.00018206215205682683,
+      "loss": 1.1116,
       "step": 125
     },
     {
+      "epoch": 0.28634361233480177,
+      "grad_norm": 0.07330385148735,
+      "learning_rate": 0.000179801722728024,
+      "loss": 1.1373,
       "step": 130
     },
     {
+      "epoch": 0.2973568281938326,
+      "grad_norm": 0.073420013451975,
+      "learning_rate": 0.00017742302245885383,
+      "loss": 1.1053,
       "step": 135
     },
     {
+      "epoch": 0.30837004405286345,
+      "grad_norm": 0.07954193429333527,
+      "learning_rate": 0.00017492957662581295,
+      "loss": 1.1232,
       "step": 140
     },
     {
+      "epoch": 0.31938325991189426,
+      "grad_norm": 0.07217417992157714,
+      "learning_rate": 0.00017232508066520702,
+      "loss": 1.1237,
       "step": 145
     },
     {
+      "epoch": 0.3303964757709251,
+      "grad_norm": 0.07802555489353481,
+      "learning_rate": 0.0001696133945962927,
+      "loss": 1.1208,
       "step": 150
     },
     {
+      "epoch": 0.34140969162995594,
+      "grad_norm": 0.08970080549205696,
+      "learning_rate": 0.00016679853730049743,
+      "loss": 1.1404,
       "step": 155
     },
     {
+      "epoch": 0.3524229074889868,
+      "grad_norm": 0.07726401697227049,
+      "learning_rate": 0.00016388468056519612,
+      "loss": 1.0981,
       "step": 160
     },
     {
+      "epoch": 0.3634361233480176,
+      "grad_norm": 0.0738469257596569,
+      "learning_rate": 0.00016087614290087208,
+      "loss": 1.1245,
       "step": 165
     },
     {
+      "epoch": 0.3744493392070485,
+      "grad_norm": 0.0760961629898493,
+      "learning_rate": 0.00015777738314082514,
+      "loss": 1.1282,
       "step": 170
     },
     {
+      "epoch": 0.3854625550660793,
+      "grad_norm": 0.07461575712201869,
+      "learning_rate": 0.00015459299383291345,
+      "loss": 1.1206,
       "step": 175
     },
     {
+      "epoch": 0.3964757709251101,
+      "grad_norm": 0.07597642898156688,
+      "learning_rate": 0.00015132769443312207,
+      "loss": 1.1151,
       "step": 180
     },
     {
+      "epoch": 0.40748898678414097,
+      "grad_norm": 0.07220979125832422,
+      "learning_rate": 0.00014798632431104592,
+      "loss": 1.1313,
       "step": 185
     },
     {
+      "epoch": 0.4185022026431718,
+      "grad_norm": 0.07235062387347811,
+      "learning_rate": 0.00014457383557765386,
+      "loss": 1.126,
       "step": 190
     },
     {
+      "epoch": 0.42951541850220265,
+      "grad_norm": 0.07249541910324554,
+      "learning_rate": 0.00014109528574596301,
+      "loss": 1.1223,
       "step": 195
     },
     {
+      "epoch": 0.44052863436123346,
+      "grad_norm": 0.07276295542602365,
+      "learning_rate": 0.00013755583023550126,
+      "loss": 1.0954,
       "step": 200
     },
     {
+      "epoch": 0.45154185022026433,
+      "grad_norm": 0.08222792797320272,
+      "learning_rate": 0.00013396071473166613,
+      "loss": 1.1109,
       "step": 205
     },
     {
+      "epoch": 0.46255506607929514,
+      "grad_norm": 0.07000329815419548,
+      "learning_rate": 0.00013031526741130435,
+      "loss": 1.1122,
       "step": 210
     },
     {
+      "epoch": 0.473568281938326,
+      "grad_norm": 0.07273029465748866,
+      "learning_rate": 0.0001266248910460341,
+      "loss": 1.1098,
       "step": 215
     },
     {
+      "epoch": 0.4845814977973568,
+      "grad_norm": 0.07891968539218898,
+      "learning_rate": 0.0001228950549950134,
+      "loss": 1.1235,
       "step": 220
     },
     {
+      "epoch": 0.4955947136563877,
+      "grad_norm": 0.07948656131261005,
+      "learning_rate": 0.00011913128709902181,
+      "loss": 1.119,
       "step": 225
     },
     {
+      "epoch": 0.5066079295154186,
+      "grad_norm": 0.07426812917091022,
+      "learning_rate": 0.00011533916548786857,
+      "loss": 1.1153,
       "step": 230
     },
     {
+      "epoch": 0.5176211453744494,
+      "grad_norm": 0.07320813310432901,
+      "learning_rate": 0.00011152431031326978,
+      "loss": 1.1189,
       "step": 235
     },
     {
+      "epoch": 0.5286343612334802,
+      "grad_norm": 0.07027215565752497,
+      "learning_rate": 0.0001076923754194464,
+      "loss": 1.0921,
       "step": 240
     },
     {
+      "epoch": 0.539647577092511,
+      "grad_norm": 0.07197803964893518,
+      "learning_rate": 0.00010384903996378783,
+      "loss": 1.139,
       "step": 245
     },
     {
+      "epoch": 0.5506607929515418,
+      "grad_norm": 0.07476406384806204,
+      "learning_rate": 0.0001,
+      "loss": 1.113,
       "step": 250
     },
     {
+      "epoch": 0.5616740088105727,
+      "grad_norm": 0.07773914011299175,
+      "learning_rate": 9.615096003621221e-05,
+      "loss": 1.1112,
       "step": 255
     },
     {
+      "epoch": 0.5726872246696035,
+      "grad_norm": 0.07599058071843097,
+      "learning_rate": 9.230762458055363e-05,
+      "loss": 1.0823,
       "step": 260
     },
     {
+      "epoch": 0.5837004405286343,
+      "grad_norm": 0.0791178168791927,
+      "learning_rate": 8.847568968673026e-05,
+      "loss": 1.1322,
       "step": 265
     },
     {
+      "epoch": 0.5947136563876652,
+      "grad_norm": 0.08321320900809528,
+      "learning_rate": 8.466083451213144e-05,
+      "loss": 1.1129,
       "step": 270
     },
     {
+      "epoch": 0.6057268722466961,
+      "grad_norm": 0.07548913525051125,
+      "learning_rate": 8.086871290097821e-05,
+      "loss": 1.1062,
       "step": 275
     },
     {
+      "epoch": 0.6167400881057269,
+      "grad_norm": 0.07309802934315239,
+      "learning_rate": 7.710494500498662e-05,
+      "loss": 1.1129,
       "step": 280
     },
     {
+      "epoch": 0.6277533039647577,
+      "grad_norm": 0.069929912224867,
+      "learning_rate": 7.337510895396591e-05,
+      "loss": 1.1142,
       "step": 285
     },
     {
+      "epoch": 0.6387665198237885,
+      "grad_norm": 0.06916254050976436,
+      "learning_rate": 6.968473258869566e-05,
+      "loss": 1.1115,
       "step": 290
     },
     {
+      "epoch": 0.6497797356828194,
+      "grad_norm": 0.07179141392925117,
+      "learning_rate": 6.603928526833387e-05,
+      "loss": 1.1114,
       "step": 295
     },
     {
+      "epoch": 0.6607929515418502,
+      "grad_norm": 0.07393948815408477,
+      "learning_rate": 6.244416976449875e-05,
+      "loss": 1.0976,
       "step": 300
     },
     {
+      "epoch": 0.6718061674008811,
+      "grad_norm": 0.06872619691818142,
+      "learning_rate": 5.890471425403703e-05,
+      "loss": 1.1146,
       "step": 305
     },
     {
+      "epoch": 0.6828193832599119,
+      "grad_norm": 0.07576414521745392,
+      "learning_rate": 5.542616442234618e-05,
+      "loss": 1.1147,
       "step": 310
     },
     {
+      "epoch": 0.6938325991189427,
+      "grad_norm": 0.07330914446070216,
+      "learning_rate": 5.201367568895408e-05,
+      "loss": 1.0951,
       "step": 315
     },
     {
+      "epoch": 0.7048458149779736,
+      "grad_norm": 0.07224487325459926,
+      "learning_rate": 4.8672305566877964e-05,
+      "loss": 1.1086,
       "step": 320
     },
     {
+      "epoch": 0.7158590308370044,
+      "grad_norm": 0.075306080937695,
+      "learning_rate": 4.540700616708658e-05,
+      "loss": 1.1092,
       "step": 325
     },
     {
+      "epoch": 0.7268722466960352,
+      "grad_norm": 0.07437067674720446,
+      "learning_rate": 4.222261685917489e-05,
+      "loss": 1.0921,
       "step": 330
     },
     {
+      "epoch": 0.737885462555066,
+      "grad_norm": 0.0711166096176443,
+      "learning_rate": 3.9123857099127936e-05,
+      "loss": 1.1074,
       "step": 335
     },
     {
+      "epoch": 0.748898678414097,
+      "grad_norm": 0.07140111450874426,
+      "learning_rate": 3.6115319434803894e-05,
+      "loss": 1.1162,
       "step": 340
     },
     {
+      "epoch": 0.7599118942731278,
+      "grad_norm": 0.07495611302185953,
+      "learning_rate": 3.32014626995026e-05,
+      "loss": 1.1003,
       "step": 345
     },
     {
+      "epoch": 0.7709251101321586,
+      "grad_norm": 0.07717586378795258,
+      "learning_rate": 3.0386605403707346e-05,
+      "loss": 1.0927,
       "step": 350
     },
     {
+      "epoch": 0.7819383259911894,
+      "grad_norm": 0.07229554748902306,
+      "learning_rate": 2.7674919334793035e-05,
+      "loss": 1.1108,
       "step": 355
     },
     {
+      "epoch": 0.7929515418502202,
+      "grad_norm": 0.0768434750345143,
+      "learning_rate": 2.507042337418707e-05,
+      "loss": 1.1058,
       "step": 360
     },
     {
+      "epoch": 0.8039647577092511,
+      "grad_norm": 0.07347159962460627,
+      "learning_rate": 2.2576977541146193e-05,
+      "loss": 1.1115,
       "step": 365
     },
     {
+      "epoch": 0.8149779735682819,
+      "grad_norm": 0.0690702684713988,
+      "learning_rate": 2.0198277271976052e-05,
+      "loss": 1.1091,
       "step": 370
     },
     {
+      "epoch": 0.8259911894273128,
+      "grad_norm": 0.06989352121524041,
+      "learning_rate": 1.793784794317319e-05,
+      "loss": 1.0988,
       "step": 375
     },
     {
+      "epoch": 0.8370044052863436,
+      "grad_norm": 0.06951408734875156,
+      "learning_rate": 1.5799039646605486e-05,
+      "loss": 1.1,
       "step": 380
     },
     {
+      "epoch": 0.8480176211453745,
+      "grad_norm": 0.06836481993836284,
+      "learning_rate": 1.3785022224474942e-05,
+      "loss": 1.102,
       "step": 385
     },
     {
+      "epoch": 0.8590308370044053,
+      "grad_norm": 0.06852988441116457,
+      "learning_rate": 1.1898780571421552e-05,
+      "loss": 1.0987,
       "step": 390
     },
     {
+      "epoch": 0.8700440528634361,
+      "grad_norm": 0.07030606192740464,
+      "learning_rate": 1.0143110210730312e-05,
+      "loss": 1.1009,
       "step": 395
     },
     {
+      "epoch": 0.8810572687224669,
+      "grad_norm": 0.06918150786144664,
+      "learning_rate": 8.520613151197898e-06,
+      "loss": 1.1123,
       "step": 400
     },
     {
+      "epoch": 0.8920704845814978,
+      "grad_norm": 0.07235026662376734,
+      "learning_rate": 7.033694030799787e-06,
+      "loss": 1.098,
       "step": 405
     },
     {
+      "epoch": 0.9030837004405287,
+      "grad_norm": 0.06951148470437334,
+      "learning_rate": 5.684556552872256e-06,
+      "loss": 1.0975,
+      "step": 410
+    },
+    {
+      "epoch": 0.9140969162995595,
+      "grad_norm": 0.06872065759217105,
+      "learning_rate": 4.475200220092002e-06,
+      "loss": 1.0896,
+      "step": 415
+    },
+    {
+      "epoch": 0.9251101321585903,
+      "grad_norm": 0.06809525315060157,
+      "learning_rate": 3.40741737109318e-06,
+      "loss": 1.1158,
+      "step": 420
+    },
+    {
+      "epoch": 0.9361233480176211,
+      "grad_norm": 0.07195357579119646,
+      "learning_rate": 2.482790524113998e-06,
+      "loss": 1.1097,
+      "step": 425
+    },
+    {
+      "epoch": 0.947136563876652,
+      "grad_norm": 0.06817141756269769,
+      "learning_rate": 1.7026900316098215e-06,
+      "loss": 1.1113,
+      "step": 430
+    },
+    {
+      "epoch": 0.9581497797356828,
+      "grad_norm": 0.07218201005051188,
+      "learning_rate": 1.0682720493085607e-06,
+      "loss": 1.1049,
+      "step": 435
+    },
+    {
+      "epoch": 0.9691629955947136,
+      "grad_norm": 0.06979182488846636,
+      "learning_rate": 5.804768227185565e-07,
+      "loss": 1.1125,
+      "step": 440
+    },
+    {
+      "epoch": 0.9801762114537445,
+      "grad_norm": 0.06813879663952505,
+      "learning_rate": 2.400272936283088e-07,
+      "loss": 1.1002,
+      "step": 445
+    },
+    {
+      "epoch": 0.9911894273127754,
+      "grad_norm": 0.06852453577386937,
+      "learning_rate": 4.74280286634099e-08,
+      "loss": 1.094,
+      "step": 450
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.0366965532302856,
+      "eval_runtime": 2.2297,
+      "eval_samples_per_second": 3.139,
+      "eval_steps_per_second": 0.897,
+      "step": 454
     },
     {
+      "epoch": 1.0,
+      "step": 454,
+      "total_flos": 1.4408836278386688e+16,
+      "train_loss": 1.131237539688396,
+      "train_runtime": 17315.3957,
+      "train_samples_per_second": 3.356,
       "train_steps_per_second": 0.026
     }
   ],
   "logging_steps": 5,
+  "max_steps": 454,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.4408836278386688e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null