Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

config.json +1 -1
model-00001-of-00002.safetensors +2 -2
model-00002-of-00002.safetensors +1 -1
model.safetensors.index.json +2 -2
optimizer.pt +2 -2
rng_state.pth +1 -1
scheduler.pt +1 -1
trainer_state.json +55 -600
training_args.bin +1 -1

config.json CHANGED Viewed

@@ -31,5 +31,5 @@
   "tie_word_embeddings": true,
   "transformers_version": "4.56.0",
   "use_cache": true,
-  "vocab_size": 156939
 }

   "tie_word_embeddings": true,
   "transformers_version": "4.56.0",
   "use_cache": true,
+  "vocab_size": 156940
 }

model-00001-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fec0777b0697274e419843b43e7de2cb30a5fd61e41ce44dba71bf3de7bd5058
-size 4991031824

 version https://git-lfs.github.com/spec/v1
+oid sha256:33a77d12adc3c90a6d03d30b2ebb684027e4f21c9ca19ce87864192c1cbdd7b8
+size 4991037968

model-00002-of-00002.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4313244f0e8492462dafae0f065014006765bf6e54482ae970cf91017cc78878
 size 1610725592

 version https://git-lfs.github.com/spec/v1
+oid sha256:16b197f53c7eb5f953acb0917626be62d881b989a1678589016fddbdd2e620b8
 size 1610725592

model.safetensors.index.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "metadata": {
-    "total_parameters": 3300864000,
-    "total_size": 6601728000
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",

 {
   "metadata": {
+    "total_parameters": 3300867072,
+    "total_size": 6601734144
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00002.safetensors",

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c16c13d4fe81d73e8b7fddc7ce07badbdd55c4ed7d56a00eba76c2709bcaadd9
-size 13203678103

 version https://git-lfs.github.com/spec/v1
+oid sha256:709540f11f094ab7fcb18f525b097ef780a66646213b0f225b0cec2172f4c781
+size 13203690391

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17cd930da9783ca70bad4b9cdeee6a06c0acea8f34645a333c93341f487f66a3
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
 size 14645

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab13e011ef5c6b4c442bc5f32b542c348311a0c4bff74117266a9be0164ed15b
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3159b8255e3ba63ecfbf9ad9882d37c8b55d7643e07b70fee54fef23e5ee0ce
 size 1465

trainer_state.json CHANGED Viewed

@@ -1,642 +1,97 @@
 {
-  "best_global_step": null,
-  "best_metric": null,
-  "best_model_checkpoint": null,
-  "epoch": 10.0,
   "eval_steps": 500,
-  "global_step": 4460,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.11210762331838565,
-      "grad_norm": 4.59375,
-      "learning_rate": 9.14179104477612e-06,
-      "loss": 5.4921,
       "step": 50
     },
     {
-      "epoch": 0.2242152466367713,
-      "grad_norm": 4.9375,
-      "learning_rate": 1.8470149253731344e-05,
-      "loss": 5.4388,
       "step": 100
     },
     {
-      "epoch": 0.336322869955157,
-      "grad_norm": 3.8125,
-      "learning_rate": 2.7798507462686568e-05,
-      "loss": 5.2504,
       "step": 150
     },
     {
-      "epoch": 0.4484304932735426,
-      "grad_norm": 6.5,
-      "learning_rate": 3.7126865671641795e-05,
-      "loss": 5.1609,
       "step": 200
     },
     {
-      "epoch": 0.5605381165919282,
-      "grad_norm": 6.4375,
-      "learning_rate": 4.645522388059701e-05,
-      "loss": 5.0546,
       "step": 250
     },
     {
-      "epoch": 0.672645739910314,
-      "grad_norm": 6.96875,
-      "learning_rate": 4.999325361589072e-05,
-      "loss": 5.0152,
       "step": 300
     },
     {
-      "epoch": 0.7847533632286996,
-      "grad_norm": 6.03125,
-      "learning_rate": 4.9953952730494324e-05,
-      "loss": 5.0036,
       "step": 350
     },
     {
-      "epoch": 0.8968609865470852,
-      "grad_norm": 4.8125,
-      "learning_rate": 4.987961816680492e-05,
-      "loss": 4.9593,
       "step": 400
     },
     {
-      "epoch": 1.0089686098654709,
-      "grad_norm": 4.8125,
-      "learning_rate": 4.977035428557125e-05,
-      "loss": 4.9279,
       "step": 450
     },
     {
-      "epoch": 1.1210762331838564,
-      "grad_norm": 4.6875,
-      "learning_rate": 4.9626314485964385e-05,
-      "loss": 4.7683,
       "step": 500
     },
     {
-      "epoch": 1.2331838565022422,
-      "grad_norm": 4.34375,
-      "learning_rate": 4.944770099021562e-05,
-      "loss": 4.7472,
-      "step": 550
-    },
-    {
-      "epoch": 1.3452914798206277,
-      "grad_norm": 5.15625,
-      "learning_rate": 4.923476455971e-05,
-      "loss": 4.7371,
-      "step": 600
-    },
-    {
-      "epoch": 1.4573991031390134,
-      "grad_norm": 7.3125,
-      "learning_rate": 4.898780414293411e-05,
-      "loss": 4.7189,
-      "step": 650
-    },
-    {
-      "epoch": 1.5695067264573992,
-      "grad_norm": 4.21875,
-      "learning_rate": 4.870716645577244e-05,
-      "loss": 4.7196,
-      "step": 700
-    },
-    {
-      "epoch": 1.6816143497757847,
-      "grad_norm": 4.96875,
-      "learning_rate": 4.839324549474148e-05,
-      "loss": 4.7285,
-      "step": 750
-    },
-    {
-      "epoch": 1.7937219730941703,
-      "grad_norm": 4.125,
-      "learning_rate": 4.804648198384507e-05,
-      "loss": 4.7366,
-      "step": 800
-    },
-    {
-      "epoch": 1.905829596412556,
-      "grad_norm": 4.21875,
-      "learning_rate": 4.7667362755827306e-05,
-      "loss": 4.712,
-      "step": 850
-    },
-    {
-      "epoch": 2.0179372197309418,
-      "grad_norm": 4.09375,
-      "learning_rate": 4.725642006869207e-05,
-      "loss": 4.6238,
-      "step": 900
-    },
-    {
-      "epoch": 2.1300448430493275,
-      "grad_norm": 4.3125,
-      "learning_rate": 4.68142308584484e-05,
-      "loss": 4.3582,
-      "step": 950
-    },
-    {
-      "epoch": 2.242152466367713,
-      "grad_norm": 6.625,
-      "learning_rate": 4.634141592913097e-05,
-      "loss": 4.3645,
-      "step": 1000
-    },
-    {
-      "epoch": 2.3542600896860986,
-      "grad_norm": 4.15625,
-      "learning_rate": 4.583863908123282e-05,
-      "loss": 4.413,
-      "step": 1050
-    },
-    {
-      "epoch": 2.4663677130044843,
-      "grad_norm": 4.21875,
-      "learning_rate": 4.530660617977393e-05,
-      "loss": 4.3569,
-      "step": 1100
-    },
-    {
-      "epoch": 2.57847533632287,
-      "grad_norm": 4.125,
-      "learning_rate": 4.474606416331397e-05,
-      "loss": 4.3704,
-      "step": 1150
-    },
-    {
-      "epoch": 2.6905829596412554,
-      "grad_norm": 4.15625,
-      "learning_rate": 4.415779999530064e-05,
-      "loss": 4.406,
-      "step": 1200
-    },
-    {
-      "epoch": 2.802690582959641,
-      "grad_norm": 3.921875,
-      "learning_rate": 4.354263955922568e-05,
-      "loss": 4.3779,
-      "step": 1250
-    },
-    {
-      "epoch": 2.914798206278027,
-      "grad_norm": 4.21875,
-      "learning_rate": 4.290144649913973e-05,
-      "loss": 4.3843,
-      "step": 1300
-    },
-    {
-      "epoch": 3.0269058295964126,
-      "grad_norm": 5.65625,
-      "learning_rate": 4.2235121007153975e-05,
-      "loss": 4.246,
-      "step": 1350
-    },
-    {
-      "epoch": 3.1390134529147984,
-      "grad_norm": 5.3125,
-      "learning_rate": 4.1544598559630694e-05,
-      "loss": 3.891,
-      "step": 1400
-    },
-    {
-      "epoch": 3.2511210762331837,
-      "grad_norm": 5.9375,
-      "learning_rate": 4.083084860383708e-05,
-      "loss": 3.9163,
-      "step": 1450
-    },
-    {
-      "epoch": 3.3632286995515694,
-      "grad_norm": 4.96875,
-      "learning_rate": 4.009487319690626e-05,
-      "loss": 3.9105,
-      "step": 1500
-    },
-    {
-      "epoch": 3.475336322869955,
-      "grad_norm": 4.90625,
-      "learning_rate": 3.9337705599016145e-05,
-      "loss": 3.9388,
-      "step": 1550
-    },
-    {
-      "epoch": 3.587443946188341,
-      "grad_norm": 4.78125,
-      "learning_rate": 3.856040882276136e-05,
-      "loss": 3.9035,
-      "step": 1600
-    },
-    {
-      "epoch": 3.6995515695067267,
-      "grad_norm": 4.6875,
-      "learning_rate": 3.776407414075477e-05,
-      "loss": 3.9022,
-      "step": 1650
-    },
-    {
-      "epoch": 3.811659192825112,
-      "grad_norm": 5.625,
-      "learning_rate": 3.6949819553553814e-05,
-      "loss": 3.9107,
-      "step": 1700
-    },
-    {
-      "epoch": 3.9237668161434978,
-      "grad_norm": 4.71875,
-      "learning_rate": 3.611878822006261e-05,
-      "loss": 3.9135,
-      "step": 1750
-    },
-    {
-      "epoch": 4.0358744394618835,
-      "grad_norm": 7.25,
-      "learning_rate": 3.527214685261339e-05,
-      "loss": 3.7243,
-      "step": 1800
-    },
-    {
-      "epoch": 4.147982062780269,
-      "grad_norm": 6.21875,
-      "learning_rate": 3.4411084078980574e-05,
-      "loss": 3.3222,
-      "step": 1850
-    },
-    {
-      "epoch": 4.260089686098655,
-      "grad_norm": 6.65625,
-      "learning_rate": 3.353680877362694e-05,
-      "loss": 3.3072,
-      "step": 1900
-    },
-    {
-      "epoch": 4.37219730941704,
-      "grad_norm": 6.75,
-      "learning_rate": 3.265054836052488e-05,
-      "loss": 3.2928,
-      "step": 1950
-    },
-    {
-      "epoch": 4.484304932735426,
-      "grad_norm": 7.15625,
-      "learning_rate": 3.1753547089935345e-05,
-      "loss": 3.2997,
-      "step": 2000
-    },
-    {
-      "epoch": 4.596412556053812,
-      "grad_norm": 6.71875,
-      "learning_rate": 3.084706429156379e-05,
-      "loss": 3.3155,
-      "step": 2050
-    },
-    {
-      "epoch": 4.708520179372197,
-      "grad_norm": 5.5625,
-      "learning_rate": 2.9932372606545638e-05,
-      "loss": 3.3295,
-      "step": 2100
-    },
-    {
-      "epoch": 4.820627802690583,
-      "grad_norm": 6.65625,
-      "learning_rate": 2.9010756200743363e-05,
-      "loss": 3.3681,
-      "step": 2150
-    },
-    {
-      "epoch": 4.932735426008969,
-      "grad_norm": 6.40625,
-      "learning_rate": 2.808350896186362e-05,
-      "loss": 3.3639,
-      "step": 2200
-    },
-    {
-      "epoch": 5.044843049327354,
-      "grad_norm": 7.875,
-      "learning_rate": 2.7151932682925563e-05,
-      "loss": 3.115,
-      "step": 2250
-    },
-    {
-      "epoch": 5.15695067264574,
-      "grad_norm": 10.5,
-      "learning_rate": 2.6217335234630654e-05,
-      "loss": 2.8213,
-      "step": 2300
-    },
-    {
-      "epoch": 5.2690582959641254,
-      "grad_norm": 7.84375,
-      "learning_rate": 2.5281028729199775e-05,
-      "loss": 2.7705,
-      "step": 2350
-    },
-    {
-      "epoch": 5.381165919282511,
-      "grad_norm": 9.6875,
-      "learning_rate": 2.4344327678255555e-05,
-      "loss": 2.7746,
-      "step": 2400
-    },
-    {
-      "epoch": 5.493273542600897,
-      "grad_norm": 6.8125,
-      "learning_rate": 2.34085471473361e-05,
-      "loss": 2.7827,
-      "step": 2450
-    },
-    {
-      "epoch": 5.605381165919282,
-      "grad_norm": 8.8125,
-      "learning_rate": 2.2475000909631068e-05,
-      "loss": 2.8412,
-      "step": 2500
-    },
-    {
-      "epoch": 5.7174887892376685,
-      "grad_norm": 7.78125,
-      "learning_rate": 2.1544999601532084e-05,
-      "loss": 2.7982,
-      "step": 2550
-    },
-    {
-      "epoch": 5.829596412556054,
-      "grad_norm": 8.1875,
-      "learning_rate": 2.0619848882587013e-05,
-      "loss": 2.8419,
-      "step": 2600
-    },
-    {
-      "epoch": 5.941704035874439,
-      "grad_norm": 8.4375,
-      "learning_rate": 1.9700847602441465e-05,
-      "loss": 2.7266,
-      "step": 2650
-    },
-    {
-      "epoch": 6.053811659192825,
-      "grad_norm": 8.25,
-      "learning_rate": 1.878928597734082e-05,
-      "loss": 2.6429,
-      "step": 2700
-    },
-    {
-      "epoch": 6.165919282511211,
-      "grad_norm": 8.25,
-      "learning_rate": 1.7886443778753052e-05,
-      "loss": 2.4437,
-      "step": 2750
-    },
-    {
-      "epoch": 6.278026905829597,
-      "grad_norm": 9.75,
-      "learning_rate": 1.699358853665535e-05,
-      "loss": 2.3789,
-      "step": 2800
-    },
-    {
-      "epoch": 6.390134529147982,
-      "grad_norm": 8.75,
-      "learning_rate": 1.6111973760006838e-05,
-      "loss": 2.4337,
-      "step": 2850
-    },
-    {
-      "epoch": 6.502242152466367,
-      "grad_norm": 7.6875,
-      "learning_rate": 1.5242837176906038e-05,
-      "loss": 2.4162,
-      "step": 2900
-    },
-    {
-      "epoch": 6.614349775784754,
-      "grad_norm": 9.875,
-      "learning_rate": 1.4387398996903488e-05,
-      "loss": 2.4613,
-      "step": 2950
-    },
-    {
-      "epoch": 6.726457399103139,
-      "grad_norm": 9.5,
-      "learning_rate": 1.3546860197909212e-05,
-      "loss": 2.4606,
-      "step": 3000
-    },
-    {
-      "epoch": 6.838565022421525,
-      "grad_norm": 9.875,
-      "learning_rate": 1.2722400840100257e-05,
-      "loss": 2.4374,
-      "step": 3050
-    },
-    {
-      "epoch": 6.95067264573991,
-      "grad_norm": 9.8125,
-      "learning_rate": 1.1915178409195171e-05,
-      "loss": 2.4009,
-      "step": 3100
-    },
-    {
-      "epoch": 7.062780269058296,
-      "grad_norm": 8.875,
-      "learning_rate": 1.1126326191421625e-05,
-      "loss": 2.3286,
-      "step": 3150
-    },
-    {
-      "epoch": 7.174887892376682,
-      "grad_norm": 8.25,
-      "learning_rate": 1.035695168245843e-05,
-      "loss": 2.2594,
-      "step": 3200
-    },
-    {
-      "epoch": 7.286995515695067,
-      "grad_norm": 8.1875,
-      "learning_rate": 9.608135032585758e-06,
-      "loss": 2.2772,
-      "step": 3250
-    },
-    {
-      "epoch": 7.3991031390134525,
-      "grad_norm": 8.25,
-      "learning_rate": 8.88092753022657e-06,
-      "loss": 2.2576,
-      "step": 3300
-    },
-    {
-      "epoch": 7.511210762331839,
-      "grad_norm": 8.1875,
-      "learning_rate": 8.176350126008015e-06,
-      "loss": 2.2233,
-      "step": 3350
-    },
-    {
-      "epoch": 7.623318385650224,
-      "grad_norm": 10.4375,
-      "learning_rate": 7.49539199941511e-06,
-      "loss": 2.2273,
-      "step": 3400
-    },
-    {
-      "epoch": 7.73542600896861,
-      "grad_norm": 9.625,
-      "learning_rate": 6.839009170049096e-06,
-      "loss": 2.2692,
-      "step": 3450
-    },
-    {
-      "epoch": 7.8475336322869955,
-      "grad_norm": 9.0,
-      "learning_rate": 6.208123155439854e-06,
-      "loss": 2.2847,
-      "step": 3500
-    },
-    {
-      "epoch": 7.959641255605381,
-      "grad_norm": 8.875,
-      "learning_rate": 5.603619677297028e-06,
-      "loss": 2.251,
-      "step": 3550
-    },
-    {
-      "epoch": 8.071748878923767,
-      "grad_norm": 8.6875,
-      "learning_rate": 5.026347418016134e-06,
-      "loss": 2.2404,
-      "step": 3600
-    },
-    {
-      "epoch": 8.183856502242152,
-      "grad_norm": 8.4375,
-      "learning_rate": 4.477116829185235e-06,
-      "loss": 2.2465,
-      "step": 3650
-    },
-    {
-      "epoch": 8.295964125560538,
-      "grad_norm": 8.9375,
-      "learning_rate": 3.956698993765226e-06,
-      "loss": 2.2267,
-      "step": 3700
-    },
-    {
-      "epoch": 8.408071748878923,
-      "grad_norm": 8.625,
-      "learning_rate": 3.4658245435410407e-06,
-      "loss": 2.1912,
-      "step": 3750
-    },
-    {
-      "epoch": 8.52017937219731,
-      "grad_norm": 9.5,
-      "learning_rate": 3.0051826333634818e-06,
-      "loss": 2.2076,
-      "step": 3800
-    },
-    {
-      "epoch": 8.632286995515695,
-      "grad_norm": 8.5,
-      "learning_rate": 2.5754199736220312e-06,
-      "loss": 2.2122,
-      "step": 3850
-    },
-    {
-      "epoch": 8.74439461883408,
-      "grad_norm": 9.3125,
-      "learning_rate": 2.177139922306773e-06,
-      "loss": 2.1805,
-      "step": 3900
-    },
-    {
-      "epoch": 8.856502242152466,
-      "grad_norm": 9.125,
-      "learning_rate": 1.810901637934137e-06,
-      "loss": 2.1735,
-      "step": 3950
-    },
-    {
-      "epoch": 8.968609865470851,
-      "grad_norm": 9.125,
-      "learning_rate": 1.4772192945258528e-06,
-      "loss": 2.186,
-      "step": 4000
-    },
-    {
-      "epoch": 9.080717488789238,
-      "grad_norm": 9.0,
-      "learning_rate": 1.1765613597430309e-06,
-      "loss": 2.1942,
-      "step": 4050
-    },
-    {
-      "epoch": 9.192825112107624,
-      "grad_norm": 8.4375,
-      "learning_rate": 9.093499371889385e-07,
-      "loss": 2.2041,
-      "step": 4100
-    },
-    {
-      "epoch": 9.304932735426009,
-      "grad_norm": 8.75,
-      "learning_rate": 6.75960173803819e-07,
-      "loss": 2.1515,
-      "step": 4150
-    },
-    {
-      "epoch": 9.417040358744394,
-      "grad_norm": 8.6875,
-      "learning_rate": 4.767197331837364e-07,
-      "loss": 2.1865,
-      "step": 4200
-    },
-    {
-      "epoch": 9.52914798206278,
-      "grad_norm": 8.875,
-      "learning_rate": 3.1190833556278276e-07,
-      "loss": 2.1974,
-      "step": 4250
-    },
-    {
-      "epoch": 9.641255605381167,
-      "grad_norm": 9.0625,
-      "learning_rate": 1.8175736510465114e-07,
-      "loss": 2.1827,
-      "step": 4300
-    },
-    {
-      "epoch": 9.753363228699552,
-      "grad_norm": 8.875,
-      "learning_rate": 8.644954505474812e-08,
-      "loss": 2.1896,
-      "step": 4350
-    },
-    {
-      "epoch": 9.865470852017937,
-      "grad_norm": 8.6875,
-      "learning_rate": 2.611868120898919e-08,
-      "loss": 2.2084,
-      "step": 4400
-    },
-    {
-      "epoch": 9.977578475336323,
-      "grad_norm": 8.875,
-      "learning_rate": 8.494740594333639e-10,
-      "loss": 2.2342,
-      "step": 4450
     }
   ],
   "logging_steps": 50,
-  "max_steps": 4460,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -645,12 +100,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.5271922632402944e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

 {
+  "best_global_step": 500,
+  "best_metric": 1.3274219036102295,
+  "best_model_checkpoint": "./orpheus-turkish-emotion-finetune/checkpoint-500",
+  "epoch": 2.4884735202492214,
   "eval_steps": 500,
+  "global_step": 500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.24922118380062305,
+      "grad_norm": 100.0,
+      "learning_rate": 1.218905472636816e-06,
+      "loss": 9.7783,
       "step": 50
     },
     {
+      "epoch": 0.4984423676012461,
+      "grad_norm": 74.5,
+      "learning_rate": 2.4626865671641794e-06,
+      "loss": 9.5155,
       "step": 100
     },
     {
+      "epoch": 0.7476635514018691,
+      "grad_norm": 74.0,
+      "learning_rate": 3.706467661691542e-06,
+      "loss": 9.0351,
       "step": 150
     },
     {
+      "epoch": 0.9968847352024922,
+      "grad_norm": 80.5,
+      "learning_rate": 4.950248756218906e-06,
+      "loss": 8.4023,
       "step": 200
     },
     {
+      "epoch": 1.2442367601246107,
+      "grad_norm": 80.0,
+      "learning_rate": 6.194029850746269e-06,
+      "loss": 7.3784,
       "step": 250
     },
     {
+      "epoch": 1.4934579439252336,
+      "grad_norm": 134.0,
+      "learning_rate": 7.437810945273633e-06,
+      "loss": 5.7507,
       "step": 300
     },
     {
+      "epoch": 1.7426791277258566,
+      "grad_norm": 65.0,
+      "learning_rate": 8.681592039800995e-06,
+      "loss": 3.8008,
       "step": 350
     },
     {
+      "epoch": 1.9919003115264797,
+      "grad_norm": 17.0,
+      "learning_rate": 9.925373134328359e-06,
+      "loss": 2.103,
       "step": 400
     },
     {
+      "epoch": 2.2392523364485983,
+      "grad_norm": 1.65625,
+      "learning_rate": 9.995836696556696e-06,
+      "loss": 1.4184,
       "step": 450
     },
     {
+      "epoch": 2.4884735202492214,
+      "grad_norm": 0.84765625,
+      "learning_rate": 9.982274873915892e-06,
+      "loss": 1.2978,
       "step": 500
     },
     {
+      "epoch": 2.4884735202492214,
+      "eval_loss": 1.3274219036102295,
+      "eval_runtime": 15.5223,
+      "eval_samples_per_second": 11.532,
+      "eval_steps_per_second": 1.482,
+      "step": 500
     }
   ],
   "logging_steps": 50,
+  "max_steps": 4020,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 20,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.3833925120386662e+17,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:311df3e7bc623c2f9d68c0452bb813155325fd14e9332718300e0deeb7ad9750
 size 5777

 version https://git-lfs.github.com/spec/v1
+oid sha256:e91f58a9a988419f219f097fff3f0e1762f623e4b2b1a8cf942cacee3271dc13
 size 5777