Training in progress, step 1196, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +326 -4

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f9c1aa13c3c5817e2fa598b5b536720d683cfafff224fa37b79f7684a482f0b
 size 310152752

 version https://git-lfs.github.com/spec/v1
+oid sha256:26c57f537c6e8cb1f250dc86e919d048561f018dbb08c57403f8908ff3476437
 size 310152752

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3c2d5b3b0bb33cbea232fc3499e4b0f66d0c0e42402b974b2b74cdc33cb50e1
 size 133909059

 version https://git-lfs.github.com/spec/v1
+oid sha256:5efc5ad94f15e851bd1bbc752176792db889009682cb9c361d1d3290ee91066f
 size 133909059

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ae6c927eb09246f1e597bb2ec85da20567dd614a07f2630055148a5c724de9f7
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:304e4f2a6e2248bf62fc7d1ebac820e06d4e575b12b458ce60adbbc6486b8711
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": 1150,
   "best_metric": 1.943885087966919,
   "best_model_checkpoint": "outputs/checkpoint-1150",
-  "epoch": 1.9230769230769231,
   "eval_steps": 50,
-  "global_step": 1150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -8242,6 +8242,328 @@
       "eval_samples_per_second": 22.611,
       "eval_steps_per_second": 2.885,
       "step": 1150
     }
   ],
   "logging_steps": 1,
@@ -8256,12 +8578,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.3491082933875825e+18,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null

   "best_global_step": 1150,
   "best_metric": 1.943885087966919,
   "best_model_checkpoint": "outputs/checkpoint-1150",
+  "epoch": 2.0,
   "eval_steps": 50,
+  "global_step": 1196,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.611,
       "eval_steps_per_second": 2.885,
       "step": 1150
+    },
+    {
+      "epoch": 1.9247491638795986,
+      "grad_norm": 0.22156818211078644,
+      "learning_rate": 1.9850918440857958e-07,
+      "loss": 1.7031984329223633,
+      "step": 1151
+    },
+    {
+      "epoch": 1.9264214046822743,
+      "grad_norm": 0.24198277294635773,
+      "learning_rate": 1.899829931495012e-07,
+      "loss": 1.9431957006454468,
+      "step": 1152
+    },
+    {
+      "epoch": 1.92809364548495,
+      "grad_norm": 0.23016783595085144,
+      "learning_rate": 1.8164324970625645e-07,
+      "loss": 1.9043176174163818,
+      "step": 1153
+    },
+    {
+      "epoch": 1.9297658862876255,
+      "grad_norm": 0.23631806671619415,
+      "learning_rate": 1.7349001675219245e-07,
+      "loss": 2.0027687549591064,
+      "step": 1154
+    },
+    {
+      "epoch": 1.931438127090301,
+      "grad_norm": 0.24670401215553284,
+      "learning_rate": 1.6552335555903297e-07,
+      "loss": 2.1321308612823486,
+      "step": 1155
+    },
+    {
+      "epoch": 1.9331103678929766,
+      "grad_norm": 0.2432417869567871,
+      "learning_rate": 1.577433259964123e-07,
+      "loss": 1.8444653749465942,
+      "step": 1156
+    },
+    {
+      "epoch": 1.9347826086956523,
+      "grad_norm": 0.2338031530380249,
+      "learning_rate": 1.501499865314171e-07,
+      "loss": 1.697690486907959,
+      "step": 1157
+    },
+    {
+      "epoch": 1.9364548494983278,
+      "grad_norm": 0.23973986506462097,
+      "learning_rate": 1.4274339422816195e-07,
+      "loss": 1.9107983112335205,
+      "step": 1158
+    },
+    {
+      "epoch": 1.9381270903010033,
+      "grad_norm": 0.2242889702320099,
+      "learning_rate": 1.3552360474734793e-07,
+      "loss": 1.8276660442352295,
+      "step": 1159
+    },
+    {
+      "epoch": 1.939799331103679,
+      "grad_norm": 0.2711448669433594,
+      "learning_rate": 1.284906723458462e-07,
+      "loss": 1.7825736999511719,
+      "step": 1160
+    },
+    {
+      "epoch": 1.9414715719063547,
+      "grad_norm": 0.2267025262117386,
+      "learning_rate": 1.216446498763013e-07,
+      "loss": 1.7734841108322144,
+      "step": 1161
+    },
+    {
+      "epoch": 1.9431438127090301,
+      "grad_norm": 0.22980396449565887,
+      "learning_rate": 1.1498558878672016e-07,
+      "loss": 1.7618581056594849,
+      "step": 1162
+    },
+    {
+      "epoch": 1.9448160535117056,
+      "grad_norm": 0.23873302340507507,
+      "learning_rate": 1.0851353912008644e-07,
+      "loss": 1.9185343980789185,
+      "step": 1163
+    },
+    {
+      "epoch": 1.9464882943143813,
+      "grad_norm": 0.24374531209468842,
+      "learning_rate": 1.0222854951399407e-07,
+      "loss": 1.9559900760650635,
+      "step": 1164
+    },
+    {
+      "epoch": 1.948160535117057,
+      "grad_norm": 0.23170500993728638,
+      "learning_rate": 9.613066720028097e-08,
+      "loss": 2.0244576930999756,
+      "step": 1165
+    },
+    {
+      "epoch": 1.9498327759197325,
+      "grad_norm": 0.2283419668674469,
+      "learning_rate": 9.021993800466256e-08,
+      "loss": 1.7836267948150635,
+      "step": 1166
+    },
+    {
+      "epoch": 1.951505016722408,
+      "grad_norm": 0.25570887327194214,
+      "learning_rate": 8.449640634639878e-08,
+      "loss": 2.202239513397217,
+      "step": 1167
+    },
+    {
+      "epoch": 1.9531772575250836,
+      "grad_norm": 0.23577114939689636,
+      "learning_rate": 7.896011523794988e-08,
+      "loss": 1.9459967613220215,
+      "step": 1168
+    },
+    {
+      "epoch": 1.9548494983277593,
+      "grad_norm": 0.23402273654937744,
+      "learning_rate": 7.361110628466838e-08,
+      "loss": 1.822799801826477,
+      "step": 1169
+    },
+    {
+      "epoch": 1.9565217391304348,
+      "grad_norm": 0.23017290234565735,
+      "learning_rate": 6.84494196844715e-08,
+      "loss": 1.8911821842193604,
+      "step": 1170
+    },
+    {
+      "epoch": 1.9581939799331103,
+      "grad_norm": 0.23453032970428467,
+      "learning_rate": 6.347509422754139e-08,
+      "loss": 1.8932383060455322,
+      "step": 1171
+    },
+    {
+      "epoch": 1.959866220735786,
+      "grad_norm": 0.2325022965669632,
+      "learning_rate": 5.868816729604765e-08,
+      "loss": 1.9721827507019043,
+      "step": 1172
+    },
+    {
+      "epoch": 1.9615384615384617,
+      "grad_norm": 0.2408953160047531,
+      "learning_rate": 5.408867486384472e-08,
+      "loss": 2.094602584838867,
+      "step": 1173
+    },
+    {
+      "epoch": 1.9632107023411371,
+      "grad_norm": 0.23328392207622528,
+      "learning_rate": 4.9676651496222136e-08,
+      "loss": 1.8785374164581299,
+      "step": 1174
+    },
+    {
+      "epoch": 1.9648829431438126,
+      "grad_norm": 0.2375405728816986,
+      "learning_rate": 4.5452130349629694e-08,
+      "loss": 2.090651273727417,
+      "step": 1175
+    },
+    {
+      "epoch": 1.9665551839464883,
+      "grad_norm": 0.2384941726922989,
+      "learning_rate": 4.141514317143602e-08,
+      "loss": 1.932543396949768,
+      "step": 1176
+    },
+    {
+      "epoch": 1.968227424749164,
+      "grad_norm": 0.22377879917621613,
+      "learning_rate": 3.7565720299687076e-08,
+      "loss": 1.8287705183029175,
+      "step": 1177
+    },
+    {
+      "epoch": 1.9698996655518395,
+      "grad_norm": 0.22661490738391876,
+      "learning_rate": 3.3903890662878576e-08,
+      "loss": 1.8487858772277832,
+      "step": 1178
+    },
+    {
+      "epoch": 1.971571906354515,
+      "grad_norm": 0.23505854606628418,
+      "learning_rate": 3.0429681779739484e-08,
+      "loss": 1.9943294525146484,
+      "step": 1179
+    },
+    {
+      "epoch": 1.9732441471571907,
+      "grad_norm": 0.23116961121559143,
+      "learning_rate": 2.7143119759026613e-08,
+      "loss": 1.818049669265747,
+      "step": 1180
+    },
+    {
+      "epoch": 1.9749163879598663,
+      "grad_norm": 0.22798657417297363,
+      "learning_rate": 2.404422929932204e-08,
+      "loss": 1.8310678005218506,
+      "step": 1181
+    },
+    {
+      "epoch": 1.9765886287625418,
+      "grad_norm": 0.24219320714473724,
+      "learning_rate": 2.113303368885822e-08,
+      "loss": 1.975824236869812,
+      "step": 1182
+    },
+    {
+      "epoch": 1.9782608695652173,
+      "grad_norm": 0.244304358959198,
+      "learning_rate": 1.8409554805329245e-08,
+      "loss": 2.0979132652282715,
+      "step": 1183
+    },
+    {
+      "epoch": 1.979933110367893,
+      "grad_norm": 0.22320342063903809,
+      "learning_rate": 1.5873813115740988e-08,
+      "loss": 1.6685010194778442,
+      "step": 1184
+    },
+    {
+      "epoch": 1.9816053511705687,
+      "grad_norm": 0.24459514021873474,
+      "learning_rate": 1.3525827676247326e-08,
+      "loss": 1.8652524948120117,
+      "step": 1185
+    },
+    {
+      "epoch": 1.9832775919732442,
+      "grad_norm": 0.23350538313388824,
+      "learning_rate": 1.1365616132008593e-08,
+      "loss": 1.7653487920761108,
+      "step": 1186
+    },
+    {
+      "epoch": 1.9849498327759196,
+      "grad_norm": 0.23290188610553741,
+      "learning_rate": 9.393194717061127e-09,
+      "loss": 1.9263311624526978,
+      "step": 1187
+    },
+    {
+      "epoch": 1.9866220735785953,
+      "grad_norm": 0.23780138790607452,
+      "learning_rate": 7.608578254195142e-09,
+      "loss": 2.1235318183898926,
+      "step": 1188
+    },
+    {
+      "epoch": 1.988294314381271,
+      "grad_norm": 0.221963033080101,
+      "learning_rate": 6.0117801548437155e-09,
+      "loss": 1.9430090188980103,
+      "step": 1189
+    },
+    {
+      "epoch": 1.9899665551839465,
+      "grad_norm": 0.24497728049755096,
+      "learning_rate": 4.602812418974534e-09,
+      "loss": 1.9605302810668945,
+      "step": 1190
+    },
+    {
+      "epoch": 1.991638795986622,
+      "grad_norm": 0.24507929384708405,
+      "learning_rate": 3.3816856350177284e-09,
+      "loss": 1.8447235822677612,
+      "step": 1191
+    },
+    {
+      "epoch": 1.9933110367892977,
+      "grad_norm": 0.2350272387266159,
+      "learning_rate": 2.348408979760408e-09,
+      "loss": 1.8975446224212646,
+      "step": 1192
+    },
+    {
+      "epoch": 1.9949832775919734,
+      "grad_norm": 0.24266520142555237,
+      "learning_rate": 1.502990218302247e-09,
+      "loss": 1.9446773529052734,
+      "step": 1193
+    },
+    {
+      "epoch": 1.9966555183946488,
+      "grad_norm": 0.23780354857444763,
+      "learning_rate": 8.454357039860971e-10,
+      "loss": 1.9795520305633545,
+      "step": 1194
+    },
+    {
+      "epoch": 1.9983277591973243,
+      "grad_norm": 0.2187688648700714,
+      "learning_rate": 3.757503783424765e-10,
+      "loss": 1.903891682624817,
+      "step": 1195
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 0.2732059061527252,
+      "learning_rate": 9.393777107291613e-11,
+      "loss": 2.0153965950012207,
+      "step": 1196
     }
   ],
   "logging_steps": 1,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 1.4028145166609603e+18,
   "train_batch_size": 12,
   "trial_name": null,
   "trial_params": null