Training in progress, step 1192, checkpoint

Browse files

Files changed (4) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +270 -4

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fa23844813712215c3e317ac601a23c61dbabb2025bb8d9d1761f202dc9648f7
 size 791869518

 version https://git-lfs.github.com/spec/v1
+oid sha256:f41dbd134eb35770af086a2fc18e0d281fd4b1a6f9d1aabf53883de70234ba83
 size 791869518

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82665c62b90edf301b70ae4bd2b8ad3351c4dcf4307ab0abb54fcaa59976966d
 size 2375752250

 version https://git-lfs.github.com/spec/v1
+oid sha256:4adf7ed35f9ee07ab90d7682190f703441a80f34395a26cc75716bb6f77e1d5c
 size 2375752250

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c18b4e2e89d973663b034f196f33ea51a1543f40916f5c6695e7c3b25fb20c4e
 size 1000

 version https://git-lfs.github.com/spec/v1
+oid sha256:378851ced5b160e1084be88052e6491387296da871a84fe5b4200346a2ca2994
 size 1000

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.8389041814130292,
   "eval_steps": 500,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1423,6 +1423,272 @@
       "eval_samples_per_second": 598.662,
       "eval_steps_per_second": 18.712,
       "step": 1000
     }
   ],
   "logging_steps": 5,
@@ -1437,12 +1703,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 4.332357992788787e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9999737842443308,
   "eval_steps": 500,
+  "global_step": 1192,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 598.662,
       "eval_steps_per_second": 18.712,
       "step": 1000
+    },
+    {
+      "epoch": 0.8430987023200944,
+      "grad_norm": 45.65625,
+      "learning_rate": 1.7444029850746267e-07,
+      "loss": 147.9916,
+      "step": 1005
+    },
+    {
+      "epoch": 0.8472932232271595,
+      "grad_norm": 48.625,
+      "learning_rate": 1.6977611940298506e-07,
+      "loss": 148.1211,
+      "step": 1010
+    },
+    {
+      "epoch": 0.8514877441342247,
+      "grad_norm": 43.5,
+      "learning_rate": 1.6511194029850746e-07,
+      "loss": 148.0889,
+      "step": 1015
+    },
+    {
+      "epoch": 0.8556822650412899,
+      "grad_norm": 41.84375,
+      "learning_rate": 1.6044776119402983e-07,
+      "loss": 147.4216,
+      "step": 1020
+    },
+    {
+      "epoch": 0.8598767859483549,
+      "grad_norm": 43.28125,
+      "learning_rate": 1.5578358208955222e-07,
+      "loss": 147.1561,
+      "step": 1025
+    },
+    {
+      "epoch": 0.8640713068554201,
+      "grad_norm": 50.4375,
+      "learning_rate": 1.5111940298507462e-07,
+      "loss": 146.7463,
+      "step": 1030
+    },
+    {
+      "epoch": 0.8682658277624853,
+      "grad_norm": 42.84375,
+      "learning_rate": 1.46455223880597e-07,
+      "loss": 146.9864,
+      "step": 1035
+    },
+    {
+      "epoch": 0.8724603486695504,
+      "grad_norm": 44.75,
+      "learning_rate": 1.4179104477611938e-07,
+      "loss": 147.2126,
+      "step": 1040
+    },
+    {
+      "epoch": 0.8766548695766155,
+      "grad_norm": 42.5625,
+      "learning_rate": 1.3712686567164177e-07,
+      "loss": 146.4982,
+      "step": 1045
+    },
+    {
+      "epoch": 0.8808493904836807,
+      "grad_norm": 40.65625,
+      "learning_rate": 1.3246268656716417e-07,
+      "loss": 146.3903,
+      "step": 1050
+    },
+    {
+      "epoch": 0.8850439113907458,
+      "grad_norm": 42.25,
+      "learning_rate": 1.2779850746268656e-07,
+      "loss": 146.2571,
+      "step": 1055
+    },
+    {
+      "epoch": 0.889238432297811,
+      "grad_norm": 43.96875,
+      "learning_rate": 1.2313432835820893e-07,
+      "loss": 146.2127,
+      "step": 1060
+    },
+    {
+      "epoch": 0.8934329532048761,
+      "grad_norm": 44.34375,
+      "learning_rate": 1.1847014925373134e-07,
+      "loss": 146.0634,
+      "step": 1065
+    },
+    {
+      "epoch": 0.8976274741119413,
+      "grad_norm": 42.84375,
+      "learning_rate": 1.1380597014925372e-07,
+      "loss": 146.5255,
+      "step": 1070
+    },
+    {
+      "epoch": 0.9018219950190064,
+      "grad_norm": 41.78125,
+      "learning_rate": 1.0914179104477612e-07,
+      "loss": 145.7911,
+      "step": 1075
+    },
+    {
+      "epoch": 0.9060165159260716,
+      "grad_norm": 43.90625,
+      "learning_rate": 1.044776119402985e-07,
+      "loss": 145.654,
+      "step": 1080
+    },
+    {
+      "epoch": 0.9102110368331368,
+      "grad_norm": 43.96875,
+      "learning_rate": 9.981343283582089e-08,
+      "loss": 146.0228,
+      "step": 1085
+    },
+    {
+      "epoch": 0.9144055577402018,
+      "grad_norm": 42.75,
+      "learning_rate": 9.514925373134327e-08,
+      "loss": 146.2392,
+      "step": 1090
+    },
+    {
+      "epoch": 0.918600078647267,
+      "grad_norm": 42.78125,
+      "learning_rate": 9.048507462686567e-08,
+      "loss": 145.7595,
+      "step": 1095
+    },
+    {
+      "epoch": 0.9227945995543322,
+      "grad_norm": 46.65625,
+      "learning_rate": 8.582089552238805e-08,
+      "loss": 145.3029,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9269891204613973,
+      "grad_norm": 39.03125,
+      "learning_rate": 8.115671641791044e-08,
+      "loss": 145.1068,
+      "step": 1105
+    },
+    {
+      "epoch": 0.9311836413684624,
+      "grad_norm": 40.5,
+      "learning_rate": 7.649253731343283e-08,
+      "loss": 144.9648,
+      "step": 1110
+    },
+    {
+      "epoch": 0.9353781622755276,
+      "grad_norm": 39.3125,
+      "learning_rate": 7.182835820895522e-08,
+      "loss": 145.055,
+      "step": 1115
+    },
+    {
+      "epoch": 0.9395726831825927,
+      "grad_norm": 43.53125,
+      "learning_rate": 6.71641791044776e-08,
+      "loss": 144.7464,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9437672040896579,
+      "grad_norm": 40.0625,
+      "learning_rate": 6.25e-08,
+      "loss": 144.7482,
+      "step": 1125
+    },
+    {
+      "epoch": 0.947961724996723,
+      "grad_norm": 40.3125,
+      "learning_rate": 5.7835820895522385e-08,
+      "loss": 144.9773,
+      "step": 1130
+    },
+    {
+      "epoch": 0.9521562459037882,
+      "grad_norm": 38.34375,
+      "learning_rate": 5.3171641791044774e-08,
+      "loss": 144.8226,
+      "step": 1135
+    },
+    {
+      "epoch": 0.9563507668108533,
+      "grad_norm": 38.28125,
+      "learning_rate": 4.850746268656716e-08,
+      "loss": 144.0508,
+      "step": 1140
+    },
+    {
+      "epoch": 0.9605452877179185,
+      "grad_norm": 36.8125,
+      "learning_rate": 4.384328358208955e-08,
+      "loss": 144.5738,
+      "step": 1145
+    },
+    {
+      "epoch": 0.9647398086249837,
+      "grad_norm": 35.8125,
+      "learning_rate": 3.917910447761194e-08,
+      "loss": 143.7115,
+      "step": 1150
+    },
+    {
+      "epoch": 0.9689343295320487,
+      "grad_norm": 37.375,
+      "learning_rate": 3.4514925373134326e-08,
+      "loss": 144.4058,
+      "step": 1155
+    },
+    {
+      "epoch": 0.9731288504391139,
+      "grad_norm": 35.53125,
+      "learning_rate": 2.9850746268656714e-08,
+      "loss": 143.9776,
+      "step": 1160
+    },
+    {
+      "epoch": 0.9773233713461791,
+      "grad_norm": 34.96875,
+      "learning_rate": 2.5186567164179103e-08,
+      "loss": 143.9102,
+      "step": 1165
+    },
+    {
+      "epoch": 0.9815178922532442,
+      "grad_norm": 34.875,
+      "learning_rate": 2.052238805970149e-08,
+      "loss": 144.5179,
+      "step": 1170
+    },
+    {
+      "epoch": 0.9857124131603093,
+      "grad_norm": 34.6875,
+      "learning_rate": 1.5858208955223882e-08,
+      "loss": 144.5262,
+      "step": 1175
+    },
+    {
+      "epoch": 0.9899069340673745,
+      "grad_norm": 33.53125,
+      "learning_rate": 1.1194029850746267e-08,
+      "loss": 144.4803,
+      "step": 1180
+    },
+    {
+      "epoch": 0.9941014549744397,
+      "grad_norm": 32.90625,
+      "learning_rate": 6.529850746268656e-09,
+      "loss": 144.1151,
+      "step": 1185
+    },
+    {
+      "epoch": 0.9982959758815048,
+      "grad_norm": 32.25,
+      "learning_rate": 1.8656716417910446e-09,
+      "loss": 143.4677,
+      "step": 1190
     }
   ],
   "logging_steps": 5,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 5.16417072729686e+18,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null