Training in progress, step 1200, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +212 -2

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1935637205cb627b948fe1329a80486b1da1feb7f14f8a0e15acab010a97b90c
 size 2558403928

 version https://git-lfs.github.com/spec/v1
+oid sha256:9ad9704bc5747fc17b3c1496ffdc9c4a82ee0c2ceb16f4e948b6593950765fc1
 size 2558403928

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:575872657bd8f5c69c8e9a049519a8b9a0d9795ca6890003a302f811f9d4108a
 size 1313638993

 version https://git-lfs.github.com/spec/v1
+oid sha256:18ce1ad6cf71b73814b98c18ffc3d1dbf6b9d7b64f05e65823d86e41a0a2a0f4
 size 1313638993

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:41d7cb8df90bbc1a1f334913d48d210d3a9a45cf39cb2aba7ed6759fa8b44c3a
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:68e939733619667823ff09361a70b450356b35690c073061e24545321b21c4b0
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a0b4230f34cfc1b81dc2c15ef8d265bdd348193f5a746ca2018df11549c7ac0
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:18783150ac09b6b81cea5af47876a10bfe5f36c3d76aca4ffce5382bdfaf7b28
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46dbc8a28dada13dfcd70ea962672a500c66aa01dc461c5d292f261a3ca3d0fc
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c2c4ff631d77bc2fe5cad879e6c434ab3b6d8a7e0b9cce252cee47e42bdf838a
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.72,
   "eval_steps": 500,
-  "global_step": 900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -645,6 +645,216 @@
       "learning_rate": 0.0001504157768367901,
       "loss": 6.6899,
       "step": 900
     }
   ],
   "logging_steps": 10,

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.96,
   "eval_steps": 500,
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001504157768367901,
       "loss": 6.6899,
       "step": 900
+    },
+    {
+      "epoch": 0.728,
+      "grad_norm": 5.094054222106934,
+      "learning_rate": 0.00014927912345112616,
+      "loss": 6.2008,
+      "step": 910
+    },
+    {
+      "epoch": 0.736,
+      "grad_norm": 6.107059955596924,
+      "learning_rate": 0.0001481339981254846,
+      "loss": 6.2149,
+      "step": 920
+    },
+    {
+      "epoch": 0.744,
+      "grad_norm": 6.355636119842529,
+      "learning_rate": 0.00014698059772686202,
+      "loss": 6.7521,
+      "step": 930
+    },
+    {
+      "epoch": 0.752,
+      "grad_norm": 7.230486869812012,
+      "learning_rate": 0.00014581912054488413,
+      "loss": 6.3823,
+      "step": 940
+    },
+    {
+      "epoch": 0.76,
+      "grad_norm": 4.960805416107178,
+      "learning_rate": 0.00014464976625771654,
+      "loss": 6.6149,
+      "step": 950
+    },
+    {
+      "epoch": 0.768,
+      "grad_norm": 5.269943714141846,
+      "learning_rate": 0.00014347273589773637,
+      "loss": 6.1001,
+      "step": 960
+    },
+    {
+      "epoch": 0.776,
+      "grad_norm": 6.840855598449707,
+      "learning_rate": 0.0001422882318169716,
+      "loss": 6.3677,
+      "step": 970
+    },
+    {
+      "epoch": 0.784,
+      "grad_norm": 5.1594038009643555,
+      "learning_rate": 0.00014109645765231278,
+      "loss": 6.3929,
+      "step": 980
+    },
+    {
+      "epoch": 0.792,
+      "grad_norm": 5.096086025238037,
+      "learning_rate": 0.00013989761829050475,
+      "loss": 6.1354,
+      "step": 990
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 5.235525608062744,
+      "learning_rate": 0.00013869191983292283,
+      "loss": 6.4954,
+      "step": 1000
+    },
+    {
+      "epoch": 0.808,
+      "grad_norm": 5.518918991088867,
+      "learning_rate": 0.00013747956956014037,
+      "loss": 6.449,
+      "step": 1010
+    },
+    {
+      "epoch": 0.816,
+      "grad_norm": 4.848990440368652,
+      "learning_rate": 0.00013626077589629367,
+      "loss": 6.392,
+      "step": 1020
+    },
+    {
+      "epoch": 0.824,
+      "grad_norm": 7.234468460083008,
+      "learning_rate": 0.00013503574837325015,
+      "loss": 6.5465,
+      "step": 1030
+    },
+    {
+      "epoch": 0.832,
+      "grad_norm": 6.593731880187988,
+      "learning_rate": 0.00013380469759458643,
+      "loss": 6.574,
+      "step": 1040
+    },
+    {
+      "epoch": 0.84,
+      "grad_norm": 5.687368392944336,
+      "learning_rate": 0.00013256783519938154,
+      "loss": 6.1995,
+      "step": 1050
+    },
+    {
+      "epoch": 0.848,
+      "grad_norm": 4.857635498046875,
+      "learning_rate": 0.00013132537382583274,
+      "loss": 5.8422,
+      "step": 1060
+    },
+    {
+      "epoch": 0.856,
+      "grad_norm": 7.068734645843506,
+      "learning_rate": 0.00013007752707469924,
+      "loss": 6.0601,
+      "step": 1070
+    },
+    {
+      "epoch": 0.864,
+      "grad_norm": 4.396754741668701,
+      "learning_rate": 0.00012882450947258045,
+      "loss": 5.8387,
+      "step": 1080
+    },
+    {
+      "epoch": 0.872,
+      "grad_norm": 9.501909255981445,
+      "learning_rate": 0.0001275665364350352,
+      "loss": 5.9831,
+      "step": 1090
+    },
+    {
+      "epoch": 0.88,
+      "grad_norm": 6.957056522369385,
+      "learning_rate": 0.00012630382422954795,
+      "loss": 6.1359,
+      "step": 1100
+    },
+    {
+      "epoch": 0.888,
+      "grad_norm": 5.782343864440918,
+      "learning_rate": 0.00012503658993834885,
+      "loss": 6.0754,
+      "step": 1110
+    },
+    {
+      "epoch": 0.896,
+      "grad_norm": 5.452831268310547,
+      "learning_rate": 0.0001237650514210932,
+      "loss": 5.6186,
+      "step": 1120
+    },
+    {
+      "epoch": 0.904,
+      "grad_norm": 6.382038593292236,
+      "learning_rate": 0.00012248942727740783,
+      "loss": 5.7174,
+      "step": 1130
+    },
+    {
+      "epoch": 0.912,
+      "grad_norm": 6.288851261138916,
+      "learning_rate": 0.00012120993680931003,
+      "loss": 5.6529,
+      "step": 1140
+    },
+    {
+      "epoch": 0.92,
+      "grad_norm": 6.7387166023254395,
+      "learning_rate": 0.0001199267999835055,
+      "loss": 5.603,
+      "step": 1150
+    },
+    {
+      "epoch": 0.928,
+      "grad_norm": 5.694065093994141,
+      "learning_rate": 0.00011864023739357235,
+      "loss": 5.2627,
+      "step": 1160
+    },
+    {
+      "epoch": 0.936,
+      "grad_norm": 6.711731910705566,
+      "learning_rate": 0.00011735047022203741,
+      "loss": 5.4706,
+      "step": 1170
+    },
+    {
+      "epoch": 0.944,
+      "grad_norm": 5.517411708831787,
+      "learning_rate": 0.00011605772020235072,
+      "loss": 5.6277,
+      "step": 1180
+    },
+    {
+      "epoch": 0.952,
+      "grad_norm": 6.785055160522461,
+      "learning_rate": 0.00011476220958076607,
+      "loss": 5.9611,
+      "step": 1190
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 5.702793121337891,
+      "learning_rate": 0.00011346416107813267,
+      "loss": 5.9226,
+      "step": 1200
     }
   ],
   "logging_steps": 10,