Training in progress, step 72000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:757633efe84a53c5ec97a90a7f4675f908dbeafb070171c08276f4ceae89bf82
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:fbefc43fcc2f8bf8bb8522016041f2a9a7a1389e937a0c7f9efe740c9281e923
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6c319382408e536debfaba9985144c2b85aedc267f1adb41fa2fcd682a710d69
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e10ee0c90a6cc09cdc24b1085749ee192ca52841ac52349ee023c635a106f71
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b55180ad5c333f626bc6ef839beda747e8f0633fdb8a2329d1af0642155fcad0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a2f1706dfc950df47249e8d65d6df596c2f98887c24dba54cde743e4804d2cf
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fb9e669a1e66d6084675ac17f9361f1d66f6538870dda5d62bb9fedf0717021
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:488c74f8a1dc2a7148ae3d9f18c7e9fcbb141512e2f149cd1d29674d054be2f3
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93c86e46203b6a91184b0093d776c5c5cbb5568a55f409f62928f5b11605d793
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:77448ddbc0e5f35d8ef3a4b1063eb25209d701957cc23b3671796af1520e431c
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57649dd5fae41007b8326ad8bceda3664e8263c16462c398827f7c60518777a9
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c3acb48030fde17938d59bf929c695a9b6dbd4fe2687e2cce76096a6e14351d6
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:065fc078fd1aeeb645695c18fb1eff98c533b26302779a57f06b17d1e0565e6a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:514d743b09cdf67b5f7ccba0c67283da3d20aa73a759bcf5ebfccf66234e08c8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10517334344577499,
   "eval_steps": 500,
-  "global_step": 71000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -24858,6 +24858,356 @@
       "learning_rate": 0.00048259050104507866,
       "loss": 16.5599,
       "step": 71000
     }
   ],
   "logging_steps": 20,
@@ -24877,7 +25227,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.220171364156257e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10665465814219437,
   "eval_steps": 500,
+  "global_step": 72000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048259050104507866,
       "loss": 16.5599,
       "step": 71000
+    },
+    {
+      "epoch": 0.10520296973970338,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004825855621100711,
+      "loss": 16.4962,
+      "step": 71020
+    },
+    {
+      "epoch": 0.10523259603363178,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004825806231750635,
+      "loss": 16.4845,
+      "step": 71040
+    },
+    {
+      "epoch": 0.10526222232756016,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048257568424005595,
+      "loss": 16.5411,
+      "step": 71060
+    },
+    {
+      "epoch": 0.10529184862148855,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004825707453050484,
+      "loss": 16.5235,
+      "step": 71080
+    },
+    {
+      "epoch": 0.10532147491541693,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048256580637004085,
+      "loss": 16.5079,
+      "step": 71100
+    },
+    {
+      "epoch": 0.10535110120934532,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048256086743503324,
+      "loss": 16.5093,
+      "step": 71120
+    },
+    {
+      "epoch": 0.10538072750327371,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004825559285000257,
+      "loss": 16.5136,
+      "step": 71140
+    },
+    {
+      "epoch": 0.1054103537972021,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048255098956501814,
+      "loss": 16.4724,
+      "step": 71160
+    },
+    {
+      "epoch": 0.10543998009113048,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004825460506300106,
+      "loss": 16.4691,
+      "step": 71180
+    },
+    {
+      "epoch": 0.10546960638505887,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.000482541111695003,
+      "loss": 16.4072,
+      "step": 71200
+    },
+    {
+      "epoch": 0.10549923267898725,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004825361727599954,
+      "loss": 16.531,
+      "step": 71220
+    },
+    {
+      "epoch": 0.10552885897291564,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004825312338249879,
+      "loss": 16.5211,
+      "step": 71240
+    },
+    {
+      "epoch": 0.10555848526684403,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048252629488998027,
+      "loss": 16.5079,
+      "step": 71260
+    },
+    {
+      "epoch": 0.10558811156077241,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004825213559549727,
+      "loss": 16.4813,
+      "step": 71280
+    },
+    {
+      "epoch": 0.1056177378547008,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048251641701996516,
+      "loss": 16.5194,
+      "step": 71300
+    },
+    {
+      "epoch": 0.10564736414862919,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004825114780849576,
+      "loss": 16.4672,
+      "step": 71320
+    },
+    {
+      "epoch": 0.10567699044255757,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048250653914995,
+      "loss": 16.5088,
+      "step": 71340
+    },
+    {
+      "epoch": 0.10570661673648597,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.00048250160021494245,
+      "loss": 16.5076,
+      "step": 71360
+    },
+    {
+      "epoch": 0.10573624303041436,
+      "grad_norm": 5.65625,
+      "learning_rate": 0.0004824966612799349,
+      "loss": 16.4723,
+      "step": 71380
+    },
+    {
+      "epoch": 0.10576586932434275,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048249172234492735,
+      "loss": 16.4759,
+      "step": 71400
+    },
+    {
+      "epoch": 0.10579549561827113,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00048248678340991974,
+      "loss": 16.4789,
+      "step": 71420
+    },
+    {
+      "epoch": 0.10582512191219952,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004824818444749122,
+      "loss": 16.5194,
+      "step": 71440
+    },
+    {
+      "epoch": 0.1058547482061279,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048247690553990464,
+      "loss": 16.5426,
+      "step": 71460
+    },
+    {
+      "epoch": 0.10588437450005629,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004824719666048971,
+      "loss": 16.5093,
+      "step": 71480
+    },
+    {
+      "epoch": 0.10591400079398468,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004824670276698895,
+      "loss": 16.5204,
+      "step": 71500
+    },
+    {
+      "epoch": 0.10594362708791306,
+      "grad_norm": 7.0,
+      "learning_rate": 0.000482462088734882,
+      "loss": 16.5197,
+      "step": 71520
+    },
+    {
+      "epoch": 0.10597325338184145,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004824571497998744,
+      "loss": 16.4954,
+      "step": 71540
+    },
+    {
+      "epoch": 0.10600287967576984,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048245221086486677,
+      "loss": 16.4379,
+      "step": 71560
+    },
+    {
+      "epoch": 0.10603250596969822,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004824472719298592,
+      "loss": 16.4314,
+      "step": 71580
+    },
+    {
+      "epoch": 0.10606213226362661,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048244233299485166,
+      "loss": 16.4561,
+      "step": 71600
+    },
+    {
+      "epoch": 0.106091758557555,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004824373940598441,
+      "loss": 16.5224,
+      "step": 71620
+    },
+    {
+      "epoch": 0.10612138485148338,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004824324551248365,
+      "loss": 16.4732,
+      "step": 71640
+    },
+    {
+      "epoch": 0.10615101114541177,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048242751618982895,
+      "loss": 16.4654,
+      "step": 71660
+    },
+    {
+      "epoch": 0.10618063743934017,
+      "grad_norm": 7.875,
+      "learning_rate": 0.0004824225772548214,
+      "loss": 16.5161,
+      "step": 71680
+    },
+    {
+      "epoch": 0.10621026373326856,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.00048241763831981385,
+      "loss": 16.446,
+      "step": 71700
+    },
+    {
+      "epoch": 0.10623989002719694,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048241269938480624,
+      "loss": 16.5245,
+      "step": 71720
+    },
+    {
+      "epoch": 0.10626951632112533,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004824077604497987,
+      "loss": 16.4989,
+      "step": 71740
+    },
+    {
+      "epoch": 0.10629914261505372,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048240282151479114,
+      "loss": 16.5321,
+      "step": 71760
+    },
+    {
+      "epoch": 0.1063287689089821,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004823978825797836,
+      "loss": 16.4498,
+      "step": 71780
+    },
+    {
+      "epoch": 0.10635839520291049,
+      "grad_norm": 6.875,
+      "learning_rate": 0.000482392943644776,
+      "loss": 16.4793,
+      "step": 71800
+    },
+    {
+      "epoch": 0.10638802149683887,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004823880047097685,
+      "loss": 16.4312,
+      "step": 71820
+    },
+    {
+      "epoch": 0.10641764779076726,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004823830657747609,
+      "loss": 16.5154,
+      "step": 71840
+    },
+    {
+      "epoch": 0.10644727408469565,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048237812683975327,
+      "loss": 16.4446,
+      "step": 71860
+    },
+    {
+      "epoch": 0.10647690037862403,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004823731879047457,
+      "loss": 16.5021,
+      "step": 71880
+    },
+    {
+      "epoch": 0.10650652667255242,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048236824896973816,
+      "loss": 16.4583,
+      "step": 71900
+    },
+    {
+      "epoch": 0.10653615296648081,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004823633100347306,
+      "loss": 16.4529,
+      "step": 71920
+    },
+    {
+      "epoch": 0.1065657792604092,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.000482358371099723,
+      "loss": 16.4529,
+      "step": 71940
+    },
+    {
+      "epoch": 0.10659540555433758,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048235343216471545,
+      "loss": 16.4638,
+      "step": 71960
+    },
+    {
+      "epoch": 0.10662503184826597,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004823484932297079,
+      "loss": 16.4739,
+      "step": 71980
+    },
+    {
+      "epoch": 0.10665465814219437,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048234355429470035,
+      "loss": 16.5261,
+      "step": 72000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.293707639198528e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null