Training in progress, step 16500, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:281d4ef0a8e7b7872e39b497a3a30d1eab74ceb3607386521e444e38cb3a1999
 size 487156538

 version https://git-lfs.github.com/spec/v1
+oid sha256:289545fc1552428b0e12aeeecd55b134e9a52241f44036de8d8f204e35e20afb
 size 487156538

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08bfdaa6e85ad39297909804012faf4615d0b2ab8a37ba9d564ba4b20c5afe3b
 size 1059459406

 version https://git-lfs.github.com/spec/v1
+oid sha256:99059f4bf0ce62b572ac5e7aed5f529d09c121705516bcb6c43b763dcdea026d
 size 1059459406

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:914d2ab3f587e800d17f7d196d6f0092d0a87493b41d0a671e21b5adb1f2d2a0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:82d0cb560e8cc88d37b6fdb38283c527bf386371741e3fb12423b76b412c4d30
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:344eca6c5ec13dc95525cab835b1c86942a9064531b87da2a47c4c1d44c791cc
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:139282925be08220983826aa431aa288b1fc5afb82a768d6f91bc4f11be56858
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dfc05f18aa518e0b225107b64c8d08b389e8623e906d941f8628b36cccf0462d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:495800332f29bfc930f2d135466505a7115bb60583302cd925a56ab992bb542c
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c4c006f6e9030f012e3a14c721b6fec459b16163302ae533134c5c4431765d62
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:8384800dd0611fe7ff52172a3c0ca3c74f64cf34436f28d688acecfccf0334f6
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ccc2a52ae0327def30cc40f7f273a4a1537961b9b580753fe57ec7ecdab69b35
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e1fa44d83f1ea27212c9079f128b8147324741571d792587433ce7cd41805e05
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.031199574905791908,
   "eval_steps": 500,
-  "global_step": 16000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11208,6 +11208,356 @@
       "learning_rate": 0.0004949612511467957,
       "loss": 16.847,
       "step": 16000
     }
   ],
   "logging_steps": 10,
@@ -11227,7 +11577,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.5599099377236312e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0321745616215979,
   "eval_steps": 500,
+  "global_step": 16500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004949612511467957,
       "loss": 16.847,
       "step": 16000
+    },
+    {
+      "epoch": 0.03121907464010803,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.000494958000131341,
+      "loss": 17.0239,
+      "step": 16010
+    },
+    {
+      "epoch": 0.03123857437442415,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004949547491158863,
+      "loss": 16.8909,
+      "step": 16020
+    },
+    {
+      "epoch": 0.03125807410874027,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004949514981004317,
+      "loss": 16.8678,
+      "step": 16030
+    },
+    {
+      "epoch": 0.03127757384305639,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.000494948247084977,
+      "loss": 16.7921,
+      "step": 16040
+    },
+    {
+      "epoch": 0.031297073577372506,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004949449960695223,
+      "loss": 16.8111,
+      "step": 16050
+    },
+    {
+      "epoch": 0.03131657331168863,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004949417450540676,
+      "loss": 16.8964,
+      "step": 16060
+    },
+    {
+      "epoch": 0.03133607304600475,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.000494938494038613,
+      "loss": 16.9569,
+      "step": 16070
+    },
+    {
+      "epoch": 0.03135557278032087,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004949352430231583,
+      "loss": 16.9247,
+      "step": 16080
+    },
+    {
+      "epoch": 0.03137507251463699,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004949319920077036,
+      "loss": 16.9235,
+      "step": 16090
+    },
+    {
+      "epoch": 0.03139457224895311,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.000494928740992249,
+      "loss": 17.061,
+      "step": 16100
+    },
+    {
+      "epoch": 0.031414071983269225,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004949254899767943,
+      "loss": 16.9691,
+      "step": 16110
+    },
+    {
+      "epoch": 0.031433571717585346,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004949222389613396,
+      "loss": 16.9493,
+      "step": 16120
+    },
+    {
+      "epoch": 0.03145307145190147,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004949189879458849,
+      "loss": 16.8725,
+      "step": 16130
+    },
+    {
+      "epoch": 0.03147257118621759,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004949157369304303,
+      "loss": 16.9221,
+      "step": 16140
+    },
+    {
+      "epoch": 0.03149207092053371,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004949124859149756,
+      "loss": 16.964,
+      "step": 16150
+    },
+    {
+      "epoch": 0.03151157065484983,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004949092348995209,
+      "loss": 16.9567,
+      "step": 16160
+    },
+    {
+      "epoch": 0.03153107038916595,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004949059838840663,
+      "loss": 16.8704,
+      "step": 16170
+    },
+    {
+      "epoch": 0.031550570123482065,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004949027328686116,
+      "loss": 16.8933,
+      "step": 16180
+    },
+    {
+      "epoch": 0.031570069857798186,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004948994818531569,
+      "loss": 17.0769,
+      "step": 16190
+    },
+    {
+      "epoch": 0.03158956959211431,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004948962308377022,
+      "loss": 16.9815,
+      "step": 16200
+    },
+    {
+      "epoch": 0.03160906932643043,
+      "grad_norm": 143.0,
+      "learning_rate": 0.0004948929798222476,
+      "loss": 16.9903,
+      "step": 16210
+    },
+    {
+      "epoch": 0.03162856906074655,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004948897288067928,
+      "loss": 16.9974,
+      "step": 16220
+    },
+    {
+      "epoch": 0.03164806879506267,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.0004948864777913381,
+      "loss": 16.8849,
+      "step": 16230
+    },
+    {
+      "epoch": 0.031667568529378784,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004948832267758834,
+      "loss": 16.9787,
+      "step": 16240
+    },
+    {
+      "epoch": 0.031687068263694905,
+      "grad_norm": 10.4375,
+      "learning_rate": 0.0004948799757604288,
+      "loss": 16.941,
+      "step": 16250
+    },
+    {
+      "epoch": 0.031706567998011026,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004948767247449741,
+      "loss": 16.7498,
+      "step": 16260
+    },
+    {
+      "epoch": 0.03172606773232715,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004948734737295194,
+      "loss": 16.8968,
+      "step": 16270
+    },
+    {
+      "epoch": 0.03174556746664327,
+      "grad_norm": 23.25,
+      "learning_rate": 0.0004948702227140648,
+      "loss": 16.9446,
+      "step": 16280
+    },
+    {
+      "epoch": 0.03176506720095939,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004948669716986101,
+      "loss": 16.9886,
+      "step": 16290
+    },
+    {
+      "epoch": 0.03178456693527551,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004948637206831554,
+      "loss": 16.8989,
+      "step": 16300
+    },
+    {
+      "epoch": 0.031804066669591624,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004948604696677007,
+      "loss": 16.9622,
+      "step": 16310
+    },
+    {
+      "epoch": 0.031823566403907745,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.0004948572186522461,
+      "loss": 16.9232,
+      "step": 16320
+    },
+    {
+      "epoch": 0.031843066138223866,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004948539676367914,
+      "loss": 17.0506,
+      "step": 16330
+    },
+    {
+      "epoch": 0.03186256587253999,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004948507166213367,
+      "loss": 16.8868,
+      "step": 16340
+    },
+    {
+      "epoch": 0.03188206560685611,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004948474656058821,
+      "loss": 16.9945,
+      "step": 16350
+    },
+    {
+      "epoch": 0.03190156534117223,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004948442145904274,
+      "loss": 16.832,
+      "step": 16360
+    },
+    {
+      "epoch": 0.03192106507548834,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004948409635749726,
+      "loss": 17.0248,
+      "step": 16370
+    },
+    {
+      "epoch": 0.031940564809804464,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004948377125595179,
+      "loss": 17.0616,
+      "step": 16380
+    },
+    {
+      "epoch": 0.031960064544120585,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004948344615440633,
+      "loss": 16.9359,
+      "step": 16390
+    },
+    {
+      "epoch": 0.031979564278436706,
+      "grad_norm": 18.0,
+      "learning_rate": 0.0004948312105286086,
+      "loss": 16.9874,
+      "step": 16400
+    },
+    {
+      "epoch": 0.03199906401275283,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004948279595131539,
+      "loss": 16.9289,
+      "step": 16410
+    },
+    {
+      "epoch": 0.03201856374706895,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004948247084976992,
+      "loss": 16.8866,
+      "step": 16420
+    },
+    {
+      "epoch": 0.03203806348138507,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.0004948214574822446,
+      "loss": 16.8968,
+      "step": 16430
+    },
+    {
+      "epoch": 0.03205756321570118,
+      "grad_norm": 14.0,
+      "learning_rate": 0.0004948182064667899,
+      "loss": 16.8328,
+      "step": 16440
+    },
+    {
+      "epoch": 0.032077062950017304,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004948149554513352,
+      "loss": 16.9513,
+      "step": 16450
+    },
+    {
+      "epoch": 0.032096562684333425,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004948117044358806,
+      "loss": 16.9399,
+      "step": 16460
+    },
+    {
+      "epoch": 0.032116062418649546,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004948084534204259,
+      "loss": 16.9612,
+      "step": 16470
+    },
+    {
+      "epoch": 0.03213556215296567,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004948052024049712,
+      "loss": 16.9981,
+      "step": 16480
+    },
+    {
+      "epoch": 0.03215506188728179,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004948019513895165,
+      "loss": 16.9472,
+      "step": 16490
+    },
+    {
+      "epoch": 0.0321745616215979,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004947987003740619,
+      "loss": 16.9264,
+      "step": 16500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.6711547362756e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null