Training in progress, step 75000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c14f92422cc30c9605f95654d62c250bad463581bd3da10bb7b17093206005e
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:a67b2c60c5b42d0ad22d6b38771528b94fc53ceec628d0597d6fa521952a684c
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cdbe93c9686a0a02ecdcba702915ad1389c2bb261f4103c48b737864febba412
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6916c88bb66f81e6f1308f6aadeffdb932cc73012f17c967d2f81582f0d6ec4
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf863b0b895309e73d9088642dd8d00845be8fee481352073f05fd0bd67029a2
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1caa66015d3956d30ec507257de058a8c2fd4bde8e3572a38d393062e23e25fa
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7f0942e1e9569ddb210dcd2d42bc92e339bbd2239990fd3cc546265bee775d39
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:43374ebce165dffb63c7f0a02b8a1fb69d9d2182c0805086854a706ff35de8db
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2a7d2488bf1d4b76628b506fc6b6fb862cbf4396985e4c9e2f16e4262ba5085
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23f5704701def73bff9de54ed2bc9c44e464b4fd7bf79cf9e15b571b97700de5
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:115e6df582159f803bd87cdfeee2a6c991779cf09357b4ef2537b502b04c878f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2ec98dcfff897ba38371ec424fd9cb0533d296496a8ad5f5af6ba3e2b631320
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9391a0b437930e5697a6d0905f7bf157b3a70a9ca0d6fddfd220757077049906
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:83d418122fbb0fa369cfecb2f66848d24fc6c35ef433b91965b2ecce9163409e
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1096172875350331,
   "eval_steps": 500,
-  "global_step": 74000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -25908,6 +25908,356 @@
       "learning_rate": 0.0004818496607939437,
       "loss": 16.3994,
       "step": 74000
     }
   ],
   "logging_steps": 20,
@@ -25927,7 +26277,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.440780396085746e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.11109860223145246,
   "eval_steps": 500,
+  "global_step": 75000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004818496607939437,
       "loss": 16.3994,
       "step": 74000
+    },
+    {
+      "epoch": 0.10964691382896148,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004818447218589361,
+      "loss": 16.3548,
+      "step": 74020
+    },
+    {
+      "epoch": 0.10967654012288987,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048183978292392856,
+      "loss": 16.4066,
+      "step": 74040
+    },
+    {
+      "epoch": 0.10970616641681825,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048183484398892096,
+      "loss": 16.4287,
+      "step": 74060
+    },
+    {
+      "epoch": 0.10973579271074664,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048182990505391346,
+      "loss": 16.4596,
+      "step": 74080
+    },
+    {
+      "epoch": 0.10976541900467503,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048182496611890585,
+      "loss": 16.4221,
+      "step": 74100
+    },
+    {
+      "epoch": 0.10979504529860341,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004818200271838983,
+      "loss": 16.4392,
+      "step": 74120
+    },
+    {
+      "epoch": 0.1098246715925318,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004818150882488907,
+      "loss": 16.4181,
+      "step": 74140
+    },
+    {
+      "epoch": 0.10985429788646019,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004818101493138832,
+      "loss": 16.4593,
+      "step": 74160
+    },
+    {
+      "epoch": 0.10988392418038857,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004818052103788756,
+      "loss": 16.4166,
+      "step": 74180
+    },
+    {
+      "epoch": 0.10991355047431696,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048180027144386804,
+      "loss": 16.422,
+      "step": 74200
+    },
+    {
+      "epoch": 0.10994317676824536,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004817953325088605,
+      "loss": 16.3946,
+      "step": 74220
+    },
+    {
+      "epoch": 0.10997280306217375,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004817903935738529,
+      "loss": 16.4022,
+      "step": 74240
+    },
+    {
+      "epoch": 0.11000242935610213,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004817854546388453,
+      "loss": 16.4411,
+      "step": 74260
+    },
+    {
+      "epoch": 0.11003205565003052,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004817805157038377,
+      "loss": 16.3874,
+      "step": 74280
+    },
+    {
+      "epoch": 0.1100616819439589,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004817755767688302,
+      "loss": 16.4294,
+      "step": 74300
+    },
+    {
+      "epoch": 0.11009130823788729,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004817706378338226,
+      "loss": 16.4261,
+      "step": 74320
+    },
+    {
+      "epoch": 0.11012093453181568,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048176569889881506,
+      "loss": 16.4238,
+      "step": 74340
+    },
+    {
+      "epoch": 0.11015056082574406,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048176075996380746,
+      "loss": 16.3817,
+      "step": 74360
+    },
+    {
+      "epoch": 0.11018018711967245,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048175582102879996,
+      "loss": 16.3883,
+      "step": 74380
+    },
+    {
+      "epoch": 0.11020981341360084,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048175088209379235,
+      "loss": 16.4262,
+      "step": 74400
+    },
+    {
+      "epoch": 0.11023943970752922,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004817459431587848,
+      "loss": 16.4212,
+      "step": 74420
+    },
+    {
+      "epoch": 0.11026906600145761,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004817410042237772,
+      "loss": 16.4718,
+      "step": 74440
+    },
+    {
+      "epoch": 0.110298692295386,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004817360652887697,
+      "loss": 16.4202,
+      "step": 74460
+    },
+    {
+      "epoch": 0.11032831858931438,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004817311263537621,
+      "loss": 16.3904,
+      "step": 74480
+    },
+    {
+      "epoch": 0.11035794488324277,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048172618741875454,
+      "loss": 16.4918,
+      "step": 74500
+    },
+    {
+      "epoch": 0.11038757117717116,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.000481721248483747,
+      "loss": 16.3993,
+      "step": 74520
+    },
+    {
+      "epoch": 0.11041719747109956,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048171630954873943,
+      "loss": 16.3485,
+      "step": 74540
+    },
+    {
+      "epoch": 0.11044682376502794,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004817113706137318,
+      "loss": 16.3389,
+      "step": 74560
+    },
+    {
+      "epoch": 0.11047645005895633,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004817064316787242,
+      "loss": 16.3738,
+      "step": 74580
+    },
+    {
+      "epoch": 0.11050607635288472,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004817014927437167,
+      "loss": 16.3881,
+      "step": 74600
+    },
+    {
+      "epoch": 0.1105357026468131,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004816965538087091,
+      "loss": 16.3802,
+      "step": 74620
+    },
+    {
+      "epoch": 0.11056532894074149,
+      "grad_norm": 7.375,
+      "learning_rate": 0.00048169161487370156,
+      "loss": 16.4216,
+      "step": 74640
+    },
+    {
+      "epoch": 0.11059495523466988,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048168667593869396,
+      "loss": 16.4166,
+      "step": 74660
+    },
+    {
+      "epoch": 0.11062458152859826,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048168173700368646,
+      "loss": 16.358,
+      "step": 74680
+    },
+    {
+      "epoch": 0.11065420782252665,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048167679806867885,
+      "loss": 16.4844,
+      "step": 74700
+    },
+    {
+      "epoch": 0.11068383411645503,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004816718591336713,
+      "loss": 16.4061,
+      "step": 74720
+    },
+    {
+      "epoch": 0.11071346041038342,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004816669201986637,
+      "loss": 16.4073,
+      "step": 74740
+    },
+    {
+      "epoch": 0.11074308670431181,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004816619812636562,
+      "loss": 16.3988,
+      "step": 74760
+    },
+    {
+      "epoch": 0.1107727129982402,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.0004816570423286486,
+      "loss": 16.4417,
+      "step": 74780
+    },
+    {
+      "epoch": 0.11080233929216858,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048165210339364104,
+      "loss": 16.3517,
+      "step": 74800
+    },
+    {
+      "epoch": 0.11083196558609697,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004816471644586335,
+      "loss": 16.3409,
+      "step": 74820
+    },
+    {
+      "epoch": 0.11086159188002535,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048164222552362593,
+      "loss": 16.3664,
+      "step": 74840
+    },
+    {
+      "epoch": 0.11089121817395375,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004816372865886183,
+      "loss": 16.4146,
+      "step": 74860
+    },
+    {
+      "epoch": 0.11092084446788214,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004816323476536108,
+      "loss": 16.3548,
+      "step": 74880
+    },
+    {
+      "epoch": 0.11095047076181053,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004816274087186032,
+      "loss": 16.4546,
+      "step": 74900
+    },
+    {
+      "epoch": 0.11098009705573891,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004816224697835956,
+      "loss": 16.3883,
+      "step": 74920
+    },
+    {
+      "epoch": 0.1110097233496673,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048161753084858806,
+      "loss": 16.4069,
+      "step": 74940
+    },
+    {
+      "epoch": 0.11103934964359569,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048161259191358046,
+      "loss": 16.3556,
+      "step": 74960
+    },
+    {
+      "epoch": 0.11106897593752407,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048160765297857296,
+      "loss": 16.4227,
+      "step": 74980
+    },
+    {
+      "epoch": 0.11109860223145246,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048160271404356535,
+      "loss": 16.4454,
+      "step": 75000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.514317317520595e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null