Training in progress, step 40500, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cbb933599d03723090d50f6ffb70296e6eebfa61eacef48a3a190ab7f15c58c2
 size 448472762

 version https://git-lfs.github.com/spec/v1
+oid sha256:d03259d2e256ae1843dd9d93c96f13ae033fa50104b7b3335fc98eda0124f7a5
 size 448472762

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:903023559671a4d82750f12e5ca9d151bdd8d505a4fce885d24d671e2610e186
 size 151589028

 version https://git-lfs.github.com/spec/v1
+oid sha256:15854e4e6f02157f1526c4c8d47e876fa37ca90091d0530a30a41ceb5b08bcec
 size 151589028

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81957fa12fa0c422c90aa82913486d7b1e310985e3f015d6405f86eae7e8caf3
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:30b82ff92adb88680c27be8e5a2b5c9da63ae08090ef3a7c14508b2164f79186
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da890422df85affed3b9f319afd342babff58a471fda9ec061f701ac1b55f1c7
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf6d32c22f27c022798bfaaaca4ba2cb9286d958e4d3a8bc21674e25ce2e9897
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8851c06e96c8092fbedd67a7750090010c0af343cfa97642b1e2f88d3465e85d
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:bad3a95014e8c014b1b0e1d2c7e862e00a64dd0395000ae8357a96077edd14c5
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96398a4c09a68bacf3ea4300ac6d599a0630a0edcecaa220a7a8972091f45d18
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce9750971f49c876ed40cfbeda660585c3f668d39961f483bf1708ae57b0f2eb
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1aa1b92b39f2173cd0223cc365bd686d25497911f61b1c0837c564d84b38f68
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b16d6b3e5a3f4efdb62ade99a44d77fcf809fff9d006debbc9f917125a34ca7
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07799893726447978,
   "eval_steps": 500,
-  "global_step": 40000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -28008,6 +28008,356 @@
       "learning_rate": 0.00048715881405557025,
       "loss": 2.0898,
       "step": 40000
     }
   ],
   "logging_steps": 10,
@@ -28027,7 +28377,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.3080923317873934e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07897392398028577,
   "eval_steps": 500,
+  "global_step": 40500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048715881405557025,
       "loss": 2.0898,
       "step": 40000
+    },
+    {
+      "epoch": 0.07801843699879589,
+      "grad_norm": 0.37890625,
+      "learning_rate": 0.0004871555630401156,
+      "loss": 2.0975,
+      "step": 40010
+    },
+    {
+      "epoch": 0.07803793673311202,
+      "grad_norm": 0.416015625,
+      "learning_rate": 0.0004871523120246609,
+      "loss": 2.0899,
+      "step": 40020
+    },
+    {
+      "epoch": 0.07805743646742813,
+      "grad_norm": 0.4453125,
+      "learning_rate": 0.00048714906100920624,
+      "loss": 2.1008,
+      "step": 40030
+    },
+    {
+      "epoch": 0.07807693620174425,
+      "grad_norm": 0.490234375,
+      "learning_rate": 0.00048714580999375157,
+      "loss": 2.0912,
+      "step": 40040
+    },
+    {
+      "epoch": 0.07809643593606037,
+      "grad_norm": 0.39453125,
+      "learning_rate": 0.0004871425589782969,
+      "loss": 2.0913,
+      "step": 40050
+    },
+    {
+      "epoch": 0.07811593567037649,
+      "grad_norm": 0.396484375,
+      "learning_rate": 0.0004871393079628422,
+      "loss": 2.0787,
+      "step": 40060
+    },
+    {
+      "epoch": 0.07813543540469262,
+      "grad_norm": 0.369140625,
+      "learning_rate": 0.00048713605694738755,
+      "loss": 2.1002,
+      "step": 40070
+    },
+    {
+      "epoch": 0.07815493513900873,
+      "grad_norm": 0.396484375,
+      "learning_rate": 0.0004871328059319329,
+      "loss": 2.1055,
+      "step": 40080
+    },
+    {
+      "epoch": 0.07817443487332486,
+      "grad_norm": 0.392578125,
+      "learning_rate": 0.00048712955491647816,
+      "loss": 2.0943,
+      "step": 40090
+    },
+    {
+      "epoch": 0.07819393460764097,
+      "grad_norm": 0.412109375,
+      "learning_rate": 0.0004871263039010235,
+      "loss": 2.096,
+      "step": 40100
+    },
+    {
+      "epoch": 0.07821343434195709,
+      "grad_norm": 0.462890625,
+      "learning_rate": 0.0004871230528855688,
+      "loss": 2.0891,
+      "step": 40110
+    },
+    {
+      "epoch": 0.07823293407627321,
+      "grad_norm": 0.3828125,
+      "learning_rate": 0.00048711980187011414,
+      "loss": 2.0926,
+      "step": 40120
+    },
+    {
+      "epoch": 0.07825243381058933,
+      "grad_norm": 0.45703125,
+      "learning_rate": 0.00048711655085465947,
+      "loss": 2.0898,
+      "step": 40130
+    },
+    {
+      "epoch": 0.07827193354490546,
+      "grad_norm": 0.37109375,
+      "learning_rate": 0.00048711329983920474,
+      "loss": 2.0886,
+      "step": 40140
+    },
+    {
+      "epoch": 0.07829143327922157,
+      "grad_norm": 0.373046875,
+      "learning_rate": 0.00048711004882375007,
+      "loss": 2.0855,
+      "step": 40150
+    },
+    {
+      "epoch": 0.07831093301353768,
+      "grad_norm": 0.55859375,
+      "learning_rate": 0.0004871067978082954,
+      "loss": 2.1028,
+      "step": 40160
+    },
+    {
+      "epoch": 0.07833043274785381,
+      "grad_norm": 0.42578125,
+      "learning_rate": 0.00048710354679284073,
+      "loss": 2.0743,
+      "step": 40170
+    },
+    {
+      "epoch": 0.07834993248216993,
+      "grad_norm": 0.53125,
+      "learning_rate": 0.00048710029577738606,
+      "loss": 2.0989,
+      "step": 40180
+    },
+    {
+      "epoch": 0.07836943221648605,
+      "grad_norm": 0.39453125,
+      "learning_rate": 0.0004870970447619314,
+      "loss": 2.0705,
+      "step": 40190
+    },
+    {
+      "epoch": 0.07838893195080217,
+      "grad_norm": 0.3515625,
+      "learning_rate": 0.0004870937937464767,
+      "loss": 2.0708,
+      "step": 40200
+    },
+    {
+      "epoch": 0.0784084316851183,
+      "grad_norm": 0.48828125,
+      "learning_rate": 0.00048709054273102204,
+      "loss": 2.0809,
+      "step": 40210
+    },
+    {
+      "epoch": 0.07842793141943441,
+      "grad_norm": 0.43359375,
+      "learning_rate": 0.00048708729171556737,
+      "loss": 2.0944,
+      "step": 40220
+    },
+    {
+      "epoch": 0.07844743115375052,
+      "grad_norm": 0.349609375,
+      "learning_rate": 0.0004870840407001127,
+      "loss": 2.0886,
+      "step": 40230
+    },
+    {
+      "epoch": 0.07846693088806665,
+      "grad_norm": 0.71875,
+      "learning_rate": 0.00048708078968465803,
+      "loss": 2.0778,
+      "step": 40240
+    },
+    {
+      "epoch": 0.07848643062238277,
+      "grad_norm": 0.39453125,
+      "learning_rate": 0.00048707753866920336,
+      "loss": 2.0816,
+      "step": 40250
+    },
+    {
+      "epoch": 0.0785059303566989,
+      "grad_norm": 0.38671875,
+      "learning_rate": 0.0004870742876537487,
+      "loss": 2.095,
+      "step": 40260
+    },
+    {
+      "epoch": 0.07852543009101501,
+      "grad_norm": 0.357421875,
+      "learning_rate": 0.000487071036638294,
+      "loss": 2.0847,
+      "step": 40270
+    },
+    {
+      "epoch": 0.07854492982533114,
+      "grad_norm": 0.3984375,
+      "learning_rate": 0.00048706778562283934,
+      "loss": 2.095,
+      "step": 40280
+    },
+    {
+      "epoch": 0.07856442955964725,
+      "grad_norm": 0.408203125,
+      "learning_rate": 0.00048706453460738467,
+      "loss": 2.0868,
+      "step": 40290
+    },
+    {
+      "epoch": 0.07858392929396336,
+      "grad_norm": 0.375,
+      "learning_rate": 0.00048706128359192995,
+      "loss": 2.0935,
+      "step": 40300
+    },
+    {
+      "epoch": 0.07860342902827949,
+      "grad_norm": 0.43359375,
+      "learning_rate": 0.0004870580325764753,
+      "loss": 2.0762,
+      "step": 40310
+    },
+    {
+      "epoch": 0.0786229287625956,
+      "grad_norm": 0.384765625,
+      "learning_rate": 0.0004870547815610206,
+      "loss": 2.0939,
+      "step": 40320
+    },
+    {
+      "epoch": 0.07864242849691173,
+      "grad_norm": 0.431640625,
+      "learning_rate": 0.0004870515305455659,
+      "loss": 2.091,
+      "step": 40330
+    },
+    {
+      "epoch": 0.07866192823122785,
+      "grad_norm": 0.44921875,
+      "learning_rate": 0.0004870482795301112,
+      "loss": 2.1065,
+      "step": 40340
+    },
+    {
+      "epoch": 0.07868142796554398,
+      "grad_norm": 0.42578125,
+      "learning_rate": 0.00048704502851465653,
+      "loss": 2.0833,
+      "step": 40350
+    },
+    {
+      "epoch": 0.07870092769986009,
+      "grad_norm": 0.390625,
+      "learning_rate": 0.00048704177749920186,
+      "loss": 2.0815,
+      "step": 40360
+    },
+    {
+      "epoch": 0.0787204274341762,
+      "grad_norm": 0.431640625,
+      "learning_rate": 0.0004870385264837472,
+      "loss": 2.0821,
+      "step": 40370
+    },
+    {
+      "epoch": 0.07873992716849233,
+      "grad_norm": 0.416015625,
+      "learning_rate": 0.0004870352754682925,
+      "loss": 2.0873,
+      "step": 40380
+    },
+    {
+      "epoch": 0.07875942690280845,
+      "grad_norm": 0.404296875,
+      "learning_rate": 0.00048703202445283785,
+      "loss": 2.0744,
+      "step": 40390
+    },
+    {
+      "epoch": 0.07877892663712457,
+      "grad_norm": 0.392578125,
+      "learning_rate": 0.0004870287734373832,
+      "loss": 2.0897,
+      "step": 40400
+    },
+    {
+      "epoch": 0.07879842637144069,
+      "grad_norm": 0.46484375,
+      "learning_rate": 0.0004870255224219285,
+      "loss": 2.0874,
+      "step": 40410
+    },
+    {
+      "epoch": 0.0788179261057568,
+      "grad_norm": 0.435546875,
+      "learning_rate": 0.00048702227140647383,
+      "loss": 2.0957,
+      "step": 40420
+    },
+    {
+      "epoch": 0.07883742584007293,
+      "grad_norm": 0.419921875,
+      "learning_rate": 0.00048701902039101916,
+      "loss": 2.0726,
+      "step": 40430
+    },
+    {
+      "epoch": 0.07885692557438904,
+      "grad_norm": 0.4140625,
+      "learning_rate": 0.0004870157693755645,
+      "loss": 2.0802,
+      "step": 40440
+    },
+    {
+      "epoch": 0.07887642530870517,
+      "grad_norm": 0.53515625,
+      "learning_rate": 0.00048701251836010976,
+      "loss": 2.0906,
+      "step": 40450
+    },
+    {
+      "epoch": 0.07889592504302129,
+      "grad_norm": 0.384765625,
+      "learning_rate": 0.0004870092673446551,
+      "loss": 2.0789,
+      "step": 40460
+    },
+    {
+      "epoch": 0.07891542477733741,
+      "grad_norm": 0.421875,
+      "learning_rate": 0.0004870060163292004,
+      "loss": 2.0869,
+      "step": 40470
+    },
+    {
+      "epoch": 0.07893492451165353,
+      "grad_norm": 0.396484375,
+      "learning_rate": 0.00048700276531374575,
+      "loss": 2.0973,
+      "step": 40480
+    },
+    {
+      "epoch": 0.07895442424596964,
+      "grad_norm": 0.39453125,
+      "learning_rate": 0.0004869995142982911,
+      "loss": 2.0851,
+      "step": 40490
+    },
+    {
+      "epoch": 0.07897392398028577,
+      "grad_norm": 0.40234375,
+      "learning_rate": 0.0004869962632828364,
+      "loss": 2.0964,
+      "step": 40500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.3244470339766845e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null