Training in progress, step 77000, checkpoint

Files changed:
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:456f25e6949b3d7d11abfc7016f20c519ec2224b5939a593f25e94ab538895d0
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:583ce6afe4697cbc9bee02b6ce9c574f4dc14c85f97be8a32cab3b7f02347cff
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9d61a2372056f2b32f6ae2b2c7745d9d5c6ac967a32622f315e73c700a55b59c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0305d043d3c89d1352e924dd8f0e87b43a3b6eaaaf9859b3bc689a1146bd169b
 size 14960

last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:974b9922d6267aa5fa0a64e6a68535833054f08cce85f66dc5aaf99a834c1951
 size 14960

last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:0cc97b8ced844bab48e2e6688594701ea9aba44b688ce227a136172614ec21f5
 size 14960

last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fd8038700a783d3dbbe90c65ffd9f9176aad3cbeda38c8a7508ae0b5dcd99468
 size 1064

last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.11406123162429119,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 77000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -26608,6 +26608,356 @@
       "learning_rate": 0.00048135576729318704,
       "loss": 16.3442,
       "step": 76000
+    },
+    {
+      "epoch": 0.11260954322180021,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004813508283581795,
+      "loss": 16.3857,
+      "step": 76020
+    },
+    {
+      "epoch": 0.1126391695157286,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048134588942317193,
+      "loss": 16.4228,
+      "step": 76040
+    },
+    {
+      "epoch": 0.11266879580965698,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004813409504881643,
+      "loss": 16.4156,
+      "step": 76060
+    },
+    {
+      "epoch": 0.11269842210358537,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004813360115531568,
+      "loss": 16.3843,
+      "step": 76080
+    },
+    {
+      "epoch": 0.11272804839751376,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004813310726181492,
+      "loss": 16.3459,
+      "step": 76100
+    },
+    {
+      "epoch": 0.11275767469144214,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048132613368314167,
+      "loss": 16.3973,
+      "step": 76120
+    },
+    {
+      "epoch": 0.11278730098537054,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.00048132119474813406,
+      "loss": 16.3245,
+      "step": 76140
+    },
+    {
+      "epoch": 0.11281692727929893,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004813162558131265,
+      "loss": 16.3705,
+      "step": 76160
+    },
+    {
+      "epoch": 0.11284655357322732,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048131131687811896,
+      "loss": 16.3244,
+      "step": 76180
+    },
+    {
+      "epoch": 0.1128761798671557,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004813063779431114,
+      "loss": 16.3466,
+      "step": 76200
+    },
+    {
+      "epoch": 0.11290580616108409,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004813014390081038,
+      "loss": 16.345,
+      "step": 76220
+    },
+    {
+      "epoch": 0.11293543245501247,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048129650007309625,
+      "loss": 16.3821,
+      "step": 76240
+    },
+    {
+      "epoch": 0.11296505874894086,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004812915611380887,
+      "loss": 16.3541,
+      "step": 76260
+    },
+    {
+      "epoch": 0.11299468504286925,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004812866222030811,
+      "loss": 16.2916,
+      "step": 76280
+    },
+    {
+      "epoch": 0.11302431133679763,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048128168326807354,
+      "loss": 16.2938,
+      "step": 76300
+    },
+    {
+      "epoch": 0.11305393763072602,
+      "grad_norm": 6.5,
+      "learning_rate": 0.000481276744333066,
+      "loss": 16.4026,
+      "step": 76320
+    },
+    {
+      "epoch": 0.1130835639246544,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048127180539805843,
+      "loss": 16.3102,
+      "step": 76340
+    },
+    {
+      "epoch": 0.1131131902185828,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004812668664630508,
+      "loss": 16.338,
+      "step": 76360
+    },
+    {
+      "epoch": 0.11314281651251118,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004812619275280433,
+      "loss": 16.3304,
+      "step": 76380
+    },
+    {
+      "epoch": 0.11317244280643957,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004812569885930357,
+      "loss": 16.353,
+      "step": 76400
+    },
+    {
+      "epoch": 0.11320206910036795,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.00048125204965802817,
+      "loss": 16.3146,
+      "step": 76420
+    },
+    {
+      "epoch": 0.11323169539429635,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048124711072302056,
+      "loss": 16.3556,
+      "step": 76440
+    },
+    {
+      "epoch": 0.11326132168822474,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.000481242171788013,
+      "loss": 16.4044,
+      "step": 76460
+    },
+    {
+      "epoch": 0.11329094798215313,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048123723285300546,
+      "loss": 16.2708,
+      "step": 76480
+    },
+    {
+      "epoch": 0.11332057427608151,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004812322939179979,
+      "loss": 16.3705,
+      "step": 76500
+    },
+    {
+      "epoch": 0.1133502005700099,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004812273549829903,
+      "loss": 16.3579,
+      "step": 76520
+    },
+    {
+      "epoch": 0.11337982686393829,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.00048122241604798275,
+      "loss": 16.3504,
+      "step": 76540
+    },
+    {
+      "epoch": 0.11340945315786667,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004812174771129752,
+      "loss": 16.3338,
+      "step": 76560
+    },
+    {
+      "epoch": 0.11343907945179506,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048121253817796764,
+      "loss": 16.337,
+      "step": 76580
+    },
+    {
+      "epoch": 0.11346870574572344,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048120759924296004,
+      "loss": 16.3507,
+      "step": 76600
+    },
+    {
+      "epoch": 0.11349833203965183,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004812026603079525,
+      "loss": 16.311,
+      "step": 76620
+    },
+    {
+      "epoch": 0.11352795833358022,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00048119772137294493,
+      "loss": 16.3014,
+      "step": 76640
+    },
+    {
+      "epoch": 0.1135575846275086,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048119278243793733,
+      "loss": 16.3813,
+      "step": 76660
+    },
+    {
+      "epoch": 0.11358721092143699,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004811878435029298,
+      "loss": 16.3528,
+      "step": 76680
+    },
+    {
+      "epoch": 0.11361683721536538,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004811829045679222,
+      "loss": 16.308,
+      "step": 76700
+    },
+    {
+      "epoch": 0.11364646350929376,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.00048117796563291467,
+      "loss": 16.3711,
+      "step": 76720
+    },
+    {
+      "epoch": 0.11367608980322215,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048117302669790706,
+      "loss": 16.2906,
+      "step": 76740
+    },
+    {
+      "epoch": 0.11370571609715055,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004811680877628995,
+      "loss": 16.3626,
+      "step": 76760
+    },
+    {
+      "epoch": 0.11373534239107894,
+      "grad_norm": 5.75,
+      "learning_rate": 0.00048116314882789196,
+      "loss": 16.2853,
+      "step": 76780
+    },
+    {
+      "epoch": 0.11376496868500732,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004811582098928844,
+      "loss": 16.3697,
+      "step": 76800
+    },
+    {
+      "epoch": 0.11379459497893571,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004811532709578768,
+      "loss": 16.3249,
+      "step": 76820
+    },
+    {
+      "epoch": 0.1138242212728641,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048114833202286925,
+      "loss": 16.3022,
+      "step": 76840
+    },
+    {
+      "epoch": 0.11385384756679248,
+      "grad_norm": 7.375,
+      "learning_rate": 0.0004811433930878617,
+      "loss": 16.3372,
+      "step": 76860
+    },
+    {
+      "epoch": 0.11388347386072087,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048113845415285414,
+      "loss": 16.3621,
+      "step": 76880
+    },
+    {
+      "epoch": 0.11391310015464925,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048113351521784654,
+      "loss": 16.3544,
+      "step": 76900
+    },
+    {
+      "epoch": 0.11394272644857764,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048112857628283904,
+      "loss": 16.3275,
+      "step": 76920
+    },
+    {
+      "epoch": 0.11397235274250603,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048112363734783143,
+      "loss": 16.359,
+      "step": 76940
+    },
+    {
+      "epoch": 0.11400197903643441,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048111869841282383,
+      "loss": 16.3528,
+      "step": 76960
+    },
+    {
+      "epoch": 0.1140316053303628,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004811137594778163,
+      "loss": 16.3342,
+      "step": 76980
+    },
+    {
+      "epoch": 0.11406123162429119,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004811088205428087,
+      "loss": 16.3042,
+      "step": 77000
     }
   ],
   "logging_steps": 20,
@@ -26627,7 +26977,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.
+  "total_flos": 5.661389989152712e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null