Training in progress, step 74000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82e96b382e85cf4f91a0957df390eab642f1a5b90594b054112e585987e922fb
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c14f92422cc30c9605f95654d62c250bad463581bd3da10bb7b17093206005e
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86831135ab2a33d7609f755ab5e685a1ac6602cf0ed6e3f717ff3cd6a64064f2
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:cdbe93c9686a0a02ecdcba702915ad1389c2bb261f4103c48b737864febba412
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dbf5a8e94cdeb9d71543994044a1496c0b99dc653812727d1f2b5879319264c4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf863b0b895309e73d9088642dd8d00845be8fee481352073f05fd0bd67029a2
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e585ae00a418f8315b98a87df365e3f31023ec6747db05d48bdc24ed26af3666
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:7f0942e1e9569ddb210dcd2d42bc92e339bbd2239990fd3cc546265bee775d39
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c65df296955d0ea7a8b7df67d30426101d0bc72ddcf4935d0366aeb81991dd30
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2a7d2488bf1d4b76628b506fc6b6fb862cbf4396985e4c9e2f16e4262ba5085
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:75f3a690a6b3c19beeba0982e2eceaedb3e05582e018ecc3f8710afa643876ad
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:115e6df582159f803bd87cdfeee2a6c991779cf09357b4ef2537b502b04c878f
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1971585f96833288fec52d3fdc773fe9f57b50e9c45dc3d75ed2e10f5ab3dca7
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9391a0b437930e5697a6d0905f7bf157b3a70a9ca0d6fddfd220757077049906
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.10813597283861373,
   "eval_steps": 500,
-  "global_step": 73000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -25558,6 +25558,356 @@
       "learning_rate": 0.000482096607544322,
       "loss": 16.4235,
       "step": 73000
     }
   ],
   "logging_steps": 20,
@@ -25577,7 +25927,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.367243712484711e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1096172875350331,
   "eval_steps": 500,
+  "global_step": 74000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000482096607544322,
       "loss": 16.4235,
       "step": 73000
+    },
+    {
+      "epoch": 0.10816559913254212,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004820916686093145,
+      "loss": 16.4474,
+      "step": 73020
+    },
+    {
+      "epoch": 0.1081952254264705,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004820867296743069,
+      "loss": 16.4285,
+      "step": 73040
+    },
+    {
+      "epoch": 0.10822485172039889,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004820817907392993,
+      "loss": 16.5063,
+      "step": 73060
+    },
+    {
+      "epoch": 0.10825447801432728,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004820768518042917,
+      "loss": 16.421,
+      "step": 73080
+    },
+    {
+      "epoch": 0.10828410430825566,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004820719128692842,
+      "loss": 16.5147,
+      "step": 73100
+    },
+    {
+      "epoch": 0.10831373060218405,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004820669739342766,
+      "loss": 16.4131,
+      "step": 73120
+    },
+    {
+      "epoch": 0.10834335689611244,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048206203499926906,
+      "loss": 16.3763,
+      "step": 73140
+    },
+    {
+      "epoch": 0.10837298319004082,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048205709606426145,
+      "loss": 16.402,
+      "step": 73160
+    },
+    {
+      "epoch": 0.10840260948396921,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048205215712925396,
+      "loss": 16.4398,
+      "step": 73180
+    },
+    {
+      "epoch": 0.1084322357778976,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048204721819424635,
+      "loss": 16.4245,
+      "step": 73200
+    },
+    {
+      "epoch": 0.10846186207182598,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048204227925923874,
+      "loss": 16.4691,
+      "step": 73220
+    },
+    {
+      "epoch": 0.10849148836575437,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004820373403242312,
+      "loss": 16.4718,
+      "step": 73240
+    },
+    {
+      "epoch": 0.10852111465968275,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048203240138922364,
+      "loss": 16.4101,
+      "step": 73260
+    },
+    {
+      "epoch": 0.10855074095361116,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004820274624542161,
+      "loss": 16.4164,
+      "step": 73280
+    },
+    {
+      "epoch": 0.10858036724753954,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004820225235192085,
+      "loss": 16.4198,
+      "step": 73300
+    },
+    {
+      "epoch": 0.10860999354146793,
+      "grad_norm": 7.25,
+      "learning_rate": 0.000482017584584201,
+      "loss": 16.3898,
+      "step": 73320
+    },
+    {
+      "epoch": 0.10863961983539631,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004820126456491934,
+      "loss": 16.4542,
+      "step": 73340
+    },
+    {
+      "epoch": 0.1086692461293247,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004820077067141858,
+      "loss": 16.4689,
+      "step": 73360
+    },
+    {
+      "epoch": 0.10869887242325309,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004820027677791782,
+      "loss": 16.3908,
+      "step": 73380
+    },
+    {
+      "epoch": 0.10872849871718147,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004819978288441707,
+      "loss": 16.4518,
+      "step": 73400
+    },
+    {
+      "epoch": 0.10875812501110986,
+      "grad_norm": 5.65625,
+      "learning_rate": 0.0004819928899091631,
+      "loss": 16.4547,
+      "step": 73420
+    },
+    {
+      "epoch": 0.10878775130503825,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048198795097415556,
+      "loss": 16.4573,
+      "step": 73440
+    },
+    {
+      "epoch": 0.10881737759896663,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048198301203914795,
+      "loss": 16.4254,
+      "step": 73460
+    },
+    {
+      "epoch": 0.10884700389289502,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048197807310414046,
+      "loss": 16.4479,
+      "step": 73480
+    },
+    {
+      "epoch": 0.1088766301868234,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048197313416913285,
+      "loss": 16.374,
+      "step": 73500
+    },
+    {
+      "epoch": 0.10890625648075179,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004819681952341253,
+      "loss": 16.4854,
+      "step": 73520
+    },
+    {
+      "epoch": 0.10893588277468018,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004819632562991177,
+      "loss": 16.373,
+      "step": 73540
+    },
+    {
+      "epoch": 0.10896550906860857,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.00048195831736411014,
+      "loss": 16.4403,
+      "step": 73560
+    },
+    {
+      "epoch": 0.10899513536253697,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004819533784291026,
+      "loss": 16.4399,
+      "step": 73580
+    },
+    {
+      "epoch": 0.10902476165646535,
+      "grad_norm": 6.0,
+      "learning_rate": 0.000481948439494095,
+      "loss": 16.4257,
+      "step": 73600
+    },
+    {
+      "epoch": 0.10905438795039374,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004819435005590875,
+      "loss": 16.3843,
+      "step": 73620
+    },
+    {
+      "epoch": 0.10908401424432213,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004819385616240799,
+      "loss": 16.4221,
+      "step": 73640
+    },
+    {
+      "epoch": 0.10911364053825051,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004819336226890723,
+      "loss": 16.4464,
+      "step": 73660
+    },
+    {
+      "epoch": 0.1091432668321789,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004819286837540647,
+      "loss": 16.4325,
+      "step": 73680
+    },
+    {
+      "epoch": 0.10917289312610728,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004819237448190572,
+      "loss": 16.4222,
+      "step": 73700
+    },
+    {
+      "epoch": 0.10920251942003567,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004819188058840496,
+      "loss": 16.4236,
+      "step": 73720
+    },
+    {
+      "epoch": 0.10923214571396406,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00048191386694904206,
+      "loss": 16.4719,
+      "step": 73740
+    },
+    {
+      "epoch": 0.10926177200789244,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048190892801403445,
+      "loss": 16.4062,
+      "step": 73760
+    },
+    {
+      "epoch": 0.10929139830182083,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048190398907902696,
+      "loss": 16.4468,
+      "step": 73780
+    },
+    {
+      "epoch": 0.10932102459574922,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048189905014401935,
+      "loss": 16.4426,
+      "step": 73800
+    },
+    {
+      "epoch": 0.1093506508896776,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004818941112090118,
+      "loss": 16.4042,
+      "step": 73820
+    },
+    {
+      "epoch": 0.10938027718360599,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004818891722740042,
+      "loss": 16.489,
+      "step": 73840
+    },
+    {
+      "epoch": 0.10940990347753438,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004818842333389967,
+      "loss": 16.4129,
+      "step": 73860
+    },
+    {
+      "epoch": 0.10943952977146276,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004818792944039891,
+      "loss": 16.4828,
+      "step": 73880
+    },
+    {
+      "epoch": 0.10946915606539116,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004818743554689815,
+      "loss": 16.4081,
+      "step": 73900
+    },
+    {
+      "epoch": 0.10949878235931955,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.000481869416533974,
+      "loss": 16.441,
+      "step": 73920
+    },
+    {
+      "epoch": 0.10952840865324794,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004818644775989664,
+      "loss": 16.4013,
+      "step": 73940
+    },
+    {
+      "epoch": 0.10955803494717632,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004818595386639588,
+      "loss": 16.4102,
+      "step": 73960
+    },
+    {
+      "epoch": 0.10958766124110471,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.0004818545997289512,
+      "loss": 16.3879,
+      "step": 73980
+    },
+    {
+      "epoch": 0.1096172875350331,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004818496607939437,
+      "loss": 16.3994,
+      "step": 74000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.440780396085746e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null