Training in progress, step 69000, checkpoint

Browse files

Files changed (9) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3
last-checkpoint/training_args.bin +1 -1

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:855097e18de16f85c46f8b027e1873d375c3a4edc034e8bed8a7f0b58970ad94
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:e12ef6a026db88916d2bccaa887c346b617f8bf524f61c49a560d4c1854fb6f1
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6210b328c6e30eb767412099efb2004508322ff25c3e6056826eba5d995bc2b
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:287d41aa21a4eb242c8834811d8cbeecb6b0fd5e8162f8a93804fb2ec7aa6398
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b1fe05f5b470f95761cfc3fed3146b8c8e8a912646d05e70e539792b7f745a3f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c9b12b8c5c5c31953b64891eb7a5a87fe3243666cbd4801ead4f6238d85d2c9c
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:49ad9d6f5fe6b13eeb9343f8fae928ab75997e82b569c4a8977d808cdc884b1e
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:17944b85b9d02378f311e5505f3d2beb901e13fa7a7306f1d0d6ef90c3394bf6
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4711ff133c23ad6d8a7643a31e0e727444cc5280990eabd826bfc8c92e7cdf77
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c35b076a46134f931f65ad614d3a133b44af15affbef2c6984eacb0867534788
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d66ecdc5ab3f9e8ebc655822c33c54e4023463dd04074044db32f0a8095e3378
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:00652d787b0217457a14651c8e87f0d8ab4c5f0af3727292f0c3f9d4e718cb0a
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:730a17924aec965fee0684191a1f8a93d017e71268086042298dd7299e09c6f3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:dc0d2189df36213ee36dacfa0f47fda988de8257ffa315d320b6c0176d420bf1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.1007293993565169,
   "eval_steps": 500,
-  "global_step": 68000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -23808,6 +23808,356 @@
       "learning_rate": 0.00048333134129621366,
       "loss": 16.5557,
       "step": 68000
     }
   ],
   "logging_steps": 20,
@@ -23827,7 +24177,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.999562170735998e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.10221071405293626,
   "eval_steps": 500,
+  "global_step": 69000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048333134129621366,
       "loss": 16.5557,
       "step": 68000
+    },
+    {
+      "epoch": 0.10075902565044528,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.00048332640236120605,
+      "loss": 16.6099,
+      "step": 68020
+    },
+    {
+      "epoch": 0.10078865194437367,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004833214634261985,
+      "loss": 16.6517,
+      "step": 68040
+    },
+    {
+      "epoch": 0.10081827823830206,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.0004833165244911909,
+      "loss": 16.5766,
+      "step": 68060
+    },
+    {
+      "epoch": 0.10084790453223044,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004833115855561834,
+      "loss": 16.606,
+      "step": 68080
+    },
+    {
+      "epoch": 0.10087753082615883,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004833066466211758,
+      "loss": 16.5779,
+      "step": 68100
+    },
+    {
+      "epoch": 0.10090715712008722,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048330170768616824,
+      "loss": 16.6219,
+      "step": 68120
+    },
+    {
+      "epoch": 0.1009367834140156,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004832967687511607,
+      "loss": 16.5954,
+      "step": 68140
+    },
+    {
+      "epoch": 0.10096640970794399,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004832918298161531,
+      "loss": 16.5884,
+      "step": 68160
+    },
+    {
+      "epoch": 0.10099603600187239,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048328689088114553,
+      "loss": 16.5813,
+      "step": 68180
+    },
+    {
+      "epoch": 0.10102566229580077,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004832819519461379,
+      "loss": 16.6294,
+      "step": 68200
+    },
+    {
+      "epoch": 0.10105528858972916,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.0004832770130111304,
+      "loss": 16.627,
+      "step": 68220
+    },
+    {
+      "epoch": 0.10108491488365755,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004832720740761228,
+      "loss": 16.5516,
+      "step": 68240
+    },
+    {
+      "epoch": 0.10111454117758593,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048326713514111526,
+      "loss": 16.6234,
+      "step": 68260
+    },
+    {
+      "epoch": 0.10114416747151432,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048326219620610766,
+      "loss": 16.5566,
+      "step": 68280
+    },
+    {
+      "epoch": 0.10117379376544271,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048325725727110016,
+      "loss": 16.6184,
+      "step": 68300
+    },
+    {
+      "epoch": 0.1012034200593711,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.00048325231833609255,
+      "loss": 16.6634,
+      "step": 68320
+    },
+    {
+      "epoch": 0.10123304635329948,
+      "grad_norm": 7.75,
+      "learning_rate": 0.000483247379401085,
+      "loss": 16.5374,
+      "step": 68340
+    },
+    {
+      "epoch": 0.10126267264722787,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004832424404660774,
+      "loss": 16.6118,
+      "step": 68360
+    },
+    {
+      "epoch": 0.10129229894115625,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004832375015310699,
+      "loss": 16.5952,
+      "step": 68380
+    },
+    {
+      "epoch": 0.10132192523508464,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004832325625960623,
+      "loss": 16.5866,
+      "step": 68400
+    },
+    {
+      "epoch": 0.10135155152901303,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048322762366105474,
+      "loss": 16.5999,
+      "step": 68420
+    },
+    {
+      "epoch": 0.10138117782294141,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004832226847260472,
+      "loss": 16.553,
+      "step": 68440
+    },
+    {
+      "epoch": 0.1014108041168698,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048321774579103963,
+      "loss": 16.6224,
+      "step": 68460
+    },
+    {
+      "epoch": 0.10144043041079819,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048321280685603203,
+      "loss": 16.5789,
+      "step": 68480
+    },
+    {
+      "epoch": 0.10147005670472659,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004832078679210244,
+      "loss": 16.6379,
+      "step": 68500
+    },
+    {
+      "epoch": 0.10149968299865497,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.0004832029289860169,
+      "loss": 16.5628,
+      "step": 68520
+    },
+    {
+      "epoch": 0.10152930929258336,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004831979900510093,
+      "loss": 16.5675,
+      "step": 68540
+    },
+    {
+      "epoch": 0.10155893558651174,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.00048319305111600176,
+      "loss": 16.591,
+      "step": 68560
+    },
+    {
+      "epoch": 0.10158856188044013,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048318811218099416,
+      "loss": 16.5641,
+      "step": 68580
+    },
+    {
+      "epoch": 0.10161818817436852,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048318317324598666,
+      "loss": 16.5867,
+      "step": 68600
+    },
+    {
+      "epoch": 0.1016478144682969,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048317823431097905,
+      "loss": 16.5886,
+      "step": 68620
+    },
+    {
+      "epoch": 0.10167744076222529,
+      "grad_norm": 7.625,
+      "learning_rate": 0.0004831732953759715,
+      "loss": 16.5648,
+      "step": 68640
+    },
+    {
+      "epoch": 0.10170706705615368,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004831683564409639,
+      "loss": 16.5727,
+      "step": 68660
+    },
+    {
+      "epoch": 0.10173669335008206,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004831634175059564,
+      "loss": 16.5931,
+      "step": 68680
+    },
+    {
+      "epoch": 0.10176631964401045,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004831584785709488,
+      "loss": 16.6137,
+      "step": 68700
+    },
+    {
+      "epoch": 0.10179594593793884,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.00048315353963594124,
+      "loss": 16.5733,
+      "step": 68720
+    },
+    {
+      "epoch": 0.10182557223186722,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.0004831486007009337,
+      "loss": 16.5379,
+      "step": 68740
+    },
+    {
+      "epoch": 0.10185519852579561,
+      "grad_norm": 5.59375,
+      "learning_rate": 0.00048314366176592613,
+      "loss": 16.6031,
+      "step": 68760
+    },
+    {
+      "epoch": 0.101884824819724,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048313872283091853,
+      "loss": 16.5457,
+      "step": 68780
+    },
+    {
+      "epoch": 0.10191445111365238,
+      "grad_norm": 6.125,
+      "learning_rate": 0.000483133783895911,
+      "loss": 16.5053,
+      "step": 68800
+    },
+    {
+      "epoch": 0.10194407740758078,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004831288449609034,
+      "loss": 16.576,
+      "step": 68820
+    },
+    {
+      "epoch": 0.10197370370150917,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004831239060258958,
+      "loss": 16.6092,
+      "step": 68840
+    },
+    {
+      "epoch": 0.10200332999543756,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048311896709088827,
+      "loss": 16.598,
+      "step": 68860
+    },
+    {
+      "epoch": 0.10203295628936594,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048311402815588066,
+      "loss": 16.5953,
+      "step": 68880
+    },
+    {
+      "epoch": 0.10206258258329433,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.00048310908922087316,
+      "loss": 16.5686,
+      "step": 68900
+    },
+    {
+      "epoch": 0.10209220887722271,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.00048310415028586555,
+      "loss": 16.5649,
+      "step": 68920
+    },
+    {
+      "epoch": 0.1021218351711511,
+      "grad_norm": 7.15625,
+      "learning_rate": 0.000483099211350858,
+      "loss": 16.5833,
+      "step": 68940
+    },
+    {
+      "epoch": 0.10215146146507949,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004830942724158504,
+      "loss": 16.5813,
+      "step": 68960
+    },
+    {
+      "epoch": 0.10218108775900787,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004830893334808429,
+      "loss": 16.6435,
+      "step": 68980
+    },
+    {
+      "epoch": 0.10221071405293626,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004830843945458353,
+      "loss": 16.6299,
+      "step": 69000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 5.073098616395845e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1ae6fe7865a6680f0788decd4b8035db04ae39b0ae4392f872489469c00e7d58
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc3f551404b0d7edd833494ee70d9c95a722ebd26deaead78190bce345559dbd
 size 5432