Training in progress, step 68000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:592e049b4f95cedc3ce748066bfe5c49caaf185103ee6879f3fe5855784ee5e4
 size 304481530

 version https://git-lfs.github.com/spec/v1
+oid sha256:855097e18de16f85c46f8b027e1873d375c3a4edc034e8bed8a7f0b58970ad94
 size 304481530

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7b6410b67c32a1a3c53feeda8ce0bc10d5dfb988aac519c5a064ebceb11a2b44
 size 402029570

 version https://git-lfs.github.com/spec/v1
+oid sha256:f6210b328c6e30eb767412099efb2004508322ff25c3e6056826eba5d995bc2b
 size 402029570

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2e906076e058ba8639864a21864af8842f5ecac1704c887beaab37a8e6e242fc
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b1fe05f5b470f95761cfc3fed3146b8c8e8a912646d05e70e539792b7f745a3f
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:42a4f9c06b183d2e64de63ce44653d84f7852f22703e0981e748b5484b6ce5bf
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:49ad9d6f5fe6b13eeb9343f8fae928ab75997e82b569c4a8977d808cdc884b1e
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6f0f87d2ccf743784eeb2c3749299a3c74e27a9d13e979b4b7bb6bc45c7f2b4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:4711ff133c23ad6d8a7643a31e0e727444cc5280990eabd826bfc8c92e7cdf77
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f7b50ba5ae16dcff5d8422d327a7567bf3c63ab4752711cdff26c891625467c8
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:d66ecdc5ab3f9e8ebc655822c33c54e4023463dd04074044db32f0a8095e3378
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e30b6ea2494d807c4b425128323468f737661014732dbc995415194d13c4da2b
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:730a17924aec965fee0684191a1f8a93d017e71268086042298dd7299e09c6f3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.09924808466009753,
   "eval_steps": 500,
-  "global_step": 67000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -23458,6 +23458,356 @@
       "learning_rate": 0.0004835782880465919,
       "loss": 16.6314,
       "step": 67000
     }
   ],
   "logging_steps": 20,
@@ -23477,7 +23827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.9260258238603985e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.1007293993565169,
   "eval_steps": 500,
+  "global_step": 68000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004835782880465919,
       "loss": 16.6314,
       "step": 67000
+    },
+    {
+      "epoch": 0.09927771095402592,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004835733491115844,
+      "loss": 16.6278,
+      "step": 67020
+    },
+    {
+      "epoch": 0.0993073372479543,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004835684101765768,
+      "loss": 16.5956,
+      "step": 67040
+    },
+    {
+      "epoch": 0.09933696354188269,
+      "grad_norm": 6.75,
+      "learning_rate": 0.00048356347124156926,
+      "loss": 16.6496,
+      "step": 67060
+    },
+    {
+      "epoch": 0.09936658983581108,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048355853230656166,
+      "loss": 16.6581,
+      "step": 67080
+    },
+    {
+      "epoch": 0.09939621612973946,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048355359337155416,
+      "loss": 16.6221,
+      "step": 67100
+    },
+    {
+      "epoch": 0.09942584242366785,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048354865443654655,
+      "loss": 16.6772,
+      "step": 67120
+    },
+    {
+      "epoch": 0.09945546871759624,
+      "grad_norm": 7.25,
+      "learning_rate": 0.00048354371550153895,
+      "loss": 16.6104,
+      "step": 67140
+    },
+    {
+      "epoch": 0.09948509501152462,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004835387765665314,
+      "loss": 16.6313,
+      "step": 67160
+    },
+    {
+      "epoch": 0.09951472130545301,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048353383763152384,
+      "loss": 16.6284,
+      "step": 67180
+    },
+    {
+      "epoch": 0.0995443475993814,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004835288986965163,
+      "loss": 16.6672,
+      "step": 67200
+    },
+    {
+      "epoch": 0.09957397389330978,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004835239597615087,
+      "loss": 16.6011,
+      "step": 67220
+    },
+    {
+      "epoch": 0.09960360018723818,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004835190208265012,
+      "loss": 16.6609,
+      "step": 67240
+    },
+    {
+      "epoch": 0.09963322648116657,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004835140818914936,
+      "loss": 16.6259,
+      "step": 67260
+    },
+    {
+      "epoch": 0.09966285277509496,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.000483509142956486,
+      "loss": 16.6092,
+      "step": 67280
+    },
+    {
+      "epoch": 0.09969247906902334,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004835042040214784,
+      "loss": 16.6204,
+      "step": 67300
+    },
+    {
+      "epoch": 0.09972210536295173,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004834992650864709,
+      "loss": 16.6558,
+      "step": 67320
+    },
+    {
+      "epoch": 0.09975173165688012,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004834943261514633,
+      "loss": 16.6027,
+      "step": 67340
+    },
+    {
+      "epoch": 0.0997813579508085,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048348938721645576,
+      "loss": 16.5951,
+      "step": 67360
+    },
+    {
+      "epoch": 0.09981098424473689,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048348444828144816,
+      "loss": 16.6149,
+      "step": 67380
+    },
+    {
+      "epoch": 0.09984061053866528,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.00048347950934644066,
+      "loss": 16.5772,
+      "step": 67400
+    },
+    {
+      "epoch": 0.09987023683259366,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048347457041143305,
+      "loss": 16.6317,
+      "step": 67420
+    },
+    {
+      "epoch": 0.09989986312652205,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004834696314764255,
+      "loss": 16.5701,
+      "step": 67440
+    },
+    {
+      "epoch": 0.09992948942045043,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004834646925414179,
+      "loss": 16.6727,
+      "step": 67460
+    },
+    {
+      "epoch": 0.09995911571437882,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00048345975360641034,
+      "loss": 16.6282,
+      "step": 67480
+    },
+    {
+      "epoch": 0.09998874200830721,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004834548146714028,
+      "loss": 16.58,
+      "step": 67500
+    },
+    {
+      "epoch": 0.1000183683022356,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004834498757363952,
+      "loss": 16.657,
+      "step": 67520
+    },
+    {
+      "epoch": 0.100047994596164,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004834449368013877,
+      "loss": 16.6031,
+      "step": 67540
+    },
+    {
+      "epoch": 0.10007762089009238,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004834399978663801,
+      "loss": 16.5908,
+      "step": 67560
+    },
+    {
+      "epoch": 0.10010724718402077,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004834350589313725,
+      "loss": 16.5555,
+      "step": 67580
+    },
+    {
+      "epoch": 0.10013687347794915,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004834301199963649,
+      "loss": 16.6778,
+      "step": 67600
+    },
+    {
+      "epoch": 0.10016649977187754,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004834251810613574,
+      "loss": 16.6049,
+      "step": 67620
+    },
+    {
+      "epoch": 0.10019612606580593,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004834202421263498,
+      "loss": 16.6337,
+      "step": 67640
+    },
+    {
+      "epoch": 0.10022575235973431,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048341530319134226,
+      "loss": 16.6476,
+      "step": 67660
+    },
+    {
+      "epoch": 0.1002553786536627,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00048341036425633466,
+      "loss": 16.6637,
+      "step": 67680
+    },
+    {
+      "epoch": 0.10028500494759109,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048340542532132716,
+      "loss": 16.6126,
+      "step": 67700
+    },
+    {
+      "epoch": 0.10031463124151947,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048340048638631955,
+      "loss": 16.6287,
+      "step": 67720
+    },
+    {
+      "epoch": 0.10034425753544786,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.000483395547451312,
+      "loss": 16.6074,
+      "step": 67740
+    },
+    {
+      "epoch": 0.10037388382937625,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004833906085163044,
+      "loss": 16.5914,
+      "step": 67760
+    },
+    {
+      "epoch": 0.10040351012330463,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004833856695812969,
+      "loss": 16.6092,
+      "step": 67780
+    },
+    {
+      "epoch": 0.10043313641723302,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004833807306462893,
+      "loss": 16.6073,
+      "step": 67800
+    },
+    {
+      "epoch": 0.1004627627111614,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004833757917112817,
+      "loss": 16.5553,
+      "step": 67820
+    },
+    {
+      "epoch": 0.10049238900508979,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004833708527762742,
+      "loss": 16.6536,
+      "step": 67840
+    },
+    {
+      "epoch": 0.10052201529901819,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.0004833659138412666,
+      "loss": 16.6156,
+      "step": 67860
+    },
+    {
+      "epoch": 0.10055164159294658,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048336097490625903,
+      "loss": 16.527,
+      "step": 67880
+    },
+    {
+      "epoch": 0.10058126788687496,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004833560359712514,
+      "loss": 16.5762,
+      "step": 67900
+    },
+    {
+      "epoch": 0.10061089418080335,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004833510970362439,
+      "loss": 16.5745,
+      "step": 67920
+    },
+    {
+      "epoch": 0.10064052047473174,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004833461581012363,
+      "loss": 16.6078,
+      "step": 67940
+    },
+    {
+      "epoch": 0.10067014676866012,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048334121916622876,
+      "loss": 16.581,
+      "step": 67960
+    },
+    {
+      "epoch": 0.10069977306258851,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048333628023122116,
+      "loss": 16.6142,
+      "step": 67980
+    },
+    {
+      "epoch": 0.1007293993565169,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048333134129621366,
+      "loss": 16.5557,
+      "step": 68000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 4.999562170735998e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null