Training in progress, step 10000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +153 -5

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82e63226467172dd25840fb54845dc32e3e907af34adac5c0a79bcdcd65287ba
 size 745545644

 version https://git-lfs.github.com/spec/v1
+oid sha256:50faadf4bc595b7caaff8804037a5d2e7226402c4843fc494891b7c32a3f840a
 size 745545644

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e4a402c6d5f0ae39002e12b3d0535a8d925d46c680f2b88775e8ee3852513a24
 size 1491179130

 version https://git-lfs.github.com/spec/v1
+oid sha256:077ba1a6c558d5642f089528667c21b945745af87673e986ac92ec5d343a899b
 size 1491179130

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a6ec98147904582f9073debdd2d46db8be81f1fae8b7e30a4fb33c0724bf01a
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:c223685611eb8ca66172e98a5ae4d95e91d096b50c87d5ac51cf3a20f20698f0
 size 15024

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:53345026ed0535abd8cf6a0e1ecbdbf9a4d5ef86d37a344cbca5ef2567e6cc18
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:07ecb9f981bc1cea06513b87be9861bad9289cd33d5bc89c8c1d83bebf350e5a
 size 15024

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb45d6dbfeb6c6a340bea2c45d80cd63df0bd6b093ca44076be3a394a5390059
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:6b78f34e8919a0955c52fa2192994cab8b958695edae102824ebf7fb17e0c11e
 size 15024

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:184eba8fad81b78f577f59c07102dc1253cd2cdbd814ae3d20c579d2332fc55c
 size 15024

 version https://git-lfs.github.com/spec/v1
+oid sha256:497903fb965e5819817c1bcae461581db9a95d72adb9e86baf55fc2dc3a38323
 size 15024

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b34394007fb724a2242beef7117b8d040edeb5bc519c17102fa71720d6dd20f1
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:d404c0c28c3437e7b0154cff623cff9b7c51c81744b8ecf81ebf78e114cbab97
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.4018039107322693,
-  "best_model_checkpoint": "/workspace/result/modern_bert/checkpoint-5000",
-  "epoch": 0.5688767528514947,
   "eval_steps": 5000,
-  "global_step": 5000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -155,6 +155,154 @@
       "eval_samples_per_second": 868.824,
       "eval_steps_per_second": 6.789,
       "step": 5000
     }
   ],
   "logging_steps": 250,
@@ -174,7 +322,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.724307797816115e+17,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.37532275915145874,
+  "best_model_checkpoint": "/workspace/result/modern_bert/checkpoint-10000",
+  "epoch": 1.1376681741900616,
   "eval_steps": 5000,
+  "global_step": 10000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 868.824,
       "eval_steps_per_second": 6.789,
       "step": 5000
+    },
+    {
+      "epoch": 0.5973205904940695,
+      "grad_norm": 4.046791076660156,
+      "learning_rate": 0.000105,
+      "loss": 1.5728,
+      "step": 5250
+    },
+    {
+      "epoch": 0.6257644281366442,
+      "grad_norm": 3.785233736038208,
+      "learning_rate": 0.00011000000000000002,
+      "loss": 1.5712,
+      "step": 5500
+    },
+    {
+      "epoch": 0.654208265779219,
+      "grad_norm": 3.365227460861206,
+      "learning_rate": 0.00011499999999999999,
+      "loss": 1.5575,
+      "step": 5750
+    },
+    {
+      "epoch": 0.6826521034217937,
+      "grad_norm": 3.3776111602783203,
+      "learning_rate": 0.00012,
+      "loss": 1.5616,
+      "step": 6000
+    },
+    {
+      "epoch": 0.7110959410643684,
+      "grad_norm": 3.636517286300659,
+      "learning_rate": 0.000125,
+      "loss": 1.5548,
+      "step": 6250
+    },
+    {
+      "epoch": 0.7395397787069431,
+      "grad_norm": 3.1290106773376465,
+      "learning_rate": 0.00013000000000000002,
+      "loss": 1.5377,
+      "step": 6500
+    },
+    {
+      "epoch": 0.7679836163495178,
+      "grad_norm": 3.2603325843811035,
+      "learning_rate": 0.00013500000000000003,
+      "loss": 1.5418,
+      "step": 6750
+    },
+    {
+      "epoch": 0.7964274539920926,
+      "grad_norm": 2.521440029144287,
+      "learning_rate": 0.00014,
+      "loss": 1.5444,
+      "step": 7000
+    },
+    {
+      "epoch": 0.8248712916346673,
+      "grad_norm": 3.360039710998535,
+      "learning_rate": 0.000145,
+      "loss": 1.5442,
+      "step": 7250
+    },
+    {
+      "epoch": 0.8533151292772421,
+      "grad_norm": 3.529284715652466,
+      "learning_rate": 0.00015000000000000001,
+      "loss": 1.5225,
+      "step": 7500
+    },
+    {
+      "epoch": 0.8817589669198168,
+      "grad_norm": 2.673048257827759,
+      "learning_rate": 0.000155,
+      "loss": 1.5402,
+      "step": 7750
+    },
+    {
+      "epoch": 0.9102028045623916,
+      "grad_norm": 3.269730806350708,
+      "learning_rate": 0.00016,
+      "loss": 1.5309,
+      "step": 8000
+    },
+    {
+      "epoch": 0.9386466422049663,
+      "grad_norm": 2.5737032890319824,
+      "learning_rate": 0.000165,
+      "loss": 1.5179,
+      "step": 8250
+    },
+    {
+      "epoch": 0.967090479847541,
+      "grad_norm": 2.7734227180480957,
+      "learning_rate": 0.00017,
+      "loss": 1.5027,
+      "step": 8500
+    },
+    {
+      "epoch": 0.9955343174901158,
+      "grad_norm": 2.7340924739837646,
+      "learning_rate": 0.000175,
+      "loss": 1.5224,
+      "step": 8750
+    },
+    {
+      "epoch": 1.0238928236197629,
+      "grad_norm": 2.8657028675079346,
+      "learning_rate": 0.00018,
+      "loss": 1.4871,
+      "step": 9000
+    },
+    {
+      "epoch": 1.0523366612623375,
+      "grad_norm": 2.7939274311065674,
+      "learning_rate": 0.00018500000000000002,
+      "loss": 1.4865,
+      "step": 9250
+    },
+    {
+      "epoch": 1.0807804989049123,
+      "grad_norm": 2.5338730812072754,
+      "learning_rate": 0.00019,
+      "loss": 1.4753,
+      "step": 9500
+    },
+    {
+      "epoch": 1.109224336547487,
+      "grad_norm": 2.5085394382476807,
+      "learning_rate": 0.000195,
+      "loss": 1.4732,
+      "step": 9750
+    },
+    {
+      "epoch": 1.1376681741900616,
+      "grad_norm": 2.2118537425994873,
+      "learning_rate": 0.0002,
+      "loss": 1.4932,
+      "step": 10000
+    },
+    {
+      "epoch": 1.1376681741900616,
+      "eval_loss": 0.37532275915145874,
+      "eval_runtime": 575.1259,
+      "eval_samples_per_second": 869.375,
+      "eval_steps_per_second": 6.793,
+      "step": 10000
     }
   ],
   "logging_steps": 250,
       "attributes": {}
     }
   },
+  "total_flos": 1.7447306962046812e+18,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null