Training in progress, step 9000, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scaler.pt +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4cbfbd0f6a07b835e4eb96ae97dd49eff870ad88d6be40d3bd1e0357be8624f8
 size 471641972

 version https://git-lfs.github.com/spec/v1
+oid sha256:beb9560a8c2fe423ef88591ac0d75f94884fe5582f9b6b783282d22b1370fc23
 size 471641972

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d2245879460433ceb7eb7c5b19fed96d125ec36ce0d53f2cd06f9d87c1a467ab
 size 943408715

 version https://git-lfs.github.com/spec/v1
+oid sha256:e07c0e371e8347d7031e7a77e1760ae234fe283d2998e8890cb9f2acf682bd6f
 size 943408715

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04ed82d1fdf30f41ed12aad05e55898e36a9e89c5860f5a9745500b3b0bce109
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:d02bf5604d96d55b809dd1552690250e2164168d4b80ec847e5101bd7d910b5f
 size 14645

last-checkpoint/scaler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4da13d0d238bcfbcaf7c2d149fc9e058944471070eba242db234d5d3e151f20
 size 1383

 version https://git-lfs.github.com/spec/v1
+oid sha256:1a8a70b54b4c634942b08249f8cc6e6889c33d7c513637bc49b9d2b7a00426ce
 size 1383

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:acfed49c8551f776696d2cfe4ff48e27d2dd05686444ac96aaac243eb30eaf75
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:c7cd3679656ae98477cc816a6c7c865253dbb97497498a91f08c26ecf155e5b3
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.812162127864775,
   "eval_steps": 500,
-  "global_step": 8000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -575,6 +575,76 @@
       "learning_rate": 1.9790553537080574e-05,
       "loss": 1.967,
       "step": 8000
     }
   ],
   "logging_steps": 100,
@@ -594,7 +664,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4349680792659456.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.9136823938478719,
   "eval_steps": 500,
+  "global_step": 9000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 1.9790553537080574e-05,
       "loss": 1.967,
       "step": 8000
+    },
+    {
+      "epoch": 0.8223141544630846,
+      "grad_norm": 6.654317378997803,
+      "learning_rate": 1.8721949134430436e-05,
+      "loss": 1.9463,
+      "step": 8100
+    },
+    {
+      "epoch": 0.8324661810613944,
+      "grad_norm": 6.44837760925293,
+      "learning_rate": 1.7653344731780298e-05,
+      "loss": 1.9443,
+      "step": 8200
+    },
+    {
+      "epoch": 0.8426182076597041,
+      "grad_norm": 6.596092224121094,
+      "learning_rate": 1.658474032913016e-05,
+      "loss": 1.9514,
+      "step": 8300
+    },
+    {
+      "epoch": 0.8527702342580138,
+      "grad_norm": 6.20723295211792,
+      "learning_rate": 1.5516135926480017e-05,
+      "loss": 1.9623,
+      "step": 8400
+    },
+    {
+      "epoch": 0.8629222608563234,
+      "grad_norm": 6.767848491668701,
+      "learning_rate": 1.4447531523829879e-05,
+      "loss": 1.959,
+      "step": 8500
+    },
+    {
+      "epoch": 0.8730742874546331,
+      "grad_norm": 7.135315418243408,
+      "learning_rate": 1.337892712117974e-05,
+      "loss": 1.9661,
+      "step": 8600
+    },
+    {
+      "epoch": 0.8832263140529428,
+      "grad_norm": 6.851596832275391,
+      "learning_rate": 1.2310322718529602e-05,
+      "loss": 1.8859,
+      "step": 8700
+    },
+    {
+      "epoch": 0.8933783406512525,
+      "grad_norm": 5.696228504180908,
+      "learning_rate": 1.1241718315879462e-05,
+      "loss": 1.9249,
+      "step": 8800
+    },
+    {
+      "epoch": 0.9035303672495621,
+      "grad_norm": 7.561517238616943,
+      "learning_rate": 1.0173113913229324e-05,
+      "loss": 1.8969,
+      "step": 8900
+    },
+    {
+      "epoch": 0.9136823938478719,
+      "grad_norm": 5.516517639160156,
+      "learning_rate": 9.104509510579184e-06,
+      "loss": 1.952,
+      "step": 9000
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 4893769911505920.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null