Training in progress, step 180, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:65bf4bf882aaede4a28a7946cfc9bdcf36b3c1e5ca2e0ff522619cff2f6a7dcc
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:7dd92eac0c175facf66bcc6744b550abe565351d58bfc65728501b0eb34deb6a
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea7a7662193bb7eec3ae38ae2840d7d8ba684cc08e3f58fa084ebd57157cdd62
 size 255265850

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c5a2c0147463a169de4f6d49d24c6bab30ce74a9cdea031b1cbd9a46e14a1a3
 size 255265850

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fe45c69e48a4c5c66de748d1ec7b6fa257eefbb2af87020ecbb0edb8fac5e065
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:9eb8f173dc69af6518470d76de345e8f28d66983c49c7120eb4118b25379494c
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f08d1b5b2fb7f095d5f8456b35fce01dec787b65d26419e00acaecb339621b63
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef1dfb8026c7c70f38bd139303377768a0568a232a11e705dcdc6c898cedb8c3
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cf268cb489a17b00a5da2e7f7b4a75dcc816743c1b54be7c48bf73c90d10ecf4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:9f9f0e9fcb3fec99611921d8f34980406ad06f308daea00535e4da9839a46d96
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 13.333333333333334,
   "eval_steps": 40,
-  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -159,6 +159,20 @@
       "eval_samples_per_second": 143.198,
       "eval_steps_per_second": 2.29,
       "step": 160
     }
   ],
   "logging_steps": 10,
@@ -178,7 +192,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.0372248443551744e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 15.0,
   "eval_steps": 40,
+  "global_step": 180,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 143.198,
       "eval_steps_per_second": 2.29,
       "step": 160
+    },
+    {
+      "epoch": 14.166666666666666,
+      "grad_norm": 4.5,
+      "learning_rate": 0.00017167825131684513,
+      "loss": 3.5521,
+      "step": 170
+    },
+    {
+      "epoch": 15.0,
+      "grad_norm": 4.90625,
+      "learning_rate": 0.00016772815716257412,
+      "loss": 3.4769,
+      "step": 180
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.1658850390245376e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null