Training in progress, step 160, checkpoint

Files changed (6) hide show

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5eccc29f20136e494c181b5aa29e8b7587894903fe73530f9417233b28db3cd6
 size 250490408

 version https://git-lfs.github.com/spec/v1
+oid sha256:65bf4bf882aaede4a28a7946cfc9bdcf36b3c1e5ca2e0ff522619cff2f6a7dcc
 size 250490408

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb2b6093a475d42265f69c469897bf59a1052f8a2162fe277c84c75e1c2d7121
 size 255265850

 version https://git-lfs.github.com/spec/v1
+oid sha256:ea7a7662193bb7eec3ae38ae2840d7d8ba684cc08e3f58fa084ebd57157cdd62
 size 255265850

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d3162f96d93c4de8f0b55d1e1a81f49c8184b1e035e8b2823a0289ac758bbc4
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:fe45c69e48a4c5c66de748d1ec7b6fa257eefbb2af87020ecbb0edb8fac5e065
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3ec93be897e5dfa0ec937b22566356bc36cb89c5fcbcabc5d6f7aa190d5e8539
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:f08d1b5b2fb7f095d5f8456b35fce01dec787b65d26419e00acaecb339621b63
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:648d534e3bfa3788d45747301f1f48cbd9086fc244a9af1c996a30c275d122dc
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf268cb489a17b00a5da2e7f7b4a75dcc816743c1b54be7c48bf73c90d10ecf4
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 11.666666666666666,
   "eval_steps": 40,
-  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -137,6 +137,28 @@
       "learning_rate": 0.0001821777815225245,
       "loss": 3.8009,
       "step": 140
     }
   ],
   "logging_steps": 10,
@@ -156,7 +178,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 9085646496858112.0,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 13.333333333333334,
   "eval_steps": 40,
+  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0001821777815225245,
       "loss": 3.8009,
       "step": 140
+    },
+    {
+      "epoch": 12.5,
+      "grad_norm": 4.375,
+      "learning_rate": 0.00017891405093963938,
+      "loss": 3.7176,
+      "step": 150
+    },
+    {
+      "epoch": 13.333333333333334,
+      "grad_norm": 4.25,
+      "learning_rate": 0.00017541066097768963,
+      "loss": 3.6337,
+      "step": 160
+    },
+    {
+      "epoch": 13.333333333333334,
+      "eval_loss": 3.2374608516693115,
+      "eval_runtime": 10.482,
+      "eval_samples_per_second": 143.198,
+      "eval_steps_per_second": 2.29,
+      "step": 160
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.0372248443551744e+16,
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null