Training in progress, step 200, checkpoint

Files changed (6) hide show

last-checkpoint/adapter/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3e4cf0f4856a6e6f56b18e9e50d8071fba50d1de4c9a3651203574256ed0771
 size 5923472

 version https://git-lfs.github.com/spec/v1
+oid sha256:94b4c341c1f6e38b018a91527e20a50a8a47fba3fc594cc9eac764c40c6a664f
 size 5923472

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6eb98fa47c71554c71d92d77f55240183681222d6e4028c7b5c84c9f2c7187df
 size 15380683

 version https://git-lfs.github.com/spec/v1
+oid sha256:d0a75270c4c1aab635aa24596f87bb5eb324c9f3e368c579b1b66508e7d696c5
 size 15380683

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27ab3f95bb1ac111e6efedaa17909ea89e5e838d4cebcb0588f8b31b70905244
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:5cb67c0079b7a0236a758f25c62f579a989df8e152ab878b9e35a57181641f39
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a53320edab235c21487b7069195478dd15c40c497eb77473a2fb8b3a6590567
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:ca08c3f20018396cb1b58625cf16f0249c311373253deb820f8c47c2bac784ba
 size 1465

last-checkpoint/speaker_projector.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7ee44fd22d08d8f0309e043c5e9020904117477da52a3e9080818b67fd1a8dcc
 size 1710977

 version https://git-lfs.github.com/spec/v1
+oid sha256:4523af57f0e547436d2a2a2e23a3094b3fbb9b4183b1f6a530cbee4b52b8fa8f
 size 1710977

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.256,
   "eval_steps": 100,
-  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -23,6 +23,21 @@
       "eval_samples_per_second": 115.95,
       "eval_steps_per_second": 14.517,
       "step": 100
     }
   ],
   "logging_steps": 100,
@@ -42,7 +57,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5009860711845888.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.512,
   "eval_steps": 100,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 115.95,
       "eval_steps_per_second": 14.517,
       "step": 100
+    },
+    {
+      "epoch": 0.512,
+      "grad_norm": 0.14562372863292694,
+      "learning_rate": 9.998009998009998e-05,
+      "loss": 6.89924072265625,
+      "step": 200
+    },
+    {
+      "epoch": 0.512,
+      "eval_loss": 6.9076080322265625,
+      "eval_runtime": 21.3341,
+      "eval_samples_per_second": 117.184,
+      "eval_steps_per_second": 14.671,
+      "step": 200
     }
   ],
   "logging_steps": 100,
       "attributes": {}
     }
   },
+  "total_flos": 1.0038252361580544e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null