Training in progress, step 4500, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:21408a8c0f804aa69ccdc098361ffb0c38ee5eae2c774310be1c3d5f3d0c89cb
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:3a5a6f82a282e6ee513038360740bfa6163feb4b1d4b1bb3319d6b0ef1f4751f
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1bcaa1a30846eab83cb0fb8aeeb387ec463414d9ba20d28e1e9aa81c65bf4680
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:a71d3e09cfa9bcbe2ed92701fe51af492bac444d4e688ae56a471982c181c9e9
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a9ea00016d252bf419fc0794eade190f54eb50118e2e0be5b9c332ef3c36fc0
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:802e223144d189244ce5a768642009b3c15e29f14e41b4808f514470d4c7be6e
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6e7ae917a4132ef2fbbbdadfebce9aa687102db21112c728e0ebfe527b807e8a
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:0e092ded0b8103aeaa278f39556d48ebee944cc0f4cd6e8f95b6ba39b7752813
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 189.1304347826087,
   "eval_steps": 150,
-  "global_step": 4350,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -4212,6 +4212,151 @@
       "EMA_steps_per_second": 25.542,
       "epoch": 189.1304347826087,
       "step": 4350
     }
   ],
   "logging_steps": 10,
@@ -4231,7 +4376,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1202954752684851e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7166205048561096,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 195.65217391304347,
   "eval_steps": 150,
+  "global_step": 4500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 25.542,
       "epoch": 189.1304347826087,
       "step": 4350
+    },
+    {
+      "epoch": 189.56521739130434,
+      "grad_norm": 1.9401793479919434,
+      "learning_rate": 1.5299694155534387e-06,
+      "loss": 0.2163,
+      "step": 4360
+    },
+    {
+      "epoch": 190.0,
+      "grad_norm": 4.096744060516357,
+      "learning_rate": 1.529967444651148e-06,
+      "loss": 0.2344,
+      "step": 4370
+    },
+    {
+      "epoch": 190.43478260869566,
+      "grad_norm": 2.7062318325042725,
+      "learning_rate": 1.529965170535223e-06,
+      "loss": 0.251,
+      "step": 4380
+    },
+    {
+      "epoch": 190.8695652173913,
+      "grad_norm": 1.7941333055496216,
+      "learning_rate": 1.5299625932065658e-06,
+      "loss": 0.2192,
+      "step": 4390
+    },
+    {
+      "epoch": 191.30434782608697,
+      "grad_norm": 2.2132506370544434,
+      "learning_rate": 1.5299597126661977e-06,
+      "loss": 0.2179,
+      "step": 4400
+    },
+    {
+      "epoch": 191.7391304347826,
+      "grad_norm": 2.125366687774658,
+      "learning_rate": 1.5299565289152606e-06,
+      "loss": 0.2031,
+      "step": 4410
+    },
+    {
+      "epoch": 192.17391304347825,
+      "grad_norm": 2.0995376110076904,
+      "learning_rate": 1.5299530419550163e-06,
+      "loss": 0.2472,
+      "step": 4420
+    },
+    {
+      "epoch": 192.6086956521739,
+      "grad_norm": 2.151653289794922,
+      "learning_rate": 1.529949251786847e-06,
+      "loss": 0.2326,
+      "step": 4430
+    },
+    {
+      "epoch": 193.04347826086956,
+      "grad_norm": 2.5126099586486816,
+      "learning_rate": 1.5299451584122548e-06,
+      "loss": 0.234,
+      "step": 4440
+    },
+    {
+      "epoch": 193.47826086956522,
+      "grad_norm": 1.9897412061691284,
+      "learning_rate": 1.5299407618328622e-06,
+      "loss": 0.2401,
+      "step": 4450
+    },
+    {
+      "epoch": 193.91304347826087,
+      "grad_norm": 2.143177032470703,
+      "learning_rate": 1.5299360620504121e-06,
+      "loss": 0.2344,
+      "step": 4460
+    },
+    {
+      "epoch": 194.34782608695653,
+      "grad_norm": 2.5046348571777344,
+      "learning_rate": 1.5299310590667677e-06,
+      "loss": 0.2091,
+      "step": 4470
+    },
+    {
+      "epoch": 194.7826086956522,
+      "grad_norm": 2.4033350944519043,
+      "learning_rate": 1.529925752883911e-06,
+      "loss": 0.2265,
+      "step": 4480
+    },
+    {
+      "epoch": 195.2173913043478,
+      "grad_norm": 1.9208111763000488,
+      "learning_rate": 1.529920143503946e-06,
+      "loss": 0.2074,
+      "step": 4490
+    },
+    {
+      "epoch": 195.65217391304347,
+      "grad_norm": 2.4695804119110107,
+      "learning_rate": 1.5299142309290955e-06,
+      "loss": 0.2067,
+      "step": 4500
+    },
+    {
+      "epoch": 195.65217391304347,
+      "eval_loss": 0.9752073287963867,
+      "eval_runtime": 0.4001,
+      "eval_samples_per_second": 24.997,
+      "eval_steps_per_second": 24.997,
+      "step": 4500
+    },
+    {
+      "Start_State_loss": 0.8609819412231445,
+      "Start_State_runtime": 0.3943,
+      "Start_State_samples_per_second": 25.364,
+      "Start_State_steps_per_second": 25.364,
+      "epoch": 195.65217391304347,
+      "step": 4500
+    },
+    {
+      "Raw_Model_loss": 0.9752073287963867,
+      "Raw_Model_runtime": 0.3942,
+      "Raw_Model_samples_per_second": 25.367,
+      "Raw_Model_steps_per_second": 25.367,
+      "epoch": 195.65217391304347,
+      "step": 4500
+    },
+    {
+      "SWA_loss": 0.8120683431625366,
+      "SWA_runtime": 0.3958,
+      "SWA_samples_per_second": 25.266,
+      "SWA_steps_per_second": 25.266,
+      "epoch": 195.65217391304347,
+      "step": 4500
+    },
+    {
+      "EMA_loss": 0.8598009943962097,
+      "EMA_runtime": 0.3937,
+      "EMA_samples_per_second": 25.401,
+      "EMA_steps_per_second": 25.401,
+      "epoch": 195.65217391304347,
+      "step": 4500
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.1587388919393485e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null