Training in progress, step 3450, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:415b012a16d8a3069af7b2d00a549e623ca759b20310f8dd3bc8182b71b9f1ce
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d85d797097afce863d924a902fd673ba283872940210e3c933a64dde7a4be42
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fb0dddd1ba0510c292970530f8d534b9d2c07fb94582bd515965a0add9b808f
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:32f59ddc5d1d02f125383197f9e87f354af36a0ab9f9c34a9101568ff6028781
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46d43b62f9cc9036b40877de2b586a52a3533079415fdaa5852bb39f1d0f3f9c
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:d8298cd2038dcf4bc8c0ba6dfa0d230a23246f758d069f7c77f9c04f77b6d8f8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:85714b7c0ed500123430db079e03a8a9d980fe0b19bc329292c309cdba264050
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5c1ec9215b291496d6c7959ac8b8fcef926d7c6ecd1d84b37dbf2c985bef91a3
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.717534065246582,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 143.47826086956522,
   "eval_steps": 150,
-  "global_step": 3300,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3157,6 +3157,151 @@
       "EMA_steps_per_second": 25.181,
       "epoch": 143.47826086956522,
       "step": 3300
     }
   ],
   "logging_steps": 10,
@@ -3176,7 +3321,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.51203574828974e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.717534065246582,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 150.0,
   "eval_steps": 150,
+  "global_step": 3450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 25.181,
       "epoch": 143.47826086956522,
       "step": 3300
+    },
+    {
+      "epoch": 143.91304347826087,
+      "grad_norm": 1.8851526975631714,
+      "learning_rate": 3.7921521115670724e-06,
+      "loss": 0.2538,
+      "step": 3310
+    },
+    {
+      "epoch": 144.34782608695653,
+      "grad_norm": 1.569898247718811,
+      "learning_rate": 3.7921134038575663e-06,
+      "loss": 0.2145,
+      "step": 3320
+    },
+    {
+      "epoch": 144.7826086956522,
+      "grad_norm": 1.718190312385559,
+      "learning_rate": 3.79207394481317e-06,
+      "loss": 0.2708,
+      "step": 3330
+    },
+    {
+      "epoch": 145.2173913043478,
+      "grad_norm": 2.9095687866210938,
+      "learning_rate": 3.7920337344495226e-06,
+      "loss": 0.2084,
+      "step": 3340
+    },
+    {
+      "epoch": 145.65217391304347,
+      "grad_norm": 1.8533018827438354,
+      "learning_rate": 3.791992772782563e-06,
+      "loss": 0.2381,
+      "step": 3350
+    },
+    {
+      "epoch": 146.08695652173913,
+      "grad_norm": 1.9780678749084473,
+      "learning_rate": 3.791951059828527e-06,
+      "loss": 0.2651,
+      "step": 3360
+    },
+    {
+      "epoch": 146.52173913043478,
+      "grad_norm": 1.834191083908081,
+      "learning_rate": 3.791908595603947e-06,
+      "loss": 0.2269,
+      "step": 3370
+    },
+    {
+      "epoch": 146.95652173913044,
+      "grad_norm": 1.6292699575424194,
+      "learning_rate": 3.7918653801256568e-06,
+      "loss": 0.2159,
+      "step": 3380
+    },
+    {
+      "epoch": 147.3913043478261,
+      "grad_norm": 1.5715214014053345,
+      "learning_rate": 3.791821413410784e-06,
+      "loss": 0.2288,
+      "step": 3390
+    },
+    {
+      "epoch": 147.82608695652175,
+      "grad_norm": 1.5430243015289307,
+      "learning_rate": 3.791776695476756e-06,
+      "loss": 0.2538,
+      "step": 3400
+    },
+    {
+      "epoch": 148.2608695652174,
+      "grad_norm": 1.466277837753296,
+      "learning_rate": 3.791731226341297e-06,
+      "loss": 0.2156,
+      "step": 3410
+    },
+    {
+      "epoch": 148.69565217391303,
+      "grad_norm": 1.8279281854629517,
+      "learning_rate": 3.7916850060224308e-06,
+      "loss": 0.2498,
+      "step": 3420
+    },
+    {
+      "epoch": 149.1304347826087,
+      "grad_norm": 1.7966867685317993,
+      "learning_rate": 3.791638034538477e-06,
+      "loss": 0.2716,
+      "step": 3430
+    },
+    {
+      "epoch": 149.56521739130434,
+      "grad_norm": 2.2440056800842285,
+      "learning_rate": 3.7915903119080527e-06,
+      "loss": 0.265,
+      "step": 3440
+    },
+    {
+      "epoch": 150.0,
+      "grad_norm": 3.2762231826782227,
+      "learning_rate": 3.7915418381500747e-06,
+      "loss": 0.2208,
+      "step": 3450
+    },
+    {
+      "epoch": 150.0,
+      "eval_loss": 0.9505823850631714,
+      "eval_runtime": 0.4422,
+      "eval_samples_per_second": 22.615,
+      "eval_steps_per_second": 22.615,
+      "step": 3450
+    },
+    {
+      "Start_State_loss": 0.7309322357177734,
+      "Start_State_runtime": 0.4072,
+      "Start_State_samples_per_second": 24.558,
+      "Start_State_steps_per_second": 24.558,
+      "epoch": 150.0,
+      "step": 3450
+    },
+    {
+      "Raw_Model_loss": 0.9505823850631714,
+      "Raw_Model_runtime": 0.4153,
+      "Raw_Model_samples_per_second": 24.076,
+      "Raw_Model_steps_per_second": 24.076,
+      "epoch": 150.0,
+      "step": 3450
+    },
+    {
+      "SWA_loss": 0.8045159578323364,
+      "SWA_runtime": 0.402,
+      "SWA_samples_per_second": 24.876,
+      "SWA_steps_per_second": 24.876,
+      "epoch": 150.0,
+      "step": 3450
+    },
+    {
+      "EMA_loss": 0.7316843271255493,
+      "EMA_runtime": 0.4,
+      "EMA_samples_per_second": 25.002,
+      "EMA_steps_per_second": 25.002,
+      "epoch": 150.0,
+      "step": 3450
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 8.89909420608553e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null