Training in progress, step 3600, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d85d797097afce863d924a902fd673ba283872940210e3c933a64dde7a4be42
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:7a7db4ae93951b0eb394bb0a363f73cd5df34f9278223503ea607797313cdef9
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:32f59ddc5d1d02f125383197f9e87f354af36a0ab9f9c34a9101568ff6028781
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:550ddc0253077b9ade8068188ab7383f87735a416196347e817b58cdd6eecfa7
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8298cd2038dcf4bc8c0ba6dfa0d230a23246f758d069f7c77f9c04f77b6d8f8
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:79ae35034e3077f87418b20f4a24e69590c4f56a313fa0284d685c7f3a1b03d8
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5c1ec9215b291496d6c7959ac8b8fcef926d7c6ecd1d84b37dbf2c985bef91a3
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:92a0ba1807c4ff64f4d8fc6d84a7a517689523073c7ea31a60948b80a14d9e61
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.717534065246582,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 150.0,
   "eval_steps": 150,
-  "global_step": 3450,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3302,6 +3302,151 @@
       "EMA_steps_per_second": 25.002,
       "epoch": 150.0,
       "step": 3450
     }
   ],
   "logging_steps": 10,
@@ -3321,7 +3466,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.89909420608553e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.717534065246582,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 156.52173913043478,
   "eval_steps": 150,
+  "global_step": 3600,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 25.002,
       "epoch": 150.0,
       "step": 3450
+    },
+    {
+      "epoch": 150.43478260869566,
+      "grad_norm": 1.7541120052337646,
+      "learning_rate": 2.4672082280509036e-07,
+      "loss": 0.214,
+      "step": 3460
+    },
+    {
+      "epoch": 150.8695652173913,
+      "grad_norm": 2.0008656978607178,
+      "learning_rate": 4.934416456101807e-07,
+      "loss": 0.2627,
+      "step": 3470
+    },
+    {
+      "epoch": 151.30434782608697,
+      "grad_norm": 1.6539170742034912,
+      "learning_rate": 7.40162468415271e-07,
+      "loss": 0.2,
+      "step": 3480
+    },
+    {
+      "epoch": 151.7391304347826,
+      "grad_norm": 2.369926691055298,
+      "learning_rate": 9.868832912203614e-07,
+      "loss": 0.2478,
+      "step": 3490
+    },
+    {
+      "epoch": 152.17391304347825,
+      "grad_norm": 2.07112979888916,
+      "learning_rate": 1.2336041140254517e-06,
+      "loss": 0.2427,
+      "step": 3500
+    },
+    {
+      "epoch": 152.6086956521739,
+      "grad_norm": 1.6030749082565308,
+      "learning_rate": 1.480324936830542e-06,
+      "loss": 0.2402,
+      "step": 3510
+    },
+    {
+      "epoch": 153.04347826086956,
+      "grad_norm": 1.5949645042419434,
+      "learning_rate": 1.7270457596356322e-06,
+      "loss": 0.2072,
+      "step": 3520
+    },
+    {
+      "epoch": 153.47826086956522,
+      "grad_norm": 2.338641881942749,
+      "learning_rate": 1.973766582440723e-06,
+      "loss": 0.2506,
+      "step": 3530
+    },
+    {
+      "epoch": 153.91304347826087,
+      "grad_norm": 2.719093084335327,
+      "learning_rate": 2.220487405245813e-06,
+      "loss": 0.2321,
+      "step": 3540
+    },
+    {
+      "epoch": 154.34782608695653,
+      "grad_norm": 2.292358636856079,
+      "learning_rate": 2.4672082280509034e-06,
+      "loss": 0.2404,
+      "step": 3550
+    },
+    {
+      "epoch": 154.7826086956522,
+      "grad_norm": 2.0019381046295166,
+      "learning_rate": 2.4672079835702752e-06,
+      "loss": 0.2343,
+      "step": 3560
+    },
+    {
+      "epoch": 155.2173913043478,
+      "grad_norm": 1.6779125928878784,
+      "learning_rate": 2.4672072501284865e-06,
+      "loss": 0.1963,
+      "step": 3570
+    },
+    {
+      "epoch": 155.65217391304347,
+      "grad_norm": 2.0632243156433105,
+      "learning_rate": 2.467206027725829e-06,
+      "loss": 0.267,
+      "step": 3580
+    },
+    {
+      "epoch": 156.08695652173913,
+      "grad_norm": 1.6089539527893066,
+      "learning_rate": 2.467204316362787e-06,
+      "loss": 0.2034,
+      "step": 3590
+    },
+    {
+      "epoch": 156.52173913043478,
+      "grad_norm": 2.475633382797241,
+      "learning_rate": 2.4672021160400387e-06,
+      "loss": 0.2685,
+      "step": 3600
+    },
+    {
+      "epoch": 156.52173913043478,
+      "eval_loss": 0.9592596292495728,
+      "eval_runtime": 0.4813,
+      "eval_samples_per_second": 20.778,
+      "eval_steps_per_second": 20.778,
+      "step": 3600
+    },
+    {
+      "Start_State_loss": 0.7309322357177734,
+      "Start_State_runtime": 0.4223,
+      "Start_State_samples_per_second": 23.679,
+      "Start_State_steps_per_second": 23.679,
+      "epoch": 156.52173913043478,
+      "step": 3600
+    },
+    {
+      "Raw_Model_loss": 0.9592596292495728,
+      "Raw_Model_runtime": 0.3944,
+      "Raw_Model_samples_per_second": 25.356,
+      "Raw_Model_steps_per_second": 25.356,
+      "epoch": 156.52173913043478,
+      "step": 3600
+    },
+    {
+      "SWA_loss": 0.8119293451309204,
+      "SWA_runtime": 0.3904,
+      "SWA_samples_per_second": 25.615,
+      "SWA_steps_per_second": 25.615,
+      "epoch": 156.52173913043478,
+      "step": 3600
+    },
+    {
+      "EMA_loss": 0.7311049103736877,
+      "EMA_runtime": 0.4017,
+      "EMA_samples_per_second": 24.896,
+      "EMA_steps_per_second": 24.896,
+      "epoch": 156.52173913043478,
+      "step": 3600
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 9.28760054861906e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null