Training in progress, step 1650, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +148 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be8a45329c3e8e3a2f45c77d7ac9080fd0aad7fa0e5966556ccab7ad8fa2f098
 size 527048968

 version https://git-lfs.github.com/spec/v1
+oid sha256:e34a2741903c7131a591ab1ea100bd60a9f729205b2327f5d897a43dd1a350df
 size 527048968

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1178946cf0771c850e2a43f81c79255d308890814174c3df8070c8810c0eba4c
 size 1054135994

 version https://git-lfs.github.com/spec/v1
+oid sha256:b8b868ab44d14f751a290f2fb9a43b0004c429bf63a62d6da5cdde1046626611
 size 1054135994

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:86a4d2549b1634b5162bba6559970b4387f95d4c5153179e15ae2066cc09b884
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:001785b24c383f1dc5e05ca97682558022e868af635239d8c60b6646c2c21747
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d00fd7759971df004f86f26240bbcec291b25e581e60ea022fb56951da465e4f
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e0ba921a18d46c557f13f21f43adf78f0e35b0b4cbde5268f7f2125015b3077
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.7166430950164795,
   "best_model_checkpoint": "./output/checkpoint-450",
-  "epoch": 65.21739130434783,
   "eval_steps": 150,
-  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1457,6 +1457,151 @@
       "EMA_steps_per_second": 24.774,
       "epoch": 65.21739130434783,
       "step": 1500
     }
   ],
   "logging_steps": 10,
@@ -1476,7 +1621,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.854430872108237e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.7166430950164795,
   "best_model_checkpoint": "./output/checkpoint-450",
+  "epoch": 71.73913043478261,
   "eval_steps": 150,
+  "global_step": 1650,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "EMA_steps_per_second": 24.774,
       "epoch": 65.21739130434783,
       "step": 1500
+    },
+    {
+      "epoch": 65.65217391304348,
+      "grad_norm": 2.2542121410369873,
+      "learning_rate": 1.4981367287650419e-05,
+      "loss": 0.3164,
+      "step": 1510
+    },
+    {
+      "epoch": 66.08695652173913,
+      "grad_norm": 1.7643301486968994,
+      "learning_rate": 1.4981303451854267e-05,
+      "loss": 0.2947,
+      "step": 1520
+    },
+    {
+      "epoch": 66.52173913043478,
+      "grad_norm": 1.7471901178359985,
+      "learning_rate": 1.4981236647145501e-05,
+      "loss": 0.3103,
+      "step": 1530
+    },
+    {
+      "epoch": 66.95652173913044,
+      "grad_norm": 2.057833194732666,
+      "learning_rate": 1.4981166873550601e-05,
+      "loss": 0.3051,
+      "step": 1540
+    },
+    {
+      "epoch": 67.3913043478261,
+      "grad_norm": 1.7425355911254883,
+      "learning_rate": 1.4981094131097224e-05,
+      "loss": 0.2713,
+      "step": 1550
+    },
+    {
+      "epoch": 67.82608695652173,
+      "grad_norm": 2.050690174102783,
+      "learning_rate": 1.49810184198142e-05,
+      "loss": 0.3439,
+      "step": 1560
+    },
+    {
+      "epoch": 68.26086956521739,
+      "grad_norm": 2.0778491497039795,
+      "learning_rate": 1.498093973973154e-05,
+      "loss": 0.2503,
+      "step": 1570
+    },
+    {
+      "epoch": 68.69565217391305,
+      "grad_norm": 1.8078017234802246,
+      "learning_rate": 1.4980858090880429e-05,
+      "loss": 0.2862,
+      "step": 1580
+    },
+    {
+      "epoch": 69.1304347826087,
+      "grad_norm": 1.9451018571853638,
+      "learning_rate": 1.4980773473293232e-05,
+      "loss": 0.368,
+      "step": 1590
+    },
+    {
+      "epoch": 69.56521739130434,
+      "grad_norm": 1.9795953035354614,
+      "learning_rate": 1.4980685887003486e-05,
+      "loss": 0.3073,
+      "step": 1600
+    },
+    {
+      "epoch": 70.0,
+      "grad_norm": 1.6645371913909912,
+      "learning_rate": 1.498059533204591e-05,
+      "loss": 0.2691,
+      "step": 1610
+    },
+    {
+      "epoch": 70.43478260869566,
+      "grad_norm": 2.21379017829895,
+      "learning_rate": 1.4980501808456398e-05,
+      "loss": 0.3142,
+      "step": 1620
+    },
+    {
+      "epoch": 70.8695652173913,
+      "grad_norm": 1.9500844478607178,
+      "learning_rate": 1.4980405316272018e-05,
+      "loss": 0.2996,
+      "step": 1630
+    },
+    {
+      "epoch": 71.30434782608695,
+      "grad_norm": 2.359870195388794,
+      "learning_rate": 1.4980305855531015e-05,
+      "loss": 0.2888,
+      "step": 1640
+    },
+    {
+      "epoch": 71.73913043478261,
+      "grad_norm": 1.8895881175994873,
+      "learning_rate": 1.4980203426272815e-05,
+      "loss": 0.2624,
+      "step": 1650
+    },
+    {
+      "epoch": 71.73913043478261,
+      "eval_loss": 0.847686767578125,
+      "eval_runtime": 0.5359,
+      "eval_samples_per_second": 18.659,
+      "eval_steps_per_second": 18.659,
+      "step": 1650
+    },
+    {
+      "Start_State_loss": 0.8601926565170288,
+      "Start_State_runtime": 0.3989,
+      "Start_State_samples_per_second": 25.067,
+      "Start_State_steps_per_second": 25.067,
+      "epoch": 71.73913043478261,
+      "step": 1650
+    },
+    {
+      "Raw_Model_loss": 0.847686767578125,
+      "Raw_Model_runtime": 0.4133,
+      "Raw_Model_samples_per_second": 24.198,
+      "Raw_Model_steps_per_second": 24.198,
+      "epoch": 71.73913043478261,
+      "step": 1650
+    },
+    {
+      "SWA_loss": 0.7314801216125488,
+      "SWA_runtime": 0.3914,
+      "SWA_samples_per_second": 25.548,
+      "SWA_steps_per_second": 25.548,
+      "epoch": 71.73913043478261,
+      "step": 1650
+    },
+    {
+      "EMA_loss": 0.8605908155441284,
+      "EMA_runtime": 0.3897,
+      "EMA_samples_per_second": 25.662,
+      "EMA_steps_per_second": 25.662,
+      "epoch": 71.73913043478261,
+      "step": 1650
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 4.242701339976499e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null