Training in progress, epoch 1, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +12 -71
last-checkpoint/training_args.bin +1 -1

last-checkpoint/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:622ceda914025a3cb6c3aea861afbd414ca14e4ada07549a490403bb60ed436e
 size 1447317080

 version https://git-lfs.github.com/spec/v1
+oid sha256:7eeef6ea82868fed037096afaae75138d366fdd8a948a478a6d39da41c0320cc
 size 1447317080

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3e7ab6da7bbcf285ba92b6a319955403be98c63cb8e5f808e2fac2b858c00b04
 size 2894813242

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e307af0d3d8da03f1b9c643b876554b892185b2c269d203e47abe5885b91daa
 size 2894813242

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9899ccda7f0d8d9511991180b93aab508ce6e8489de708c88ad1188e7e1d90d6
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:1ff264f99d31b522cc7e2a4eac9d38606d0c58a34c0adc74d71e0ca8b371dc36
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:230edc29c7f4d0d371abd36f9f7bae2a11f65b418f970748364cb37008e3f3ef
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:3f9b5be710a7d11d99443218152501810ac7e04613867545dd1469d3cfd97c06
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,86 +1,27 @@
 {
-  "best_metric": 0.37884894013404846,
-  "best_model_checkpoint": "./opt_trained2/checkpoint-3212",
-  "epoch": 4.0,
   "eval_steps": 500,
-  "global_step": 3212,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.6226650062266501,
-      "grad_norm": 0.7334734201431274,
-      "learning_rate": 0.0003875716064757161,
-      "loss": 3.2343,
       "step": 500
     },
     {
       "epoch": 1.0,
-      "eval_loss": 0.4036673307418823,
-      "eval_runtime": 273.6748,
-      "eval_samples_per_second": 10.436,
-      "eval_steps_per_second": 2.609,
       "step": 803
-    },
-    {
-      "epoch": 1.2453300124533002,
-      "grad_norm": 1.277639627456665,
-      "learning_rate": 0.0003751183063511831,
-      "loss": 3.2109,
-      "step": 1000
-    },
-    {
-      "epoch": 1.86799501867995,
-      "grad_norm": 1.1930288076400757,
-      "learning_rate": 0.00036273972602739727,
-      "loss": 3.4898,
-      "step": 1500
-    },
-    {
-      "epoch": 2.0,
-      "eval_loss": 0.3986678719520569,
-      "eval_runtime": 273.4619,
-      "eval_samples_per_second": 10.444,
-      "eval_steps_per_second": 2.611,
-      "step": 1606
-    },
-    {
-      "epoch": 2.4906600249066004,
-      "grad_norm": 0.363370418548584,
-      "learning_rate": 0.0003502864259028643,
-      "loss": 3.1553,
-      "step": 2000
-    },
-    {
-      "epoch": 3.0,
-      "eval_loss": 0.3892712891101837,
-      "eval_runtime": 273.5885,
-      "eval_samples_per_second": 10.439,
-      "eval_steps_per_second": 2.61,
-      "step": 2409
-    },
-    {
-      "epoch": 3.1133250311332503,
-      "grad_norm": 0.4317278563976288,
-      "learning_rate": 0.0003378331257783313,
-      "loss": 3.0505,
-      "step": 2500
-    },
-    {
-      "epoch": 3.7359900373599,
-      "grad_norm": 0.3730609118938446,
-      "learning_rate": 0.00032537982565379825,
-      "loss": 2.9785,
-      "step": 3000
-    },
-    {
-      "epoch": 4.0,
-      "eval_loss": 0.37884894013404846,
-      "eval_runtime": 273.844,
-      "eval_samples_per_second": 10.429,
-      "eval_steps_per_second": 2.607,
-      "step": 3212
     }
   ],
   "logging_steps": 500,
@@ -100,7 +41,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.2514885666272e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 0.344450980424881,
+  "best_model_checkpoint": "./opt_trained2/checkpoint-803",
+  "epoch": 1.0,
   "eval_steps": 500,
+  "global_step": 803,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.6226650062266501,
+      "grad_norm": 1.3505125045776367,
+      "learning_rate": 0.000387546699875467,
+      "loss": 2.7763,
       "step": 500
     },
     {
       "epoch": 1.0,
+      "eval_loss": 0.344450980424881,
+      "eval_runtime": 273.127,
+      "eval_samples_per_second": 10.457,
+      "eval_steps_per_second": 2.614,
       "step": 803
     }
   ],
   "logging_steps": 500,
       "attributes": {}
     }
   },
+  "total_flos": 3.128721416568e+16,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:69d0a29f401b9a755904f81ae972cbb11a5cf5b18ec1bf43f3d071d452c37c88
 size 5368

 version https://git-lfs.github.com/spec/v1
+oid sha256:83ebdc8e79af6a5c9fa4062f9f21aa548e32853f463a515333ea4cf561b7abfc
 size 5368