Training in progress, step 4050, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +116 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f8fa8f720af10124343d16ee209390207e597ea5c09bd0bc6be42bf045fc2a8
 size 1502116544

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2cb27e04d92bb595af7bcd531079cb1a7260601f737a8d637a4c37175b81770
 size 1502116544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec25ee5915e8a4add2599f15d550cbf7ca81fdfa42a3b57acd6c3bd3832debd9
 size 2924673466

 version https://git-lfs.github.com/spec/v1
+oid sha256:b40cce3a5cd81047edcec8097e717345eaf07d2d73f6cd4162aba9976f43dc2c
 size 2924673466

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a82af09a987f26290b9f3f00d4e9552d816c6951e034630adf85af19b034a40f
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4ef1bf71bf6833b710c538ae8c380dfe197fac7f08964a7d0acdc6e98e34ee2
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6fcddfb1f106a755911246d49690b13d594cc29b7e565480a222840e2793c588
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:45f92a97ffdbc7f88199b20e87167f2ab2e0d78a2ac0becd89030b1e9e2faac0
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 0.9355312585830688,
   "best_model_checkpoint": "./output/checkpoint-3150",
-  "epoch": 2.4208566108007448,
   "eval_steps": 150,
-  "global_step": 3900,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2945,6 +2945,119 @@
       "eval_samples_per_second": 9.266,
       "eval_steps_per_second": 9.266,
       "step": 3900
     }
   ],
   "logging_steps": 10,
@@ -2964,7 +3077,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.49117140564609e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": 0.9355312585830688,
   "best_model_checkpoint": "./output/checkpoint-3150",
+  "epoch": 2.5139664804469275,
   "eval_steps": 150,
+  "global_step": 4050,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 9.266,
       "eval_steps_per_second": 9.266,
       "step": 3900
+    },
+    {
+      "epoch": 2.4270639354438237,
+      "grad_norm": 1.6727492809295654,
+      "learning_rate": 9.376546391952211e-06,
+      "loss": 0.4695,
+      "step": 3910
+    },
+    {
+      "epoch": 2.4332712600869026,
+      "grad_norm": 1.9848363399505615,
+      "learning_rate": 9.212189928903758e-06,
+      "loss": 0.5046,
+      "step": 3920
+    },
+    {
+      "epoch": 2.439478584729981,
+      "grad_norm": 1.9910500049591064,
+      "learning_rate": 9.049099032139725e-06,
+      "loss": 0.4243,
+      "step": 3930
+    },
+    {
+      "epoch": 2.44568590937306,
+      "grad_norm": 1.6215895414352417,
+      "learning_rate": 8.887280405688106e-06,
+      "loss": 0.4843,
+      "step": 3940
+    },
+    {
+      "epoch": 2.451893234016139,
+      "grad_norm": 1.9749666452407837,
+      "learning_rate": 8.72674070127881e-06,
+      "loss": 0.4632,
+      "step": 3950
+    },
+    {
+      "epoch": 2.458100558659218,
+      "grad_norm": 2.1119041442871094,
+      "learning_rate": 8.567486518070306e-06,
+      "loss": 0.4471,
+      "step": 3960
+    },
+    {
+      "epoch": 2.464307883302297,
+      "grad_norm": 1.5868020057678223,
+      "learning_rate": 8.409524402378308e-06,
+      "loss": 0.4282,
+      "step": 3970
+    },
+    {
+      "epoch": 2.4705152079453754,
+      "grad_norm": 2.0005483627319336,
+      "learning_rate": 8.252860847406712e-06,
+      "loss": 0.3916,
+      "step": 3980
+    },
+    {
+      "epoch": 2.4767225325884543,
+      "grad_norm": 1.7088433504104614,
+      "learning_rate": 8.097502292980626e-06,
+      "loss": 0.4363,
+      "step": 3990
+    },
+    {
+      "epoch": 2.4829298572315333,
+      "grad_norm": 1.8316535949707031,
+      "learning_rate": 7.943455125281741e-06,
+      "loss": 0.4325,
+      "step": 4000
+    },
+    {
+      "epoch": 2.489137181874612,
+      "grad_norm": 1.8140100240707397,
+      "learning_rate": 7.790725676585756e-06,
+      "loss": 0.4846,
+      "step": 4010
+    },
+    {
+      "epoch": 2.4953445065176907,
+      "grad_norm": 2.005836248397827,
+      "learning_rate": 7.639320225002106e-06,
+      "loss": 0.4892,
+      "step": 4020
+    },
+    {
+      "epoch": 2.5015518311607696,
+      "grad_norm": 2.0285496711730957,
+      "learning_rate": 7.489244994215897e-06,
+      "loss": 0.4536,
+      "step": 4030
+    },
+    {
+      "epoch": 2.5077591558038486,
+      "grad_norm": 1.8983845710754395,
+      "learning_rate": 7.340506153232052e-06,
+      "loss": 0.4346,
+      "step": 4040
+    },
+    {
+      "epoch": 2.5139664804469275,
+      "grad_norm": 1.8659793138504028,
+      "learning_rate": 7.193109816121762e-06,
+      "loss": 0.4594,
+      "step": 4050
+    },
+    {
+      "epoch": 2.5139664804469275,
+      "eval_loss": 0.9785549640655518,
+      "eval_runtime": 54.6526,
+      "eval_samples_per_second": 9.167,
+      "eval_steps_per_second": 9.167,
+      "step": 4050
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 6.741170098839982e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null