Training in progress, step 4950, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +118 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bc40fbefead84ea7caad963c1efa70501e832ef22239e7a54d927cfa0939bcba
 size 774409936

 version https://git-lfs.github.com/spec/v1
+oid sha256:ae90e0ed80cb5637bb6c9ecabcc99991c9828716da3d8c0cefea14ccc2ecbd9d
 size 774409936

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:650f15d525b37de4912730572da2d80697b8f8c54898d7d1c7fdfbc5a72fa5e9
 size 1523152634

 version https://git-lfs.github.com/spec/v1
+oid sha256:47ab85ba546c598f5673a4214419dd308b49a195a3355405daed92697e13424a
 size 1523152634

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:113aa991f3a0567dee9572abb91fc32e8bd02c99557fffc0f47d83da41eca9ef
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:0bc94cd5f166b08af777c923ba842bf10db572ab5700f8d6003b85076b1592f9
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:716d7ef0d2def98440e32b2cba336f73e613b85c0427aef8f0c8a6789d61bd46
 size 1256

 version https://git-lfs.github.com/spec/v1
+oid sha256:83dd037c783110e2dc4f61307500d62937b8821c50649baf8ed55dd7f5bddf19
 size 1256

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 1.4709917306900024,
-  "best_model_checkpoint": "./output/checkpoint-4800",
-  "epoch": 0.45201996421508617,
   "eval_steps": 150,
-  "global_step": 4800,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
       "eval_samples_per_second": 10.873,
       "eval_steps_per_second": 10.873,
       "step": 4800
     }
   ],
   "logging_steps": 10,
@@ -3642,7 +3755,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3.0622411684439654e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 1.4705742597579956,
+  "best_model_checkpoint": "./output/checkpoint-4950",
+  "epoch": 0.4661455880968076,
   "eval_steps": 150,
+  "global_step": 4950,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 10.873,
       "eval_steps_per_second": 10.873,
       "step": 4800
+    },
+    {
+      "epoch": 0.4529616724738676,
+      "grad_norm": 3.871877431869507,
+      "learning_rate": 8.285193919530187e-08,
+      "loss": 1.1957,
+      "step": 4810
+    },
+    {
+      "epoch": 0.453903380732649,
+      "grad_norm": 9.62677001953125,
+      "learning_rate": 7.436961672649524e-08,
+      "loss": 1.4019,
+      "step": 4820
+    },
+    {
+      "epoch": 0.45484508899143045,
+      "grad_norm": 8.656726837158203,
+      "learning_rate": 6.634381719640962e-08,
+      "loss": 1.7807,
+      "step": 4830
+    },
+    {
+      "epoch": 0.4557867972502119,
+      "grad_norm": 14.876380920410156,
+      "learning_rate": 5.877487051422937e-08,
+      "loss": 1.0471,
+      "step": 4840
+    },
+    {
+      "epoch": 0.4567285055089933,
+      "grad_norm": 3.9531829357147217,
+      "learning_rate": 5.166308780970642e-08,
+      "loss": 1.394,
+      "step": 4850
+    },
+    {
+      "epoch": 0.4576702137677747,
+      "grad_norm": 7.217214107513428,
+      "learning_rate": 4.500876142037269e-08,
+      "loss": 1.4854,
+      "step": 4860
+    },
+    {
+      "epoch": 0.45861192202655615,
+      "grad_norm": 2.900088310241699,
+      "learning_rate": 3.881216487952338e-08,
+      "loss": 1.0854,
+      "step": 4870
+    },
+    {
+      "epoch": 0.4595536302853376,
+      "grad_norm": 7.573949337005615,
+      "learning_rate": 3.307355290497236e-08,
+      "loss": 1.5179,
+      "step": 4880
+    },
+    {
+      "epoch": 0.46049533854411906,
+      "grad_norm": 5.3885087966918945,
+      "learning_rate": 2.7793161388579562e-08,
+      "loss": 1.3822,
+      "step": 4890
+    },
+    {
+      "epoch": 0.4614370468029005,
+      "grad_norm": 9.68319320678711,
+      "learning_rate": 2.2971207386559186e-08,
+      "loss": 1.4108,
+      "step": 4900
+    },
+    {
+      "epoch": 0.4623787550616819,
+      "grad_norm": 11.433544158935547,
+      "learning_rate": 1.8607889110554993e-08,
+      "loss": 1.48,
+      "step": 4910
+    },
+    {
+      "epoch": 0.46332046332046334,
+      "grad_norm": 14.155740737915039,
+      "learning_rate": 1.4703385919488896e-08,
+      "loss": 1.4005,
+      "step": 4920
+    },
+    {
+      "epoch": 0.46426217157924476,
+      "grad_norm": 3.100614309310913,
+      "learning_rate": 1.1257858312197773e-08,
+      "loss": 1.249,
+      "step": 4930
+    },
+    {
+      "epoch": 0.4652038798380262,
+      "grad_norm": 4.316708087921143,
+      "learning_rate": 8.271447920822464e-09,
+      "loss": 1.3052,
+      "step": 4940
+    },
+    {
+      "epoch": 0.4661455880968076,
+      "grad_norm": 9.99028491973877,
+      "learning_rate": 5.744277504999899e-09,
+      "loss": 1.1356,
+      "step": 4950
+    },
+    {
+      "epoch": 0.4661455880968076,
+      "eval_loss": 1.4705742597579956,
+      "eval_runtime": 47.4682,
+      "eval_samples_per_second": 10.533,
+      "eval_steps_per_second": 10.533,
+      "step": 4950
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 3.1532401080395366e+17,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null