Training in progress, step 200, checkpoint

Browse files

Files changed (7) hide show

last-checkpoint/adapter_config.json +1 -1
last-checkpoint/adapter_model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +44 -121
last-checkpoint/training_args.bin +1 -1

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -34,8 +34,8 @@
   "revision": null,
   "target_modules": [
     "k_proj",
-    "v_proj",
     "gate_proj",
     "down_proj",
     "o_proj",
     "q_proj",

   "revision": null,
   "target_modules": [
     "k_proj",
     "gate_proj",
+    "v_proj",
     "down_proj",
     "o_proj",
     "q_proj",

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1f6ffbbc3ef1f6459a42262c20daa57644415f98821543c7715481de3e049f96
-size 71320216

 version https://git-lfs.github.com/spec/v1
+oid sha256:004a710a01739ae3c15a50d64e44652fec98d5ecb61417891e3231a60dc3e913
+size 69157536

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d772851b906c9cbdcaa2be3b9e1ae1aed7be5fd89bb71ea2c128fa1c81e39880
-size 142716747

 version https://git-lfs.github.com/spec/v1
+oid sha256:692fc2df0713e15ddbeca9b9efc0cc0b8de2692777ad27c718868912049276ef
+size 1657

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:053a4a2984d013d4d0873c07ec5f7e11687c065a6bb1930ad375f212dd298451
 size 14645

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa733b20b2588180e4f01040300a6b66e5d3ccc8c1888e13968f16f4605a02f5
 size 14645

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e25765e01d3a9886ffd3a8cf684f09035523b1ed36102a40cec344e367336da1
 size 1465

 version https://git-lfs.github.com/spec/v1
+oid sha256:8a81eebbf90e05919b75a3aef49bb021bcba6fe6e53cbcca4e18d781822f621e
 size 1465

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,229 +2,152 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 2.0,
   "eval_steps": 500,
-  "global_step": 314,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.064,
-      "grad_norm": 4.04085636138916,
       "learning_rate": 9.713375796178345e-06,
-      "loss": 3.491233062744141,
       "step": 10
     },
     {
       "epoch": 0.128,
-      "grad_norm": 2.1230499744415283,
       "learning_rate": 9.394904458598726e-06,
-      "loss": 3.2013439178466796,
       "step": 20
     },
     {
       "epoch": 0.192,
-      "grad_norm": 2.1368634700775146,
       "learning_rate": 9.07643312101911e-06,
-      "loss": 3.034148406982422,
       "step": 30
     },
     {
       "epoch": 0.256,
-      "grad_norm": 2.1545660495758057,
       "learning_rate": 8.757961783439492e-06,
-      "loss": 2.9740901947021485,
       "step": 40
     },
     {
       "epoch": 0.32,
-      "grad_norm": 1.511207938194275,
       "learning_rate": 8.439490445859873e-06,
-      "loss": 2.9105926513671876,
       "step": 50
     },
     {
       "epoch": 0.384,
-      "grad_norm": 1.613718867301941,
       "learning_rate": 8.121019108280256e-06,
-      "loss": 2.92062931060791,
       "step": 60
     },
     {
       "epoch": 0.448,
-      "grad_norm": 1.6831860542297363,
       "learning_rate": 7.802547770700637e-06,
-      "loss": 2.9122573852539064,
       "step": 70
     },
     {
       "epoch": 0.512,
-      "grad_norm": 1.7110016345977783,
       "learning_rate": 7.484076433121019e-06,
-      "loss": 2.856836128234863,
       "step": 80
     },
     {
       "epoch": 0.576,
-      "grad_norm": 2.022252321243286,
       "learning_rate": 7.1656050955414014e-06,
-      "loss": 2.9169572830200194,
       "step": 90
     },
     {
       "epoch": 0.64,
-      "grad_norm": 1.6567405462265015,
       "learning_rate": 6.8471337579617835e-06,
-      "loss": 2.8249130249023438,
       "step": 100
     },
     {
       "epoch": 0.704,
-      "grad_norm": 1.503312587738037,
       "learning_rate": 6.5286624203821655e-06,
-      "loss": 2.827799987792969,
       "step": 110
     },
     {
       "epoch": 0.768,
-      "grad_norm": 1.7029547691345215,
       "learning_rate": 6.210191082802548e-06,
-      "loss": 2.833590507507324,
       "step": 120
     },
     {
       "epoch": 0.832,
-      "grad_norm": 1.9305304288864136,
       "learning_rate": 5.89171974522293e-06,
-      "loss": 2.7830537796020507,
       "step": 130
     },
     {
       "epoch": 0.896,
-      "grad_norm": 1.4598714113235474,
       "learning_rate": 5.573248407643312e-06,
-      "loss": 2.8037981033325194,
       "step": 140
     },
     {
       "epoch": 0.96,
-      "grad_norm": 1.537855625152588,
       "learning_rate": 5.2547770700636944e-06,
-      "loss": 2.820992088317871,
       "step": 150
     },
     {
       "epoch": 1.0192,
-      "grad_norm": 1.706451654434204,
       "learning_rate": 4.9363057324840765e-06,
-      "loss": 2.7822286605834963,
       "step": 160
     },
     {
       "epoch": 1.0832,
-      "grad_norm": 1.5079314708709717,
       "learning_rate": 4.617834394904459e-06,
-      "loss": 2.770013427734375,
       "step": 170
     },
     {
       "epoch": 1.1472,
-      "grad_norm": 1.8642104864120483,
       "learning_rate": 4.299363057324841e-06,
-      "loss": 2.804817962646484,
       "step": 180
     },
     {
       "epoch": 1.2112,
-      "grad_norm": 1.5836384296417236,
       "learning_rate": 3.980891719745223e-06,
-      "loss": 2.7627302169799806,
       "step": 190
     },
     {
       "epoch": 1.2752,
-      "grad_norm": 1.5155103206634521,
       "learning_rate": 3.662420382165605e-06,
-      "loss": 2.7734092712402343,
       "step": 200
-    },
-    {
-      "epoch": 1.3392,
-      "grad_norm": 1.5116642713546753,
-      "learning_rate": 3.3439490445859875e-06,
-      "loss": 2.756932830810547,
-      "step": 210
-    },
-    {
-      "epoch": 1.4032,
-      "grad_norm": 1.5571694374084473,
-      "learning_rate": 3.0254777070063695e-06,
-      "loss": 2.738229751586914,
-      "step": 220
-    },
-    {
-      "epoch": 1.4672,
-      "grad_norm": 1.8248878717422485,
-      "learning_rate": 2.707006369426752e-06,
-      "loss": 2.718592643737793,
-      "step": 230
-    },
-    {
-      "epoch": 1.5312000000000001,
-      "grad_norm": 1.8225314617156982,
-      "learning_rate": 2.388535031847134e-06,
-      "loss": 2.7289609909057617,
-      "step": 240
-    },
-    {
-      "epoch": 1.5952,
-      "grad_norm": 1.5926852226257324,
-      "learning_rate": 2.070063694267516e-06,
-      "loss": 2.7409887313842773,
-      "step": 250
-    },
-    {
-      "epoch": 1.6592,
-      "grad_norm": 1.8249088525772095,
-      "learning_rate": 1.7515923566878982e-06,
-      "loss": 2.784321975708008,
-      "step": 260
-    },
-    {
-      "epoch": 1.7231999999999998,
-      "grad_norm": 1.720214605331421,
-      "learning_rate": 1.4331210191082802e-06,
-      "loss": 2.704195976257324,
-      "step": 270
-    },
-    {
-      "epoch": 1.7872,
-      "grad_norm": 2.020470380783081,
-      "learning_rate": 1.1146496815286625e-06,
-      "loss": 2.7614681243896486,
-      "step": 280
-    },
-    {
-      "epoch": 1.8512,
-      "grad_norm": 1.5761101245880127,
-      "learning_rate": 7.961783439490446e-07,
-      "loss": 2.762880325317383,
-      "step": 290
-    },
-    {
-      "epoch": 1.9152,
-      "grad_norm": 1.4878287315368652,
-      "learning_rate": 4.777070063694269e-07,
-      "loss": 2.7690963745117188,
-      "step": 300
-    },
-    {
-      "epoch": 1.9792,
-      "grad_norm": 1.6394439935684204,
-      "learning_rate": 1.5923566878980893e-07,
-      "loss": 2.7822860717773437,
-      "step": 310
     }
   ],
   "logging_steps": 10,
@@ -239,12 +162,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 8715036223782912.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.2752,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
       "epoch": 0.064,
+      "grad_norm": 0.0,
       "learning_rate": 9.713375796178345e-06,
+      "loss": 2.5330434799194337,
       "step": 10
     },
     {
       "epoch": 0.128,
+      "grad_norm": 0.0,
       "learning_rate": 9.394904458598726e-06,
+      "loss": 2.5563262939453124,
       "step": 20
     },
     {
       "epoch": 0.192,
+      "grad_norm": 0.0,
       "learning_rate": 9.07643312101911e-06,
+      "loss": 2.4857761383056642,
       "step": 30
     },
     {
       "epoch": 0.256,
+      "grad_norm": 0.0,
       "learning_rate": 8.757961783439492e-06,
+      "loss": 2.5507957458496096,
       "step": 40
     },
     {
       "epoch": 0.32,
+      "grad_norm": 0.0,
       "learning_rate": 8.439490445859873e-06,
+      "loss": 2.5324316024780273,
       "step": 50
     },
     {
       "epoch": 0.384,
+      "grad_norm": 0.0,
       "learning_rate": 8.121019108280256e-06,
+      "loss": 2.533696174621582,
       "step": 60
     },
     {
       "epoch": 0.448,
+      "grad_norm": 0.0,
       "learning_rate": 7.802547770700637e-06,
+      "loss": 2.688054656982422,
       "step": 70
     },
     {
       "epoch": 0.512,
+      "grad_norm": 0.0,
       "learning_rate": 7.484076433121019e-06,
+      "loss": 2.598434829711914,
       "step": 80
     },
     {
       "epoch": 0.576,
+      "grad_norm": 0.0,
       "learning_rate": 7.1656050955414014e-06,
+      "loss": 2.514725685119629,
       "step": 90
     },
     {
       "epoch": 0.64,
+      "grad_norm": 0.0,
       "learning_rate": 6.8471337579617835e-06,
+      "loss": 2.4956493377685547,
       "step": 100
     },
     {
       "epoch": 0.704,
+      "grad_norm": 0.0,
       "learning_rate": 6.5286624203821655e-06,
+      "loss": 2.6257051467895507,
       "step": 110
     },
     {
       "epoch": 0.768,
+      "grad_norm": 0.0,
       "learning_rate": 6.210191082802548e-06,
+      "loss": 2.5550819396972657,
       "step": 120
     },
     {
       "epoch": 0.832,
+      "grad_norm": 0.0,
       "learning_rate": 5.89171974522293e-06,
+      "loss": 2.5322742462158203,
       "step": 130
     },
     {
       "epoch": 0.896,
+      "grad_norm": 0.0,
       "learning_rate": 5.573248407643312e-06,
+      "loss": 2.61910343170166,
       "step": 140
     },
     {
       "epoch": 0.96,
+      "grad_norm": 0.0,
       "learning_rate": 5.2547770700636944e-06,
+      "loss": 2.584405708312988,
       "step": 150
     },
     {
       "epoch": 1.0192,
+      "grad_norm": 0.0,
       "learning_rate": 4.9363057324840765e-06,
+      "loss": 2.620287322998047,
       "step": 160
     },
     {
       "epoch": 1.0832,
+      "grad_norm": 0.0,
       "learning_rate": 4.617834394904459e-06,
+      "loss": 2.5575042724609376,
       "step": 170
     },
     {
       "epoch": 1.1472,
+      "grad_norm": 0.0,
       "learning_rate": 4.299363057324841e-06,
+      "loss": 2.554376411437988,
       "step": 180
     },
     {
       "epoch": 1.2112,
+      "grad_norm": 0.0,
       "learning_rate": 3.980891719745223e-06,
+      "loss": 2.51497745513916,
       "step": 190
     },
     {
       "epoch": 1.2752,
+      "grad_norm": 0.0,
       "learning_rate": 3.662420382165605e-06,
+      "loss": 2.5773569107055665,
       "step": 200
     }
   ],
   "logging_steps": 10,
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
+        "should_training_stop": false
       },
       "attributes": {}
     }
   },
+  "total_flos": 3909179867234304.0,
   "train_batch_size": 4,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1e88c25a1d4de80853a53173b8cfe311be9e1d5e07531379233d09d62bc4c4a2
 size 5649

 version https://git-lfs.github.com/spec/v1
+oid sha256:4aa2e3325989e8434fe3c0f7564866f88b52c2009f394bc964c603eedb7a1a38
 size 5649