Training in progress, step 30, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_config.json +5 -5
last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +73 -3
last-checkpoint/training_args.bin +2 -2

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
-    "q_proj",
     "up_proj",
-    "v_proj",
     "k_proj",
-    "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "gate_proj",
     "up_proj",
     "k_proj",
+    "down_proj",
+    "q_proj",
+    "v_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:388de8b1fd2d9b827937b3a78d2f0c4f9e28ffa087987c50cf8c48941c605e27
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:56a595ab8d0501db585f02e5044fa57e61fcac8cca3667878ad850a07e524b4e
 size 167832240

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37278b058da42c070ece918b5474e74f3a0c469f409efa5127d07d2cc5355219
-size 85723284

 version https://git-lfs.github.com/spec/v1
+oid sha256:2f21feea2e2f7292f02c7822951aa623f6cf71bda055968e00277d00429c41ed
+size 86889042

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bcc550cfbb47ad4cbc37125ea640e4b6df0c324dad2c713e9b18c9c4eb2ecb33
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:f2f0c5f75e18f8cb763d8ea5434e79bf35d98af96676e76ac16c35aac4009f48
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0016,
   "eval_steps": 500,
-  "global_step": 20,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -147,6 +147,76 @@
       "learning_rate": 0.00018461538461538463,
       "loss": 0.4766,
       "step": 20
     }
   ],
   "logging_steps": 1,
@@ -166,7 +236,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 5377867744100352.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.0024,
   "eval_steps": 500,
+  "global_step": 30,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00018461538461538463,
       "loss": 0.4766,
       "step": 20
+    },
+    {
+      "epoch": 0.00168,
+      "grad_norm": 0.4504310190677643,
+      "learning_rate": 0.00018358974358974358,
+      "loss": 0.5895,
+      "step": 21
+    },
+    {
+      "epoch": 0.00176,
+      "grad_norm": 0.4786751866340637,
+      "learning_rate": 0.00018256410256410258,
+      "loss": 0.4999,
+      "step": 22
+    },
+    {
+      "epoch": 0.00184,
+      "grad_norm": 0.5530946850776672,
+      "learning_rate": 0.00018153846153846155,
+      "loss": 0.8919,
+      "step": 23
+    },
+    {
+      "epoch": 0.00192,
+      "grad_norm": 0.6729783415794373,
+      "learning_rate": 0.00018051282051282052,
+      "loss": 0.4719,
+      "step": 24
+    },
+    {
+      "epoch": 0.002,
+      "grad_norm": 0.5149514675140381,
+      "learning_rate": 0.0001794871794871795,
+      "loss": 0.3778,
+      "step": 25
+    },
+    {
+      "epoch": 0.00208,
+      "grad_norm": 0.6156336069107056,
+      "learning_rate": 0.00017846153846153847,
+      "loss": 0.4884,
+      "step": 26
+    },
+    {
+      "epoch": 0.00216,
+      "grad_norm": 0.6409617066383362,
+      "learning_rate": 0.00017743589743589744,
+      "loss": 0.4507,
+      "step": 27
+    },
+    {
+      "epoch": 0.00224,
+      "grad_norm": 0.6406662464141846,
+      "learning_rate": 0.00017641025641025642,
+      "loss": 0.4805,
+      "step": 28
+    },
+    {
+      "epoch": 0.00232,
+      "grad_norm": 0.6323011517524719,
+      "learning_rate": 0.0001753846153846154,
+      "loss": 0.7445,
+      "step": 29
+    },
+    {
+      "epoch": 0.0024,
+      "grad_norm": 0.5450726747512817,
+      "learning_rate": 0.00017435897435897436,
+      "loss": 0.7906,
+      "step": 30
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 8053805909950464.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee3d2f856f997c674f6bb7fe372a2487f94f510b9888e9ae1ace34a8f9c4424b
-size 5560

 version https://git-lfs.github.com/spec/v1
+oid sha256:f97f8f21692b92eba2621575ec0a1f5b1dbf41c8aca4fd8165bf5c79e0de6d06
+size 5624