starlineventures/pilot-talk

Browse files

Files changed (7) hide show

README.md +2 -2
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +112 -63
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -36,12 +36,12 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size: 10
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 2
 ### Training results

 The following hyperparameters were used during training:
 - learning_rate: 0.0001
+- train_batch_size: 5
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 3
 ### Training results

adapter_config.json CHANGED Viewed

@@ -23,13 +23,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "v_proj",
-    "up_proj",
     "down_proj",
     "q_proj",
-    "o_proj",
     "k_proj",
-    "gate_proj"
   ],
   "task_type": null,
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "down_proj",
+    "v_proj",
     "q_proj",
+    "gate_proj",
     "k_proj",
+    "up_proj",
+    "o_proj"
   ],
   "task_type": null,
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e66932712cc6978913634cebf6137e4645ceba968663ffe2270fbf1dc51d50c
 size 94422368

 version https://git-lfs.github.com/spec/v1
+oid sha256:618af00e309753cefbd7ef5740bfcd86b4d7fb319aeec37ac9eb20727dd70fc0
 size 94422368

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.625,
     "total_flos": 0.0,
-    "train_loss": 0.2457691999582144,
-    "train_runtime": 133.1729,
-    "train_samples_per_second": 12.014,
-    "train_steps_per_second": 1.201
 }

 {
+    "epoch": 1.25,
     "total_flos": 0.0,
+    "train_loss": 0.2832891649007797,
+    "train_runtime": 123.2677,
+    "train_samples_per_second": 19.47,
+    "train_steps_per_second": 3.894
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.625,
     "total_flos": 0.0,
-    "train_loss": 0.2457691999582144,
-    "train_runtime": 133.1729,
-    "train_samples_per_second": 12.014,
-    "train_steps_per_second": 1.201
 }

 {
+    "epoch": 1.25,
     "total_flos": 0.0,
+    "train_loss": 0.2832891649007797,
+    "train_runtime": 123.2677,
+    "train_samples_per_second": 19.47,
+    "train_steps_per_second": 3.894
 }

trainer_state.json CHANGED Viewed

@@ -1,118 +1,167 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.625,
   "eval_steps": 500,
-  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.125,
-      "grad_norm": 21.495697021484375,
-      "learning_rate": 9.375e-05,
-      "loss": 1.2248,
       "step": 10
     },
     {
-      "epoch": 0.25,
-      "grad_norm": 24.887985229492188,
-      "learning_rate": 8.75e-05,
-      "loss": 0.3582,
       "step": 20
     },
     {
-      "epoch": 0.375,
-      "grad_norm": 19.630859375,
-      "learning_rate": 8.125000000000001e-05,
-      "loss": 0.2253,
       "step": 30
     },
     {
-      "epoch": 0.5,
-      "grad_norm": 20.30435562133789,
-      "learning_rate": 7.500000000000001e-05,
-      "loss": 0.1816,
       "step": 40
     },
     {
-      "epoch": 0.625,
-      "grad_norm": 24.925588607788086,
-      "learning_rate": 6.875e-05,
-      "loss": 0.1588,
       "step": 50
     },
     {
-      "epoch": 0.75,
-      "grad_norm": 24.958547592163086,
-      "learning_rate": 6.25e-05,
-      "loss": 0.1508,
       "step": 60
     },
     {
-      "epoch": 0.875,
-      "grad_norm": 25.598276138305664,
-      "learning_rate": 5.6250000000000005e-05,
-      "loss": 0.1384,
       "step": 70
     },
     {
-      "epoch": 1.0,
-      "grad_norm": 25.500638961791992,
-      "learning_rate": 5e-05,
-      "loss": 0.1316,
       "step": 80
     },
     {
-      "epoch": 1.125,
-      "grad_norm": 26.4311466217041,
-      "learning_rate": 4.375e-05,
-      "loss": 0.127,
       "step": 90
     },
     {
-      "epoch": 1.25,
-      "grad_norm": 27.138051986694336,
-      "learning_rate": 3.7500000000000003e-05,
-      "loss": 0.1265,
       "step": 100
     },
     {
-      "epoch": 1.375,
-      "grad_norm": 27.014684677124023,
-      "learning_rate": 3.125e-05,
-      "loss": 0.1247,
       "step": 110
     },
     {
-      "epoch": 1.5,
-      "grad_norm": 26.983625411987305,
-      "learning_rate": 2.5e-05,
-      "loss": 0.125,
       "step": 120
     },
     {
-      "epoch": 1.625,
-      "grad_norm": 27.20880126953125,
-      "learning_rate": 1.8750000000000002e-05,
-      "loss": 0.1223,
       "step": 130
     },
     {
-      "epoch": 1.625,
-      "step": 130,
       "total_flos": 0.0,
-      "train_loss": 0.2457691999582144,
-      "train_runtime": 133.1729,
-      "train_samples_per_second": 12.014,
-      "train_steps_per_second": 1.201
     }
   ],
   "logging_steps": 10,
-  "max_steps": 160,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 2,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -127,7 +176,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.25,
   "eval_steps": 500,
+  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0625,
+      "grad_norm": 42.53068923950195,
+      "learning_rate": 9.791666666666667e-05,
+      "loss": 1.6155,
       "step": 10
     },
     {
+      "epoch": 0.125,
+      "grad_norm": 43.14527130126953,
+      "learning_rate": 9.583333333333334e-05,
+      "loss": 0.7779,
       "step": 20
     },
     {
+      "epoch": 0.1875,
+      "grad_norm": 42.024879455566406,
+      "learning_rate": 9.375e-05,
+      "loss": 0.3742,
       "step": 30
     },
     {
+      "epoch": 0.25,
+      "grad_norm": 40.460575103759766,
+      "learning_rate": 9.166666666666667e-05,
+      "loss": 0.2596,
       "step": 40
     },
     {
+      "epoch": 0.3125,
+      "grad_norm": 29.57782554626465,
+      "learning_rate": 8.958333333333335e-05,
+      "loss": 0.2438,
       "step": 50
     },
     {
+      "epoch": 0.375,
+      "grad_norm": 29.711307525634766,
+      "learning_rate": 8.75e-05,
+      "loss": 0.2827,
       "step": 60
     },
     {
+      "epoch": 0.4375,
+      "grad_norm": 31.016084671020508,
+      "learning_rate": 8.541666666666666e-05,
+      "loss": 0.2664,
       "step": 70
     },
     {
+      "epoch": 0.5,
+      "grad_norm": 36.58619689941406,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 0.2032,
       "step": 80
     },
     {
+      "epoch": 0.5625,
+      "grad_norm": 32.24593734741211,
+      "learning_rate": 8.125000000000001e-05,
+      "loss": 0.1676,
       "step": 90
     },
     {
+      "epoch": 0.625,
+      "grad_norm": 30.47890853881836,
+      "learning_rate": 7.916666666666666e-05,
+      "loss": 0.1566,
       "step": 100
     },
     {
+      "epoch": 0.6875,
+      "grad_norm": 32.572731018066406,
+      "learning_rate": 7.708333333333334e-05,
+      "loss": 0.148,
       "step": 110
     },
     {
+      "epoch": 0.75,
+      "grad_norm": 36.662147521972656,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.1436,
       "step": 120
     },
     {
+      "epoch": 0.8125,
+      "grad_norm": 36.703155517578125,
+      "learning_rate": 7.291666666666667e-05,
+      "loss": 0.1374,
       "step": 130
     },
     {
+      "epoch": 0.875,
+      "grad_norm": 36.09929656982422,
+      "learning_rate": 7.083333333333334e-05,
+      "loss": 0.1344,
+      "step": 140
+    },
+    {
+      "epoch": 0.9375,
+      "grad_norm": 36.28221893310547,
+      "learning_rate": 6.875e-05,
+      "loss": 0.1314,
+      "step": 150
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 34.68128967285156,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.1282,
+      "step": 160
+    },
+    {
+      "epoch": 1.0625,
+      "grad_norm": 34.41044616699219,
+      "learning_rate": 6.458333333333334e-05,
+      "loss": 0.1253,
+      "step": 170
+    },
+    {
+      "epoch": 1.125,
+      "grad_norm": 34.5189323425293,
+      "learning_rate": 6.25e-05,
+      "loss": 0.1241,
+      "step": 180
+    },
+    {
+      "epoch": 1.1875,
+      "grad_norm": 34.811683654785156,
+      "learning_rate": 6.041666666666667e-05,
+      "loss": 0.1229,
+      "step": 190
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 34.972721099853516,
+      "learning_rate": 5.833333333333334e-05,
+      "loss": 0.1231,
+      "step": 200
+    },
+    {
+      "epoch": 1.25,
+      "step": 200,
       "total_flos": 0.0,
+      "train_loss": 0.2832891649007797,
+      "train_runtime": 123.2677,
+      "train_samples_per_second": 19.47,
+      "train_steps_per_second": 3.894
     }
   ],
   "logging_steps": 10,
+  "max_steps": 480,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:14cc783a36dee3704e3da79bae3b11eb2d9a5095150f30b44c6a202027ac8ba5
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:616fe51dd08a3eefcb7f6e92081d212fc4108fce67e30260302d2ab6ea932d65
 size 5432