starlineventures/pilot-talk

Browse files

Files changed (7) hide show

README.md +1 -1
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +66 -108
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -36,7 +36,7 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size: 5
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08

 The following hyperparameters were used during training:
 - learning_rate: 0.0001
+- train_batch_size: 10
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08

adapter_config.json CHANGED Viewed

@@ -23,11 +23,11 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "down_proj",
-    "v_proj",
     "q_proj",
-    "gate_proj",
     "k_proj",
     "up_proj",
     "o_proj"
   ],

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
+    "v_proj",
     "k_proj",
+    "gate_proj",
+    "down_proj",
     "up_proj",
     "o_proj"
   ],

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:618af00e309753cefbd7ef5740bfcd86b4d7fb319aeec37ac9eb20727dd70fc0
 size 94422368

 version https://git-lfs.github.com/spec/v1
+oid sha256:9cbfaa87820050cb9b4a70ab1715a8fb6c4b0d72c5d031b6f8ee9ae548a0ae95
 size 94422368

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.25,
     "total_flos": 0.0,
-    "train_loss": 0.2832891649007797,
-    "train_runtime": 123.2677,
-    "train_samples_per_second": 19.47,
-    "train_steps_per_second": 3.894
 }

 {
+    "epoch": 1.75,
     "total_flos": 0.0,
+    "train_loss": 0.2669122704437801,
+    "train_runtime": 143.3284,
+    "train_samples_per_second": 16.745,
+    "train_steps_per_second": 1.674
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.25,
     "total_flos": 0.0,
-    "train_loss": 0.2832891649007797,
-    "train_runtime": 123.2677,
-    "train_samples_per_second": 19.47,
-    "train_steps_per_second": 3.894
 }

 {
+    "epoch": 1.75,
     "total_flos": 0.0,
+    "train_loss": 0.2669122704437801,
+    "train_runtime": 143.3284,
+    "train_samples_per_second": 16.745,
+    "train_steps_per_second": 1.674
 }

trainer_state.json CHANGED Viewed

@@ -1,165 +1,123 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.25,
   "eval_steps": 500,
-  "global_step": 200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0625,
-      "grad_norm": 42.53068923950195,
-      "learning_rate": 9.791666666666667e-05,
-      "loss": 1.6155,
       "step": 10
     },
     {
-      "epoch": 0.125,
-      "grad_norm": 43.14527130126953,
-      "learning_rate": 9.583333333333334e-05,
-      "loss": 0.7779,
       "step": 20
     },
     {
-      "epoch": 0.1875,
-      "grad_norm": 42.024879455566406,
-      "learning_rate": 9.375e-05,
-      "loss": 0.3742,
       "step": 30
     },
     {
-      "epoch": 0.25,
-      "grad_norm": 40.460575103759766,
-      "learning_rate": 9.166666666666667e-05,
-      "loss": 0.2596,
       "step": 40
     },
     {
-      "epoch": 0.3125,
-      "grad_norm": 29.57782554626465,
-      "learning_rate": 8.958333333333335e-05,
-      "loss": 0.2438,
       "step": 50
     },
     {
-      "epoch": 0.375,
-      "grad_norm": 29.711307525634766,
-      "learning_rate": 8.75e-05,
-      "loss": 0.2827,
       "step": 60
     },
     {
-      "epoch": 0.4375,
-      "grad_norm": 31.016084671020508,
-      "learning_rate": 8.541666666666666e-05,
-      "loss": 0.2664,
       "step": 70
     },
     {
-      "epoch": 0.5,
-      "grad_norm": 36.58619689941406,
-      "learning_rate": 8.333333333333334e-05,
-      "loss": 0.2032,
       "step": 80
     },
     {
-      "epoch": 0.5625,
-      "grad_norm": 32.24593734741211,
-      "learning_rate": 8.125000000000001e-05,
-      "loss": 0.1676,
       "step": 90
     },
     {
-      "epoch": 0.625,
-      "grad_norm": 30.47890853881836,
-      "learning_rate": 7.916666666666666e-05,
-      "loss": 0.1566,
       "step": 100
     },
     {
-      "epoch": 0.6875,
-      "grad_norm": 32.572731018066406,
-      "learning_rate": 7.708333333333334e-05,
-      "loss": 0.148,
       "step": 110
     },
     {
-      "epoch": 0.75,
-      "grad_norm": 36.662147521972656,
-      "learning_rate": 7.500000000000001e-05,
-      "loss": 0.1436,
       "step": 120
     },
     {
-      "epoch": 0.8125,
-      "grad_norm": 36.703155517578125,
-      "learning_rate": 7.291666666666667e-05,
-      "loss": 0.1374,
       "step": 130
     },
     {
-      "epoch": 0.875,
-      "grad_norm": 36.09929656982422,
-      "learning_rate": 7.083333333333334e-05,
-      "loss": 0.1344,
       "step": 140
     },
     {
-      "epoch": 0.9375,
-      "grad_norm": 36.28221893310547,
-      "learning_rate": 6.875e-05,
-      "loss": 0.1314,
-      "step": 150
-    },
-    {
-      "epoch": 1.0,
-      "grad_norm": 34.68128967285156,
-      "learning_rate": 6.666666666666667e-05,
-      "loss": 0.1282,
-      "step": 160
-    },
-    {
-      "epoch": 1.0625,
-      "grad_norm": 34.41044616699219,
-      "learning_rate": 6.458333333333334e-05,
-      "loss": 0.1253,
-      "step": 170
-    },
-    {
-      "epoch": 1.125,
-      "grad_norm": 34.5189323425293,
-      "learning_rate": 6.25e-05,
-      "loss": 0.1241,
-      "step": 180
-    },
-    {
-      "epoch": 1.1875,
-      "grad_norm": 34.811683654785156,
-      "learning_rate": 6.041666666666667e-05,
-      "loss": 0.1229,
-      "step": 190
-    },
-    {
-      "epoch": 1.25,
-      "grad_norm": 34.972721099853516,
-      "learning_rate": 5.833333333333334e-05,
-      "loss": 0.1231,
-      "step": 200
-    },
-    {
-      "epoch": 1.25,
-      "step": 200,
       "total_flos": 0.0,
-      "train_loss": 0.2832891649007797,
-      "train_runtime": 123.2677,
-      "train_samples_per_second": 19.47,
-      "train_steps_per_second": 3.894
     }
   ],
   "logging_steps": 10,
-  "max_steps": 480,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
@@ -176,7 +134,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.75,
   "eval_steps": 500,
+  "global_step": 140,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.125,
+      "grad_norm": 26.014522552490234,
+      "learning_rate": 9.583333333333334e-05,
+      "loss": 1.2951,
       "step": 10
     },
     {
+      "epoch": 0.25,
+      "grad_norm": 38.19829177856445,
+      "learning_rate": 9.166666666666667e-05,
+      "loss": 0.4151,
       "step": 20
     },
     {
+      "epoch": 0.375,
+      "grad_norm": 35.403507232666016,
+      "learning_rate": 8.75e-05,
+      "loss": 0.2558,
       "step": 30
     },
     {
+      "epoch": 0.5,
+      "grad_norm": 37.772884368896484,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 0.2447,
       "step": 40
     },
     {
+      "epoch": 0.625,
+      "grad_norm": 36.102352142333984,
+      "learning_rate": 7.916666666666666e-05,
+      "loss": 0.2089,
       "step": 50
     },
     {
+      "epoch": 0.75,
+      "grad_norm": 37.57475662231445,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.1759,
       "step": 60
     },
     {
+      "epoch": 0.875,
+      "grad_norm": 47.88151550292969,
+      "learning_rate": 7.083333333333334e-05,
+      "loss": 0.1593,
       "step": 70
     },
     {
+      "epoch": 1.0,
+      "grad_norm": 51.923824310302734,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.1495,
       "step": 80
     },
     {
+      "epoch": 1.125,
+      "grad_norm": 52.507205963134766,
+      "learning_rate": 6.25e-05,
+      "loss": 0.1442,
       "step": 90
     },
     {
+      "epoch": 1.25,
+      "grad_norm": 51.443668365478516,
+      "learning_rate": 5.833333333333334e-05,
+      "loss": 0.1408,
       "step": 100
     },
     {
+      "epoch": 1.375,
+      "grad_norm": 46.47669982910156,
+      "learning_rate": 5.4166666666666664e-05,
+      "loss": 0.1379,
       "step": 110
     },
     {
+      "epoch": 1.5,
+      "grad_norm": 42.74077606201172,
+      "learning_rate": 5e-05,
+      "loss": 0.1379,
       "step": 120
     },
     {
+      "epoch": 1.625,
+      "grad_norm": 40.221805572509766,
+      "learning_rate": 4.5833333333333334e-05,
+      "loss": 0.1371,
       "step": 130
     },
     {
+      "epoch": 1.75,
+      "grad_norm": 34.954124450683594,
+      "learning_rate": 4.166666666666667e-05,
+      "loss": 0.1346,
       "step": 140
     },
     {
+      "epoch": 1.75,
+      "step": 140,
       "total_flos": 0.0,
+      "train_loss": 0.2669122704437801,
+      "train_runtime": 143.3284,
+      "train_samples_per_second": 16.745,
+      "train_steps_per_second": 1.674
     }
   ],
   "logging_steps": 10,
+  "max_steps": 240,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 3,
   "save_steps": 500,
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:616fe51dd08a3eefcb7f6e92081d212fc4108fce67e30260302d2ab6ea932d65
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3328c58653121484b735b79386a21a53daeb13b417b5ef0defe329a7fa35aec
 size 5432