starlineventures/pilot-talk

Browse files

Files changed (7) hide show

README.md +2 -2
adapter_config.json +3 -3
adapter_model.safetensors +1 -1
all_results.json +5 -5
train_results.json +5 -5
trainer_state.json +63 -84
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -36,12 +36,12 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 0.0001
-- train_batch_size: 5
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 3
 ### Training results

 The following hyperparameters were used during training:
 - learning_rate: 0.0001
+- train_batch_size: 10
 - eval_batch_size: 16
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 2
 ### Training results

adapter_config.json CHANGED Viewed

@@ -23,12 +23,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "down_proj",
     "v_proj",
     "up_proj",
-    "o_proj",
     "q_proj",
     "gate_proj"
   ],
   "task_type": null,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "v_proj",
     "up_proj",
+    "down_proj",
     "q_proj",
+    "o_proj",
+    "k_proj",
     "gate_proj"
   ],
   "task_type": null,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54dba78f4b6e627f89fc3f21c2541ef05ddf6bb95172877a10b7ba7e8f292741
 size 94422368

 version https://git-lfs.github.com/spec/v1
+oid sha256:8e66932712cc6978913634cebf6137e4645ceba968663ffe2270fbf1dc51d50c
 size 94422368

all_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.30142025277018547,
-    "train_runtime": 98.1187,
-    "train_samples_per_second": 24.46,
-    "train_steps_per_second": 4.892
 }

 {
+    "epoch": 1.625,
     "total_flos": 0.0,
+    "train_loss": 0.2457691999582144,
+    "train_runtime": 133.1729,
+    "train_samples_per_second": 12.014,
+    "train_steps_per_second": 1.201
 }

train_results.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-    "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.30142025277018547,
-    "train_runtime": 98.1187,
-    "train_samples_per_second": 24.46,
-    "train_steps_per_second": 4.892
 }

 {
+    "epoch": 1.625,
     "total_flos": 0.0,
+    "train_loss": 0.2457691999582144,
+    "train_runtime": 133.1729,
+    "train_samples_per_second": 12.014,
+    "train_steps_per_second": 1.201
 }

trainer_state.json CHANGED Viewed

@@ -1,139 +1,118 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.0,
   "eval_steps": 500,
-  "global_step": 160,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.0625,
-      "grad_norm": 20.87160873413086,
-      "learning_rate": 9.791666666666667e-05,
-      "loss": 1.7149,
       "step": 10
     },
     {
-      "epoch": 0.125,
-      "grad_norm": 21.004980087280273,
-      "learning_rate": 9.583333333333334e-05,
-      "loss": 0.8011,
       "step": 20
     },
     {
-      "epoch": 0.1875,
-      "grad_norm": 20.969247817993164,
-      "learning_rate": 9.375e-05,
-      "loss": 0.3873,
       "step": 30
     },
     {
-      "epoch": 0.25,
-      "grad_norm": 23.872455596923828,
-      "learning_rate": 9.166666666666667e-05,
-      "loss": 0.2433,
       "step": 40
     },
     {
-      "epoch": 0.3125,
-      "grad_norm": 26.21103286743164,
-      "learning_rate": 8.958333333333335e-05,
-      "loss": 0.1846,
       "step": 50
     },
     {
-      "epoch": 0.375,
-      "grad_norm": 30.174484252929688,
-      "learning_rate": 8.75e-05,
-      "loss": 0.1698,
       "step": 60
     },
     {
-      "epoch": 0.4375,
-      "grad_norm": 30.354196548461914,
-      "learning_rate": 8.541666666666666e-05,
-      "loss": 0.1519,
       "step": 70
     },
     {
-      "epoch": 0.5,
-      "grad_norm": 30.821664810180664,
-      "learning_rate": 8.333333333333334e-05,
-      "loss": 0.143,
       "step": 80
     },
     {
-      "epoch": 0.5625,
-      "grad_norm": 35.24763488769531,
-      "learning_rate": 8.125000000000001e-05,
-      "loss": 0.1373,
       "step": 90
     },
     {
-      "epoch": 0.625,
-      "grad_norm": 34.299808502197266,
-      "learning_rate": 7.916666666666666e-05,
-      "loss": 0.1344,
       "step": 100
     },
     {
-      "epoch": 0.6875,
-      "grad_norm": 32.865936279296875,
-      "learning_rate": 7.708333333333334e-05,
-      "loss": 0.1282,
       "step": 110
     },
     {
-      "epoch": 0.75,
-      "grad_norm": 31.330398559570312,
-      "learning_rate": 7.500000000000001e-05,
-      "loss": 0.1282,
       "step": 120
     },
     {
-      "epoch": 0.8125,
-      "grad_norm": 30.16261100769043,
-      "learning_rate": 7.291666666666667e-05,
-      "loss": 0.1232,
       "step": 130
     },
     {
-      "epoch": 0.875,
-      "grad_norm": 29.665931701660156,
-      "learning_rate": 7.083333333333334e-05,
-      "loss": 0.1261,
-      "step": 140
-    },
-    {
-      "epoch": 0.9375,
-      "grad_norm": 30.841318130493164,
-      "learning_rate": 6.875e-05,
-      "loss": 0.1241,
-      "step": 150
-    },
-    {
-      "epoch": 1.0,
-      "grad_norm": 32.89496994018555,
-      "learning_rate": 6.666666666666667e-05,
-      "loss": 0.1253,
-      "step": 160
-    },
-    {
-      "epoch": 1.0,
-      "step": 160,
       "total_flos": 0.0,
-      "train_loss": 0.30142025277018547,
-      "train_runtime": 98.1187,
-      "train_samples_per_second": 24.46,
-      "train_steps_per_second": 4.892
     }
   ],
   "logging_steps": 10,
-  "max_steps": 480,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 3,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -148,7 +127,7 @@
     }
   },
   "total_flos": 0.0,
-  "train_batch_size": 5,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.625,
   "eval_steps": 500,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.125,
+      "grad_norm": 21.495697021484375,
+      "learning_rate": 9.375e-05,
+      "loss": 1.2248,
       "step": 10
     },
     {
+      "epoch": 0.25,
+      "grad_norm": 24.887985229492188,
+      "learning_rate": 8.75e-05,
+      "loss": 0.3582,
       "step": 20
     },
     {
+      "epoch": 0.375,
+      "grad_norm": 19.630859375,
+      "learning_rate": 8.125000000000001e-05,
+      "loss": 0.2253,
       "step": 30
     },
     {
+      "epoch": 0.5,
+      "grad_norm": 20.30435562133789,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.1816,
       "step": 40
     },
     {
+      "epoch": 0.625,
+      "grad_norm": 24.925588607788086,
+      "learning_rate": 6.875e-05,
+      "loss": 0.1588,
       "step": 50
     },
     {
+      "epoch": 0.75,
+      "grad_norm": 24.958547592163086,
+      "learning_rate": 6.25e-05,
+      "loss": 0.1508,
       "step": 60
     },
     {
+      "epoch": 0.875,
+      "grad_norm": 25.598276138305664,
+      "learning_rate": 5.6250000000000005e-05,
+      "loss": 0.1384,
       "step": 70
     },
     {
+      "epoch": 1.0,
+      "grad_norm": 25.500638961791992,
+      "learning_rate": 5e-05,
+      "loss": 0.1316,
       "step": 80
     },
     {
+      "epoch": 1.125,
+      "grad_norm": 26.4311466217041,
+      "learning_rate": 4.375e-05,
+      "loss": 0.127,
       "step": 90
     },
     {
+      "epoch": 1.25,
+      "grad_norm": 27.138051986694336,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.1265,
       "step": 100
     },
     {
+      "epoch": 1.375,
+      "grad_norm": 27.014684677124023,
+      "learning_rate": 3.125e-05,
+      "loss": 0.1247,
       "step": 110
     },
     {
+      "epoch": 1.5,
+      "grad_norm": 26.983625411987305,
+      "learning_rate": 2.5e-05,
+      "loss": 0.125,
       "step": 120
     },
     {
+      "epoch": 1.625,
+      "grad_norm": 27.20880126953125,
+      "learning_rate": 1.8750000000000002e-05,
+      "loss": 0.1223,
       "step": 130
     },
     {
+      "epoch": 1.625,
+      "step": 130,
       "total_flos": 0.0,
+      "train_loss": 0.2457691999582144,
+      "train_runtime": 133.1729,
+      "train_samples_per_second": 12.014,
+      "train_steps_per_second": 1.201
     }
   ],
   "logging_steps": 10,
+  "max_steps": 160,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
     }
   },
   "total_flos": 0.0,
+  "train_batch_size": 10,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c4f9fc9bff2e218da5e9f8f7b396e54282341ce6e9263a7f1e576e953a84151
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:14cc783a36dee3704e3da79bae3b11eb2d9a5095150f30b44c6a202027ac8ba5
 size 5432