Model save

Files changed (7) hide show

README.md CHANGED Viewed

@@ -42,10 +42,8 @@ The following hyperparameters were used during training:
 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 2
 - gradient_accumulation_steps: 2
-- total_train_batch_size: 16
-- total_eval_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1

 - eval_batch_size: 8
 - seed: 42
 - distributed_type: multi-GPU
 - gradient_accumulation_steps: 2
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1

adapter_config.json CHANGED Viewed

@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "o_proj",
-    "k_proj",
-    "v_proj",
     "q_proj",
-    "down_proj",
     "gate_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "o_proj",
     "q_proj",
     "gate_proj",
+    "down_proj",
+    "up_proj",
+    "k_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

all_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 0.9836065573770492,
     "total_flos": 2.0918103256858624e+16,
     "train_loss": 0.0,
-    "train_runtime": 0.1537,
     "train_samples": 153,
-    "train_samples_per_second": 1587.386,
-    "train_steps_per_second": 97.585
 }

     "epoch": 0.9836065573770492,
     "total_flos": 2.0918103256858624e+16,
     "train_loss": 0.0,
+    "train_runtime": 0.1548,
     "train_samples": 153,
+    "train_samples_per_second": 1576.031,
+    "train_steps_per_second": 193.774
 }

runs/Jul31_16-24-12_VM-146-67-centos/events.out.tfevents.1722414285.VM-146-67-centos.3350321.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:68cf4981bad21269c8d0ac4da5b2f21d83ba80a235af18b0a452cee0ccd7ba22
+size 5290

train_results.json CHANGED Viewed

@@ -2,8 +2,8 @@
     "epoch": 0.9836065573770492,
     "total_flos": 2.0918103256858624e+16,
     "train_loss": 0.0,
-    "train_runtime": 0.1537,
     "train_samples": 153,
-    "train_samples_per_second": 1587.386,
-    "train_steps_per_second": 97.585
 }

     "epoch": 0.9836065573770492,
     "total_flos": 2.0918103256858624e+16,
     "train_loss": 0.0,
+    "train_runtime": 0.1548,
     "train_samples": 153,
+    "train_samples_per_second": 1576.031,
+    "train_steps_per_second": 193.774
 }

trainer_state.json CHANGED Viewed

@@ -62,13 +62,13 @@
       "step": 30,
       "total_flos": 2.0918103256858624e+16,
       "train_loss": 0.0,
-      "train_runtime": 0.1537,
-      "train_samples_per_second": 1587.386,
-      "train_steps_per_second": 97.585
     }
   ],
   "logging_steps": 5,
-  "max_steps": 15,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,

       "step": 30,
       "total_flos": 2.0918103256858624e+16,
       "train_loss": 0.0,
+      "train_runtime": 0.1548,
+      "train_samples_per_second": 1576.031,
+      "train_steps_per_second": 193.774
     }
   ],
   "logging_steps": 5,
+  "max_steps": 30,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebd7d984fa09a666a5006533c7a63743f2fdeb9cface8072d79678f1c5ed4a9f
 size 5240

 version https://git-lfs.github.com/spec/v1
+oid sha256:450f93ba028cf9ad2f9655b757053f5dd234fffe94c891e1f66ab309207c04d5
 size 5240