llama2-13B-supervised-ft-10-epochs-351

Browse files

Files changed (4) hide show

README.md +51 -8
adapter_config.json +5 -5
adapter_model.safetensors +2 -2
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -17,12 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a LoRA adapter produced by supervised fine-tuning; the training dataset was not recorded.
 It achieves the following results on the evaluation set:
-- eval_loss: 1.6825
-- eval_runtime: 99.0641
-- eval_samples_per_second: 0.697
-- eval_steps_per_second: 0.697
-- epoch: 7.53
-- step: 128
 ## Model description
@@ -41,7 +36,7 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5e-06
 - train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
@@ -49,9 +44,57 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 20
 - mixed_precision_training: Native AMP
 ### Framework versions
 - PEFT 0.8.2

 This model is a LoRA adapter produced by supervised fine-tuning; the training dataset was not recorded.
 It achieves the following results on the evaluation set:
+- Loss: 1.4471
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 5e-05
 - train_batch_size: 1
 - eval_batch_size: 1
 - seed: 42
 - total_train_batch_size: 16
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 10
 - mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 2.0121        | 0.24  | 4    | 1.9623          |
+| 2.0559        | 0.47  | 8    | 1.8792          |
+| 1.8364        | 0.71  | 12   | 1.8000          |
+| 1.6852        | 0.94  | 16   | 1.7083          |
+| 1.7672        | 1.18  | 20   | 1.6258          |
+| 1.6344        | 1.41  | 24   | 1.5731          |
+| 1.5026        | 1.65  | 28   | 1.5477          |
+| 1.4473        | 1.88  | 32   | 1.5344          |
+| 1.5297        | 2.12  | 36   | 1.5254          |
+| 1.5623        | 2.35  | 40   | 1.5183          |
+| 1.3866        | 2.59  | 44   | 1.5122          |
+| 1.5727        | 2.82  | 48   | 1.5067          |
+| 1.3696        | 3.06  | 52   | 1.5018          |
+| 1.5159        | 3.29  | 56   | 1.4973          |
+| 1.5107        | 3.53  | 60   | 1.4932          |
+| 1.4175        | 3.76  | 64   | 1.4893          |
+| 1.404         | 4.0   | 68   | 1.4854          |
+| 1.4115        | 4.24  | 72   | 1.4821          |
+| 1.5804        | 4.47  | 76   | 1.4788          |
+| 1.4163        | 4.71  | 80   | 1.4759          |
+| 1.4404        | 4.94  | 84   | 1.4729          |
+| 1.4331        | 5.18  | 88   | 1.4700          |
+| 1.4269        | 5.41  | 92   | 1.4674          |
+| 1.4902        | 5.65  | 96   | 1.4650          |
+| 1.493         | 5.88  | 100  | 1.4627          |
+| 1.4476        | 6.12  | 104  | 1.4608          |
+| 1.4142        | 6.35  | 108  | 1.4592          |
+| 1.4147        | 6.59  | 112  | 1.4573          |
+| 1.3399        | 6.82  | 116  | 1.4558          |
+| 1.4323        | 7.06  | 120  | 1.4543          |
+| 1.2752        | 7.29  | 124  | 1.4531          |
+| 1.4907        | 7.53  | 128  | 1.4524          |
+| 1.3854        | 7.76  | 132  | 1.4510          |
+| 1.3615        | 8.0   | 136  | 1.4498          |
+| 1.5176        | 8.24  | 140  | 1.4492          |
+| 1.3634        | 8.47  | 144  | 1.4490          |
+| 1.3644        | 8.71  | 148  | 1.4486          |
+| 1.4832        | 8.94  | 152  | 1.4481          |
+| 1.4305        | 9.18  | 156  | 1.4476          |
+| 1.5344        | 9.41  | 160  | 1.4473          |
+| 1.4204        | 9.65  | 164  | 1.4471          |
+| 1.3488        | 9.88  | 168  | 1.4471          |
 ### Framework versions
 - PEFT 0.8.2

adapter_config.json CHANGED Viewed

@@ -15,17 +15,17 @@
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 64,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "gate_proj",
-    "o_proj",
-    "down_proj",
     "q_proj",
-    "up_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false

   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 128,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "up_proj",
     "gate_proj",
+    "v_proj",
     "q_proj",
+    "down_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:270ced53a9714f416bffc8c4ffc60dc86f1e477f69b6173af6e30c63c4d6abd2
-size 1001465824

 version https://git-lfs.github.com/spec/v1
+oid sha256:a7270f8b2ce500576568e6d0d99dc89289051a76cac3bfc66b49b84b0c3bdb80
+size 2002857080

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:587b32f730f5908b2afd8b6d94ad08395d97c135b5a409ca2313f96f39a15d5f
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:2feb3a72004a78345189fddde57ecae17643f1dfe32c31b72068642d138e3242
 size 4664