End of training

Browse files

Files changed (4) hide show

README.md +188 -0
adapter_config.json +32 -0
adapter_model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,188 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: mistralai/Mixtral-8x7B-Instruct-v0.1
+model-index:
+- name: Mixtral-8x7B-Instruct-v0.1
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# Mixtral-8x7B-Instruct-v0.1
+This model is a LoRA adapter (trained with PEFT) fine-tuned from [mistralai/Mixtral-8x7B-Instruct-v0.1](https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 1.3898
+## Model description
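+LoRA adapter for `mistralai/Mixtral-8x7B-Instruct-v0.1` (rank 64, alpha 16, dropout 0.1) applied to the `q_proj`, `k_proj`, `v_proj`, `o_proj`, `w1`, `w2`, `w3`, and `lm_head` modules. More information needed.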
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 1
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 1.8837        | 0.01  | 5    | 1.7824          |
+| 1.8578        | 0.02  | 10   | 1.7336          |
+| 1.6828        | 0.02  | 15   | 1.6999          |
+| 1.8063        | 0.03  | 20   | 1.6716          |
+| 1.6728        | 0.04  | 25   | 1.6400          |
+| 1.5326        | 0.05  | 30   | 1.6079          |
+| 1.5742        | 0.05  | 35   | 1.5795          |
+| 1.6315        | 0.06  | 40   | 1.5597          |
+| 1.6111        | 0.07  | 45   | 1.5449          |
+| 1.5815        | 0.08  | 50   | 1.5359          |
+| 1.4937        | 0.09  | 55   | 1.5234          |
+| 1.4524        | 0.09  | 60   | 1.5177          |
+| 1.4922        | 0.1   | 65   | 1.5150          |
+| 1.5636        | 0.11  | 70   | 1.5058          |
+| 1.5303        | 0.12  | 75   | 1.5020          |
+| 1.4182        | 0.12  | 80   | 1.5003          |
+| 1.4375        | 0.13  | 85   | 1.4919          |
+| 1.4287        | 0.14  | 90   | 1.4873          |
+| 1.5023        | 0.15  | 95   | 1.4859          |
+| 1.5331        | 0.16  | 100  | 1.4800          |
+| 1.4797        | 0.16  | 105  | 1.4784          |
+| 1.476         | 0.17  | 110  | 1.4741          |
+| 1.5398        | 0.18  | 115  | 1.4702          |
+| 1.4086        | 0.19  | 120  | 1.4670          |
+| 1.4222        | 0.19  | 125  | 1.4648          |
+| 1.4033        | 0.2   | 130  | 1.4623          |
+| 1.5386        | 0.21  | 135  | 1.4608          |
+| 1.4959        | 0.22  | 140  | 1.4583          |
+| 1.4908        | 0.22  | 145  | 1.4542          |
+| 1.4669        | 0.23  | 150  | 1.4510          |
+| 1.4733        | 0.24  | 155  | 1.4477          |
+| 1.5692        | 0.25  | 160  | 1.4458          |
+| 1.484         | 0.26  | 165  | 1.4468          |
+| 1.4186        | 0.26  | 170  | 1.4432          |
+| 1.3907        | 0.27  | 175  | 1.4391          |
+| 1.4489        | 0.28  | 180  | 1.4392          |
+| 1.471         | 0.29  | 185  | 1.4364          |
+| 1.31          | 0.29  | 190  | 1.4344          |
+| 1.3949        | 0.3   | 195  | 1.4324          |
+| 1.4094        | 0.31  | 200  | 1.4306          |
+| 1.4235        | 0.32  | 205  | 1.4275          |
+| 1.5056        | 0.33  | 210  | 1.4271          |
+| 1.4281        | 0.33  | 215  | 1.4251          |
+| 1.4329        | 0.34  | 220  | 1.4236          |
+| 1.3791        | 0.35  | 225  | 1.4221          |
+| 1.4189        | 0.36  | 230  | 1.4207          |
+| 1.4192        | 0.36  | 235  | 1.4198          |
+| 1.3807        | 0.37  | 240  | 1.4187          |
+| 1.4362        | 0.38  | 245  | 1.4177          |
+| 1.419         | 0.39  | 250  | 1.4174          |
+| 1.5039        | 0.4   | 255  | 1.4176          |
+| 1.4323        | 0.4   | 260  | 1.4160          |
+| 1.5249        | 0.41  | 265  | 1.4154          |
+| 1.4462        | 0.42  | 270  | 1.4144          |
+| 1.2841        | 0.43  | 275  | 1.4137          |
+| 1.3764        | 0.43  | 280  | 1.4137          |
+| 1.3063        | 0.44  | 285  | 1.4123          |
+| 1.4296        | 0.45  | 290  | 1.4122          |
+| 1.4333        | 0.46  | 295  | 1.4110          |
+| 1.3113        | 0.47  | 300  | 1.4103          |
+| 1.3138        | 0.47  | 305  | 1.4103          |
+| 1.3951        | 0.48  | 310  | 1.4104          |
+| 1.3592        | 0.49  | 315  | 1.4099          |
+| 1.458         | 0.5   | 320  | 1.4094          |
+| 1.4037        | 0.5   | 325  | 1.4094          |
+| 1.4431        | 0.51  | 330  | 1.4086          |
+| 1.3595        | 0.52  | 335  | 1.4076          |
+| 1.3198        | 0.53  | 340  | 1.4061          |
+| 1.3967        | 0.53  | 345  | 1.4054          |
+| 1.254         | 0.54  | 350  | 1.4049          |
+| 1.3324        | 0.55  | 355  | 1.4047          |
+| 1.2428        | 0.56  | 360  | 1.4037          |
+| 1.3976        | 0.57  | 365  | 1.4033          |
+| 1.4226        | 0.57  | 370  | 1.4036          |
+| 1.3678        | 0.58  | 375  | 1.4038          |
+| 1.4634        | 0.59  | 380  | 1.4028          |
+| 1.4325        | 0.6   | 385  | 1.4018          |
+| 1.3175        | 0.6   | 390  | 1.4013          |
+| 1.3263        | 0.61  | 395  | 1.4007          |
+| 1.3653        | 0.62  | 400  | 1.4019          |
+| 1.3804        | 0.63  | 405  | 1.4009          |
+| 1.3686        | 0.64  | 410  | 1.4003          |
+| 1.3975        | 0.64  | 415  | 1.4003          |
+| 1.3289        | 0.65  | 420  | 1.4000          |
+| 1.3336        | 0.66  | 425  | 1.3991          |
+| 1.3958        | 0.67  | 430  | 1.3979          |
+| 1.2227        | 0.67  | 435  | 1.3972          |
+| 1.3202        | 0.68  | 440  | 1.3967          |
+| 1.3508        | 0.69  | 445  | 1.3963          |
+| 1.4077        | 0.7   | 450  | 1.3956          |
+| 1.4148        | 0.71  | 455  | 1.3952          |
+| 1.4219        | 0.71  | 460  | 1.3948          |
+| 1.3802        | 0.72  | 465  | 1.3949          |
+| 1.301         | 0.73  | 470  | 1.3945          |
+| 1.2894        | 0.74  | 475  | 1.3938          |
+| 1.3469        | 0.74  | 480  | 1.3940          |
+| 1.2852        | 0.75  | 485  | 1.3941          |
+| 1.4896        | 0.76  | 490  | 1.3933          |
+| 1.3953        | 0.77  | 495  | 1.3929          |
+| 1.3624        | 0.78  | 500  | 1.3926          |
+| 1.4719        | 0.78  | 505  | 1.3927          |
+| 1.3274        | 0.79  | 510  | 1.3920          |
+| 1.2106        | 0.8   | 515  | 1.3917          |
+| 1.3851        | 0.81  | 520  | 1.3918          |
+| 1.344         | 0.81  | 525  | 1.3916          |
+| 1.3197        | 0.82  | 530  | 1.3917          |
+| 1.3426        | 0.83  | 535  | 1.3922          |
+| 1.266         | 0.84  | 540  | 1.3919          |
+| 1.392         | 0.84  | 545  | 1.3918          |
+| 1.325         | 0.85  | 550  | 1.3918          |
+| 1.4706        | 0.86  | 555  | 1.3915          |
+| 1.3695        | 0.87  | 560  | 1.3910          |
+| 1.4036        | 0.88  | 565  | 1.3912          |
+| 1.3042        | 0.88  | 570  | 1.3912          |
+| 1.2578        | 0.89  | 575  | 1.3912          |
+| 1.3579        | 0.9   | 580  | 1.3915          |
+| 1.3324        | 0.91  | 585  | 1.3913          |
+| 1.5166        | 0.91  | 590  | 1.3911          |
+| 1.3563        | 0.92  | 595  | 1.3907          |
+| 1.4271        | 0.93  | 600  | 1.3901          |
+| 1.4084        | 0.94  | 605  | 1.3899          |
+| 1.3975        | 0.95  | 610  | 1.3897          |
+| 1.3887        | 0.95  | 615  | 1.3896          |
+| 1.4221        | 0.96  | 620  | 1.3898          |
+| 1.4031        | 0.97  | 625  | 1.3898          |
+| 1.3114        | 0.98  | 630  | 1.3897          |
+| 1.4907        | 0.98  | 635  | 1.3896          |
+| 1.3519        | 0.99  | 640  | 1.3898          |
+| 1.3648        | 1.0   | 645  | 1.3898          |
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.36.2
+- PyTorch 2.1.0+cu118
+- Datasets 2.16.1
+- Tokenizers 0.15.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 64,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "w3",
+    "lm_head",
+    "w2",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "k_proj",
+    "w1"
+  ],
+  "task_type": "CAUSAL_LM"
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d5cb2e1263dc42673c927ca9735f07ab84645b81ec594815419f44130d2a2e0f
+size 4375780256

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c0055033a2482543ae7542fd2afd6b7f111b7a231104f603892d60f0d9ceee1
+size 4728