Pushing fine-tuned model
- README.md +160 -0
- adapter_config.json +29 -0
- adapter_model.safetensors +3 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,160 @@
---
license: apache-2.0
library_name: peft
tags:
- generated_from_trainer
base_model: TheBloke/Marcoroni-7B-v3-GPTQ
model-index:
- name: 20231215-144935
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# 20231215-144935

This model is a fine-tuned version of [TheBloke/Marcoroni-7B-v3-GPTQ](https://huggingface.co/TheBloke/Marcoroni-7B-v3-GPTQ) on an unknown dataset.
It achieves the following results on the evaluation set:
- Loss: 0.2792

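Because this repository contains only a PEFT (LoRA) adapter, it is meant to be loaded on top of the GPTQ base checkpoint. The snippet below is a minimal, hedged sketch of how that could look: the adapter repo id is a placeholder, the prompt format is illustrative, and loading a GPTQ model through `transformers` generally assumes `optimum` and `auto-gptq` are installed.

```python
# Minimal sketch: load the GPTQ base model and attach this LoRA adapter with PEFT.
# The adapter repo id below is a placeholder -- point it at wherever this adapter is hosted.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "TheBloke/Marcoroni-7B-v3-GPTQ"
adapter_id = "your-username/20231215-144935"  # placeholder, not a real repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(base_id, device_map="auto")
model = PeftModel.from_pretrained(base_model, adapter_id)

# Illustrative prompt only; the training dataset and prompt template are not documented here.
prompt = "### Instruction:\nSummarise what a LoRA adapter is.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
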
## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (a hedged `TrainingArguments` sketch follows the list):
- learning_rate: 2.5e-05
- train_batch_size: 2
- eval_batch_size: 8
- seed: 42
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_steps: 2
- training_steps: 100
- mixed_precision_training: Native AMP

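As a rough illustration only, the values above could be expressed with the `transformers` Trainer API roughly as follows; `output_dir`, the optimizer name, and the evaluation/logging cadence are assumptions, not values read from the original training script.

```python
# Hedged sketch: one plausible TrainingArguments configuration matching the values above.
# output_dir and the eval/logging cadence are assumptions, not taken from the original run.
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="20231215-144935",   # assumed output directory
    learning_rate=2.5e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=8,
    seed=42,
    optim="adamw_torch",            # Adam with betas=(0.9, 0.999) and eps=1e-8 are the defaults
    lr_scheduler_type="linear",
    warmup_steps=2,
    max_steps=100,
    fp16=True,                      # mixed_precision_training: Native AMP
    evaluation_strategy="steps",    # assumed: the card reports a validation loss at every step
    eval_steps=1,
    logging_steps=1,
)
```
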
### Training results

| Training Loss | Epoch | Step | Validation Loss |
|:-------------:|:-----:|:----:|:---------------:|
| 2.1951 | 0.02 | 1 | 2.0427 |
| 1.4965 | 0.03 | 2 | 2.0427 |
| 2.2716 | 0.05 | 3 | 1.9661 |
| 1.9397 | 0.06 | 4 | 1.9661 |
| 1.8659 | 0.08 | 5 | 1.9661 |
| 1.9903 | 0.09 | 6 | 1.8551 |
| 1.9031 | 0.11 | 7 | 1.7846 |
| 1.929 | 0.12 | 8 | 1.7253 |
| 0.9994 | 0.14 | 9 | 1.6749 |
| 1.4887 | 0.15 | 10 | 1.6266 |
| 1.2112 | 0.17 | 11 | 1.5763 |
| 1.3437 | 0.18 | 12 | 1.5326 |
| 1.1061 | 0.2 | 13 | 1.5014 |
| 1.4769 | 0.22 | 14 | 1.4676 |
| 1.452 | 0.23 | 15 | 1.4313 |
| 1.2393 | 0.25 | 16 | 1.3963 |
| 1.1998 | 0.26 | 17 | 1.3662 |
| 1.529 | 0.28 | 18 | 1.3353 |
| 0.949 | 0.29 | 19 | 1.3088 |
| 1.1857 | 0.31 | 20 | 1.2798 |
| 1.4852 | 0.32 | 21 | 1.2484 |
| 1.2845 | 0.34 | 22 | 1.2146 |
| 1.332 | 0.35 | 23 | 1.1795 |
| 0.8718 | 0.37 | 24 | 1.1435 |
| 1.2038 | 0.38 | 25 | 1.1069 |
| 1.2105 | 0.4 | 26 | 1.0683 |
| 0.9161 | 0.42 | 27 | 1.0277 |
| 1.0415 | 0.43 | 28 | 0.9849 |
| 1.0367 | 0.45 | 29 | 0.9400 |
| 0.9279 | 0.46 | 30 | 0.8924 |
| 0.82 | 0.48 | 31 | 0.8467 |
| 0.8286 | 0.49 | 32 | 0.8077 |
| 0.7855 | 0.51 | 33 | 0.7645 |
| 0.7919 | 0.52 | 34 | 0.7164 |
| 0.6294 | 0.54 | 35 | 0.6712 |
| 0.6205 | 0.55 | 36 | 0.6302 |
| 0.5754 | 0.57 | 37 | 0.5926 |
| 0.6331 | 0.58 | 38 | 0.5578 |
| 0.5068 | 0.6 | 39 | 0.5251 |
| 0.5252 | 0.62 | 40 | 0.5009 |
| 0.442 | 0.63 | 41 | 0.4800 |
| 0.4451 | 0.65 | 42 | 0.4628 |
| 0.3742 | 0.66 | 43 | 0.4491 |
| 0.8263 | 0.68 | 44 | 0.4383 |
| 0.4392 | 0.69 | 45 | 0.4277 |
| 0.5359 | 0.71 | 46 | 0.4174 |
| 0.3903 | 0.72 | 47 | 0.4051 |
| 0.4177 | 0.74 | 48 | 0.3921 |
| 0.4606 | 0.75 | 49 | 0.3792 |
| 0.5233 | 0.77 | 50 | 0.3700 |
| 0.3838 | 0.78 | 51 | 0.3638 |
| 0.353 | 0.8 | 52 | 0.3587 |
| 0.3108 | 0.82 | 53 | 0.3530 |
| 0.4029 | 0.83 | 54 | 0.3470 |
| 0.3425 | 0.85 | 55 | 0.3411 |
| 0.2976 | 0.86 | 56 | 0.3345 |
| 0.5347 | 0.88 | 57 | 0.3291 |
| 0.3208 | 0.89 | 58 | 0.3239 |
| 0.3319 | 0.91 | 59 | 0.3194 |
| 0.3333 | 0.92 | 60 | 0.3160 |
| 0.3102 | 0.94 | 61 | 0.3135 |
| 0.4149 | 0.95 | 62 | 0.3118 |
| 0.3044 | 0.97 | 63 | 0.3098 |
| 0.2688 | 0.98 | 64 | 0.3076 |
| 0.3926 | 1.0 | 65 | 0.3060 |
| 0.2969 | 1.02 | 66 | 0.3045 |
| 0.3799 | 1.03 | 67 | 0.3029 |
| 0.3045 | 1.05 | 68 | 0.3014 |
| 0.2295 | 1.06 | 69 | 0.2997 |
| 0.4113 | 1.08 | 70 | 0.2983 |
| 0.3093 | 1.09 | 71 | 0.2969 |
| 0.2542 | 1.11 | 72 | 0.2957 |
| 0.2445 | 1.12 | 73 | 0.2947 |
| 0.2592 | 1.14 | 74 | 0.2935 |
| 0.6259 | 1.15 | 75 | 0.2925 |
| 0.3217 | 1.17 | 76 | 0.2916 |
| 0.2969 | 1.18 | 77 | 0.2907 |
| 0.287 | 1.2 | 78 | 0.2898 |
| 0.2642 | 1.22 | 79 | 0.2890 |
| 0.2735 | 1.23 | 80 | 0.2880 |
| 0.2472 | 1.25 | 81 | 0.2870 |
| 0.3068 | 1.26 | 82 | 0.2862 |
| 0.2779 | 1.28 | 83 | 0.2855 |
| 0.2915 | 1.29 | 84 | 0.2848 |
| 0.3926 | 1.31 | 85 | 0.2841 |
| 0.3057 | 1.32 | 86 | 0.2836 |
| 0.2413 | 1.34 | 87 | 0.2830 |
| 0.2851 | 1.35 | 88 | 0.2826 |
| 0.2295 | 1.37 | 89 | 0.2821 |
| 0.2611 | 1.38 | 90 | 0.2817 |
| 0.6346 | 1.4 | 91 | 0.2814 |
| 0.2297 | 1.42 | 92 | 0.2811 |
| 0.4868 | 1.43 | 93 | 0.2808 |
| 0.2819 | 1.45 | 94 | 0.2805 |
| 0.2589 | 1.46 | 95 | 0.2803 |
| 0.2171 | 1.48 | 96 | 0.2801 |
| 0.2581 | 1.49 | 97 | 0.2798 |
| 0.2495 | 1.51 | 98 | 0.2796 |
| 0.2702 | 1.52 | 99 | 0.2793 |
| 0.271 | 1.54 | 100 | 0.2792 |

### Framework versions

- PEFT 0.7.2.dev0
- Transformers 4.37.0.dev0
- Pytorch 2.1.0+cu121
- Datasets 2.15.0
- Tokenizers 0.15.0
adapter_config.json
ADDED
@@ -0,0 +1,29 @@
{
  "alpha_pattern": {},
  "auto_mapping": null,
  "base_model_name_or_path": "TheBloke/Marcoroni-7B-v3-GPTQ",
  "bias": "none",
  "fan_in_fan_out": false,
  "inference_mode": true,
  "init_lora_weights": true,
  "layers_pattern": null,
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 32,
  "lora_dropout": 0.05,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
  "peft_type": "LORA",
  "r": 8,
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
    "k_proj",
    "v_proj",
    "o_proj",
    "q_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_rslora": false
}
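For orientation, the adapter configuration above corresponds roughly to the following `peft` `LoraConfig`; this is a sketch mirroring the key JSON fields, not the exact code that produced the file.

```python
# Hedged sketch: a LoraConfig mirroring the key fields of adapter_config.json above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=["k_proj", "v_proj", "o_proj", "q_proj"],
    task_type="CAUSAL_LM",
)
```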
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d64ff06fd94f533ec80293c1a1b75c2f43b8fb49d5cea2c125a87d08ecf68370
size 27297032
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:02e6b3862d60150bebb47b8a45548ff1d0f24795109f1147f66259a6fc3212ee
size 4728