Dev-SriramB/qa_bot2

Files changed (5) hide show

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [TheBloke/Mistral-7B-Instruct-v0.2-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ) on an unspecified dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.4925
 ## Model description
@@ -35,13 +35,13 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 1
-- eval_batch_size: 1
 - seed: 42
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 4
-- optimizer: Use paged_adamw_8bit with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
 - num_epochs: 2
@@ -51,14 +51,14 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.8787        | 1.0   | 75   | 1.5135          |
-| 1.489         | 2.0   | 150  | 1.4925          |
 ### Framework versions
-- PEFT 0.13.2
-- Transformers 4.46.2
-- Pytorch 2.5.1+cu121
-- Datasets 3.1.0
-- Tokenizers 0.20.3

 This model is a fine-tuned version of [TheBloke/Mistral-7B-Instruct-v0.2-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GPTQ) on an unspecified dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.0784
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 3
+- eval_batch_size: 3
 - seed: 42
 - gradient_accumulation_steps: 4
+- total_train_batch_size: 12
+- optimizer: paged_adamw_8bit (OptimizerNames.PAGED_ADAMW_8BIT) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
 - lr_scheduler_type: linear
 - lr_scheduler_warmup_steps: 2
 - num_epochs: 2
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 10.1116       | 1.0   | 25   | 2.2149          |
+| 8.6814        | 2.0   | 50   | 2.0784          |
 ### Framework versions
+- PEFT 0.14.0
+- Transformers 4.47.1
+- Pytorch 2.5.1+cu124
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter_config.json CHANGED Viewed

@@ -1,8 +1,10 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": null,
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
@@ -11,15 +13,17 @@
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj"
   ],
   "task_type": "CAUSAL_LM",

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "TheBloke/Mistral-7B-Instruct-v0.2-GPTQ",
   "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
   "fan_in_fan_out": false,
   "inference_mode": true,
   "init_lora_weights": true,
   "layers_to_transform": null,
   "loftq_config": {},
   "lora_alpha": 32,
+  "lora_bias": false,
   "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": null,
   "peft_type": "LORA",
+  "r": 16,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "v_proj",
     "q_proj"
   ],
   "task_type": "CAUSAL_LM",

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:983641d24bf39235041c15c2003b9f1a2ff0ecb741f371d4cde3086a1b1fb0c8
-size 8398144

 version https://git-lfs.github.com/spec/v1
+oid sha256:23ce61ead5d7b31bf2178da17ab221af8d36af442e1ae5a91364249e137dd0b2
+size 27280152

runs/Jan31_16-11-51_d64bec8c4c1b/events.out.tfevents.1738339915.d64bec8c4c1b.449.0 ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:be26f2d25fdf4b45820ebf4edbcb6235c3423c24e790c388b3bacb6535416726
+size 7019

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:024f5ab57e6d0309ea3dd026e2712ac7228b110d23ad844814b1121588355385
 size 5304

 version https://git-lfs.github.com/spec/v1
+oid sha256:13789db8eeb74358645966e07c8b906286149896a893d3eb67472ade542abdc5
 size 5304