nandhakumar7/unslothtrainer

Files changed (6)

README.md CHANGED

@@ -1,11 +1,12 @@
 ---
-license: llama2
 library_name: peft
 tags:
 - trl
 - sft
 - generated_from_trainer
-base_model: unsloth/llama-3-8b-bnb-4bit
 model-index:
 - name: outputs
   results: []
@@ -16,7 +17,7 @@ should probably proofread and complete it, then remove this comment. -->
 # outputs
-This model is a fine-tuned version of [unsloth/llama-3-8b-bnb-4bit](https://huggingface.co/unsloth/llama-3-8b-bnb-4bit) on an unknown dataset.
 ## Model description
@@ -35,14 +36,17 @@ More information needed
 ### Training hyperparameters
 The following hyperparameters were used during training:
-- learning_rate: 5e-05
-- train_batch_size: 1
 - eval_batch_size: 8
 - seed: 3407
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 10
-- training_steps: 12
 ### Training results
@@ -51,7 +55,7 @@ The following hyperparameters were used during training:
 ### Framework versions
 - PEFT 0.11.1
-- Transformers 4.41.0
-- Pytorch 2.3.0+cu121
-- Datasets 2.19.1
 - Tokenizers 0.19.1

 ---
+base_model: unsloth/llama-3-8b-instruct-bnb-4bit
 library_name: peft
+license: llama3
 tags:
 - trl
 - sft
+- unsloth
 - generated_from_trainer
 model-index:
 - name: outputs
   results: []
 # outputs
+This model is a fine-tuned version of [unsloth/llama-3-8b-instruct-bnb-4bit](https://huggingface.co/unsloth/llama-3-8b-instruct-bnb-4bit) on an unknown dataset.
 ## Model description
 ### Training hyperparameters
 The following hyperparameters were used during training:
+- learning_rate: 0.0005
+- train_batch_size: 2
 - eval_batch_size: 8
 - seed: 3407
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 8
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 2
+- num_epochs: 2
+- mixed_precision_training: Native AMP
 ### Training results
 ### Framework versions
 - PEFT 0.11.1
+- Transformers 4.42.4
+- Pytorch 2.3.1
+- Datasets 2.20.0
 - Tokenizers 0.19.1

adapter_config.json CHANGED

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "unsloth/llama-3-8b-bnb-4bit",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -9,7 +9,7 @@
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
-  "loftq_config": null,
   "lora_alpha": 16,
   "lora_dropout": 0,
   "megatron_config": null,
@@ -18,15 +18,15 @@
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
-  "revision": null,
   "target_modules": [
-    "up_proj",
     "v_proj",
-    "q_proj",
-    "down_proj",
-    "gate_proj",
     "o_proj",
-    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/llama-3-8b-instruct-bnb-4bit",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
   "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
+  "loftq_config": {},
   "lora_alpha": 16,
   "lora_dropout": 0,
   "megatron_config": null,
   "peft_type": "LORA",
   "r": 16,
   "rank_pattern": {},
+  "revision": "unsloth",
   "target_modules": [
     "v_proj",
     "o_proj",
+    "k_proj",
+    "up_proj",
+    "down_proj",
+    "q_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8651868631b0a19aa1ce8b621f887978d8cf88862cfb43e8ea712fb5200a3916
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e9ee50554a3c2f7a98cebf8f9865d82aec93dd868868d3fd2b4c7d25d4c9bf7f
 size 167832240

special_tokens_map.json CHANGED

@@ -7,11 +7,17 @@
     "single_word": false
   },
   "eos_token": {
-    "content": "<|end_of_text|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<|end_of_text|>"
 }

     "single_word": false
   },
   "eos_token": {
+    "content": "<|eot_id|>",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
     "single_word": false
   },
+  "pad_token": {
+    "content": "<|reserved_special_token_250|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
 }

tokenizer_config.json CHANGED

@@ -2050,14 +2050,15 @@
     }
   },
   "bos_token": "<|begin_of_text|>",
   "clean_up_tokenization_spaces": true,
-  "eos_token": "<|end_of_text|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<|end_of_text|>",
-  "padding_side": "left",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

     }
   },
   "bos_token": "<|begin_of_text|>",
+  "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
   "clean_up_tokenization_spaces": true,
+  "eos_token": "<|eot_id|>",
   "model_input_names": [
     "input_ids",
     "attention_mask"
   ],
+  "model_max_length": 8192,
+  "pad_token": "<|reserved_special_token_250|>",
+  "padding_side": "right",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

training_args.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cb7ac46cdbbc99bd3b90f4954592c5bfd27fb9e6464d1a5385777b901742de70
-size 5048

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6601c554304cadbb67c363c3f848816993528a21e78e82e2d52f175088ebd57
+size 5112