End of training

Browse files

Files changed (9) hide show

README.md +71 -60
adapter_config.json +4 -6
adapter_model.bin +2 -2
adapter_model.safetensors +2 -2
config.json +1 -2
runs/Jun21_16-13-03_sky-08ab-atomkins-3e94-head/events.out.tfevents.1718986384.sky-08ab-atomkins-3e94-head.2667.0 +3 -0
tokenizer.json +0 -1
tokenizer_config.json +0 -1
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -16,70 +16,81 @@ should probably proofread and complete it, then remove this comment. -->
 [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
 <details><summary>See axolotl config</summary>
-axolotl version: `0.4.1`
 ```yaml
-adam_beta2: 0.95
-adam_epsilon: 1.0e-05
-adapter: qlora
 base_model: microsoft/phi-1_5
-bf16: auto
-dataset_prepared_path: null
-datasets:
-- path: garage-bAInd/Open-Platypus
-  type: alpaca
-debug: null
-deepspeed: null
-early_stopping_patience: null
-evals_per_epoch: 1
-flash_attention: false
-fp16: false
-fsdp: null
-fsdp_config: null
-gradient_accumulation_steps: 1
-gradient_checkpointing: true
-gradient_checkpointing_kwargs:
-  use_reentrant: true
-hub_model_id: AdamRTomkins/phi-kal
-hub_strategy: end
-learning_rate: 3.0e-06
-load_in_4bit: true
 load_in_8bit: false
-local_rank: null
-logging_steps: 1
 lora_alpha: 32
 lora_dropout: 0.05
-lora_fan_in_fan_out: null
-lora_model_dir: null
-lora_r: 64
 lora_target_linear: true
-lr_scheduler: cosine
-max_grad_norm: 1.0
-max_steps: 2
 micro_batch_size: 1
-model_type: AutoModelForCausalLM
 num_epochs: 1
 optimizer: adamw_torch
-output_dir: ./outputs/phi-sft-out
-pad_to_sequence_len: true
-resize_token_embeddings_to_32x: true
-resume_from_checkpoint: null
-sample_packing: true
-saves_per_epoch: 1
-sequence_len: 1024
-special_tokens:
-  pad_token: <|endoftext|>
-strict: false
-tokenizer_type: AutoTokenizer
-val_set_size: 0.05
-wandb_entity: null
-wandb_log_model: null
-wandb_name: null
-wandb_project: null
-wandb_watch: null
 warmup_steps: 100
 weight_decay: 0.1
-xformers_attention: null
 ```
 </details><br>
@@ -118,15 +129,15 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch  | Step | Validation Loss |
-|:-------------:|:------:|:----:|:---------------:|
-| 5.1862        | 0.0002 | 2    | 2.4120          |
 ### Framework versions
-- PEFT 0.11.1
-- Transformers 4.41.1
-- Pytorch 2.1.2+cu118
-- Datasets 2.19.1
-- Tokenizers 0.19.1

 [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
 <details><summary>See axolotl config</summary>
+axolotl version: `0.4.0`
 ```yaml
 base_model: microsoft/phi-1_5
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
 load_in_8bit: false
+load_in_4bit: true
+strict: false
+datasets:
+  - path: garage-bAInd/Open-Platypus
+    type: alpaca
+dataset_prepared_path:
+val_set_size: 0.05
+output_dir: ./outputs/phi-sft-out
+sequence_len: 1024
+sample_packing: true
+pad_to_sequence_len: true
+adapter: qlora
+lora_model_dir:
+lora_r: 64
 lora_alpha: 32
 lora_dropout: 0.05
 lora_target_linear: true
+lora_fan_in_fan_out:
+wandb_project:
+wandb_entity:
+wandb_watch:
+wandb_name:
+wandb_log_model:
+gradient_accumulation_steps: 1
 micro_batch_size: 1
 num_epochs: 1
 optimizer: adamw_torch
+adam_beta2: 0.95
+adam_epsilon: 0.00001
+max_grad_norm: 1.0
+lr_scheduler: cosine
+learning_rate: 0.000003
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: True
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: false
 warmup_steps: 100
+evals_per_epoch: 1
+saves_per_epoch: 1
+debug:
+deepspeed:
 weight_decay: 0.1
+fsdp:
+fsdp_config:
+resize_token_embeddings_to_32x: true
+special_tokens:
+  pad_token: "<|endoftext|>"
+hub_model_id: AdamRTomkins/phi-kal
+hub_strategy: end
+max_steps: 2
+# Setting to enable pre-ampere cards!
+bf16: auto
+fp16: false
 ```
 </details><br>
 ### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 6.3765        | 0.0   | 2    | 2.4120          |
 ### Framework versions
+- PEFT 0.8.2
+- Transformers 4.39.0.dev0
+- Pytorch 2.0.1+cu118
+- Datasets 2.17.1
+- Tokenizers 0.15.0

adapter_config.json CHANGED Viewed

@@ -6,7 +6,6 @@
   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
-  "layer_replication": null,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
@@ -20,14 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj",
     "k_proj",
-    "fc1",
     "fc2",
-    "dense"
   ],
   "task_type": "CAUSAL_LM",
-  "use_dora": false,
   "use_rslora": false
 }

   "fan_in_fan_out": null,
   "inference_mode": true,
   "init_lora_weights": true,
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
     "fc2",
+    "dense",
+    "q_proj",
+    "fc1",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
 }

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b37f7035685613b89aecf86f8f9de31a3bdbdc6ab6c7ff0081e0a64de77533a
-size 113349834

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f17d6ca14d1d23d30ec7ed6aca31e1508bd7bcc52167918d23083304202aef1
+size 226595597

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d8dbcc8bc7ef951b5d4dd3bbf172e58e76dbde1b4ed80aa171da864c7b04270
-size 113284400

 version https://git-lfs.github.com/spec/v1
+oid sha256:45512c7c500fdaa0cbaa141f934531b482c5af5bfa64602e9103d6addedec079
+size 226530600

config.json CHANGED Viewed

@@ -23,7 +23,6 @@
     "_load_in_4bit": true,
     "_load_in_8bit": false,
     "bnb_4bit_compute_dtype": "float32",
-    "bnb_4bit_quant_storage": "bfloat16",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": true,
     "llm_int8_enable_fp32_cpu_offload": false,
@@ -39,7 +38,7 @@
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
-  "transformers_version": "4.41.1",
   "use_cache": false,
   "vocab_size": 51200
 }

     "_load_in_4bit": true,
     "_load_in_8bit": false,
     "bnb_4bit_compute_dtype": "float32",
     "bnb_4bit_quant_type": "nf4",
     "bnb_4bit_use_double_quant": true,
     "llm_int8_enable_fp32_cpu_offload": false,
   "rope_theta": 10000.0,
   "tie_word_embeddings": false,
   "torch_dtype": "float16",
+  "transformers_version": "4.39.0.dev0",
   "use_cache": false,
   "vocab_size": 51200
 }

runs/Jun21_16-13-03_sky-08ab-atomkins-3e94-head/events.out.tfevents.1718986384.sky-08ab-atomkins-3e94-head.2667.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:033cedc32c7cf86d9e798995c8826833fcc926c94373f866b8212175a3869985
+size 6853

tokenizer.json CHANGED Viewed

@@ -382,7 +382,6 @@
     "end_of_word_suffix": "",
     "fuse_unk": false,
     "byte_fallback": false,
-    "ignore_merges": false,
     "vocab": {
       "!": 0,
       "\"": 1,

     "end_of_word_suffix": "",
     "fuse_unk": false,
     "byte_fallback": false,
     "vocab": {
       "!": 0,
       "\"": 1,

tokenizer_config.json CHANGED Viewed

@@ -319,7 +319,6 @@
   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
-  "return_token_type_ids": false,
   "tokenizer_class": "CodeGenTokenizer",
   "unk_token": "<|endoftext|>"
 }

   "eos_token": "<|endoftext|>",
   "model_max_length": 2048,
   "pad_token": "<|endoftext|>",
   "tokenizer_class": "CodeGenTokenizer",
   "unk_token": "<|endoftext|>"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d3ed7245af6fdfac6574f6643e1814f238a1aa4646a3d4f5bdca3202f0fdfa10
-size 6072

 version https://git-lfs.github.com/spec/v1
+oid sha256:93506f2af7c4c83e948c7f2e1da0759e701cd6707309e1e91dc6120361cb7229
+size 5179