diff --git a/Everything-LM/README.md b/Everything-LM/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/adapter_config.json b/Everything-LM/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/adapter_model.bin b/Everything-LM/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d --- /dev/null +++ b/Everything-LM/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d +size 80114765 diff --git a/Everything-LM/added_tokens.json b/Everything-LM/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c --- /dev/null +++ b/Everything-LM/added_tokens.json @@ -0,0 +1,3 @@ +{ + "": 32000 +} diff --git a/Everything-LM/checkpoint-2/README.md b/Everything-LM/checkpoint-2/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-2/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-2/adapter_config.json b/Everything-LM/checkpoint-2/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-2/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-2/adapter_model.bin b/Everything-LM/checkpoint-2/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3b372cf658c8c76dbabdc160d96e72aa3d7edcb --- /dev/null +++ b/Everything-LM/checkpoint-2/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a04a5ca22abdae32458bd108cafa031afc19b36fd26be501f6a79361c29dc8c +size 80114765 diff --git a/Everything-LM/checkpoint-2/adapter_model/README.md b/Everything-LM/checkpoint-2/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-2/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-2/adapter_model/adapter_config.json b/Everything-LM/checkpoint-2/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-2/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e3b372cf658c8c76dbabdc160d96e72aa3d7edcb --- /dev/null +++ b/Everything-LM/checkpoint-2/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a04a5ca22abdae32458bd108cafa031afc19b36fd26be501f6a79361c29dc8c +size 80114765 diff --git a/Everything-LM/checkpoint-2/optimizer.pt b/Everything-LM/checkpoint-2/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1bec732c18aeed9058f045a30be5c91fe473f041 --- /dev/null +++ b/Everything-LM/checkpoint-2/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3318079d84860039de45c144aa3fe2e9d0e56190bfc36e1c2b9e412d0598ee26 +size 40569887 diff --git a/Everything-LM/checkpoint-2/rng_state.pth b/Everything-LM/checkpoint-2/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a64f8db7f0eef7d8f6fd06fa05ca8ac7a972a822 --- /dev/null +++ b/Everything-LM/checkpoint-2/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:984433fd7be740b2d6360cfe44e349d5a40ecb2285791768d183ed3afcfc48aa +size 14575 diff --git a/Everything-LM/checkpoint-2/scheduler.pt b/Everything-LM/checkpoint-2/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d74bc8aac574cbc99a3e87804fc277de9d7b9bf --- /dev/null +++ b/Everything-LM/checkpoint-2/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69965cd123fba88348bcf5858148ecb2990698e61613f3f725965ea841e49de +size 627 diff --git a/Everything-LM/checkpoint-2/trainer_state.json b/Everything-LM/checkpoint-2/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..87864cb8a5a8be865466a31011bdb461f7de87a0 --- /dev/null +++ b/Everything-LM/checkpoint-2/trainer_state.json @@ -0,0 +1,31 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8648648648648649, + "eval_steps": 500, + "global_step": 2, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.43, + "learning_rate": 0.0002799038105676658, + "loss": 1.5099, + "step": 1 + }, + { + "epoch": 0.86, + "learning_rate": 0.000225, + "loss": 1.4484, + "step": 2 + } + ], + "logging_steps": 1, + "max_steps": 6, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 1.2089671264763904e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Everything-LM/checkpoint-2/training_args.bin b/Everything-LM/checkpoint-2/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525 --- /dev/null +++ b/Everything-LM/checkpoint-2/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b +size 4155 diff --git a/Everything-LM/checkpoint-4/README.md b/Everything-LM/checkpoint-4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-4/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-4/adapter_config.json b/Everything-LM/checkpoint-4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-4/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-4/adapter_model.bin b/Everything-LM/checkpoint-4/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2711f0ff931bba5d086ec50f727c9de085513c46 --- /dev/null +++ b/Everything-LM/checkpoint-4/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9fe4007d8a741162ca6333b04b5e5df236fc4c410a30f8bde31fbda5ea0b4c +size 80114765 diff --git a/Everything-LM/checkpoint-4/adapter_model/README.md b/Everything-LM/checkpoint-4/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-4/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-4/adapter_model/adapter_config.json b/Everything-LM/checkpoint-4/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-4/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2711f0ff931bba5d086ec50f727c9de085513c46 --- /dev/null +++ b/Everything-LM/checkpoint-4/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9fe4007d8a741162ca6333b04b5e5df236fc4c410a30f8bde31fbda5ea0b4c +size 80114765 diff --git a/Everything-LM/checkpoint-4/optimizer.pt b/Everything-LM/checkpoint-4/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0a9e0acb404e1dfd319d48ef51f2b012916535b --- /dev/null +++ b/Everything-LM/checkpoint-4/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5487ddde84775df62200352f01ec1c66a2cb3960ede96aadbd22f95022927fc +size 40569887 diff --git a/Everything-LM/checkpoint-4/rng_state.pth b/Everything-LM/checkpoint-4/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c34700ebf6c4e2c40f18678e00acc4a18319f64d --- /dev/null +++ b/Everything-LM/checkpoint-4/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82b85a7a337110f6a2d8109bdab3eec70d3e60c4e6b04e854ff5998d9e8f9f68 +size 14575 diff --git a/Everything-LM/checkpoint-4/scheduler.pt b/Everything-LM/checkpoint-4/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03c921d9466f8c47148f689ba46c71631f0f8426 --- /dev/null +++ b/Everything-LM/checkpoint-4/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60fa183fd6ae9404348042f9312b80e0a1fb1bc4a4feda8a143c9f9c99975d7 +size 627 diff --git a/Everything-LM/checkpoint-4/trainer_state.json b/Everything-LM/checkpoint-4/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a486c764f46d79a1706b23daf0b20e83a84e290d --- /dev/null +++ b/Everything-LM/checkpoint-4/trainer_state.json @@ -0,0 +1,43 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7297297297297298, + "eval_steps": 500, + "global_step": 4, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.43, + "learning_rate": 0.0002799038105676658, + "loss": 1.5099, + "step": 1 + }, + { + "epoch": 0.86, + "learning_rate": 0.000225, + "loss": 1.4484, + "step": 2 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015, + "loss": 1.4938, + "step": 3 + }, + { + "epoch": 1.73, + "learning_rate": 7.500000000000002e-05, + "loss": 1.4088, + "step": 4 + } + ], + "logging_steps": 1, + "max_steps": 6, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 2.4157216960413696e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Everything-LM/checkpoint-4/training_args.bin b/Everything-LM/checkpoint-4/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525 --- /dev/null +++ b/Everything-LM/checkpoint-4/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b +size 4155 diff --git a/Everything-LM/checkpoint-6/README.md b/Everything-LM/checkpoint-6/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-6/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-6/adapter_config.json b/Everything-LM/checkpoint-6/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-6/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-6/adapter_model.bin b/Everything-LM/checkpoint-6/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d --- /dev/null +++ b/Everything-LM/checkpoint-6/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d +size 80114765 diff --git a/Everything-LM/checkpoint-6/adapter_model/README.md b/Everything-LM/checkpoint-6/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/Everything-LM/checkpoint-6/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Everything-LM/checkpoint-6/adapter_model/adapter_config.json b/Everything-LM/checkpoint-6/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..cbe4dfdbbcdf59d3381aba62eca599a276e3fb6e --- /dev/null +++ b/Everything-LM/checkpoint-6/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "down_proj", + "up_proj", + "gate_proj", + "k_proj", + "o_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin b/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..52a3e0edeca0c2e370018dc5ffd37c1c1719de8d --- /dev/null +++ b/Everything-LM/checkpoint-6/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a991d74cf01bc5993d7d515e6c91abb9aacf2250cc5e32c7912ed38e3764b1d +size 80114765 diff --git a/Everything-LM/checkpoint-6/optimizer.pt b/Everything-LM/checkpoint-6/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..16baebd689faf52ebdd48501c1b29c6760740d26 --- /dev/null +++ b/Everything-LM/checkpoint-6/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e6418f6ca59834adfc8c6238036b82d86a4f38a81210582f84a5693e45c8120 +size 40569887 diff --git a/Everything-LM/checkpoint-6/rng_state.pth b/Everything-LM/checkpoint-6/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dda35b16545baec766437929889f4b0c1338783e --- /dev/null +++ b/Everything-LM/checkpoint-6/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89ad21499347281d1b140497be2595dba5d75ce73b57d40d135e0fdd20c3f4c5 +size 14575 diff --git a/Everything-LM/checkpoint-6/scheduler.pt b/Everything-LM/checkpoint-6/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ada004f66f8383e0ea4230d1bb89ddcead6aa5b8 --- /dev/null +++ b/Everything-LM/checkpoint-6/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a2141b0f152d878006263e11b37207f4c1e1d4252b202dbe2b127e504173c88 +size 627 diff --git a/Everything-LM/checkpoint-6/trainer_state.json b/Everything-LM/checkpoint-6/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ff70716e7d7632624a741fe5bb769bb49abce28 --- /dev/null +++ b/Everything-LM/checkpoint-6/trainer_state.json @@ -0,0 +1,55 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.5945945945945947, + "eval_steps": 500, + "global_step": 6, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.43, + "learning_rate": 0.0002799038105676658, + "loss": 1.5099, + "step": 1 + }, + { + "epoch": 0.86, + "learning_rate": 0.000225, + "loss": 1.4484, + "step": 2 + }, + { + "epoch": 1.3, + "learning_rate": 0.00015, + "loss": 1.4938, + "step": 3 + }, + { + "epoch": 1.73, + "learning_rate": 7.500000000000002e-05, + "loss": 1.4088, + "step": 4 + }, + { + "epoch": 2.16, + "learning_rate": 2.009618943233419e-05, + "loss": 1.3926, + "step": 5 + }, + { + "epoch": 2.59, + "learning_rate": 0.0, + "loss": 1.4358, + "step": 6 + } + ], + "logging_steps": 1, + "max_steps": 6, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 3.134282090623795e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Everything-LM/checkpoint-6/training_args.bin b/Everything-LM/checkpoint-6/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c2985646bdb82b8066f0e57712fe3aee0e87a525 --- /dev/null +++ b/Everything-LM/checkpoint-6/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5069800a33ca54bf43c3b93e1d5f640909151ea51562afd2230d8c868d36ff7b +size 4155 diff --git a/Everything-LM/special_tokens_map.json b/Everything-LM/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/Everything-LM/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/Everything-LM/tokenizer.model b/Everything-LM/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/Everything-LM/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/Everything-LM/tokenizer_config.json b/Everything-LM/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26 --- /dev/null +++ b/Everything-LM/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +} diff --git a/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb b/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..363fcab7ed6e9634e198cf5555ceb88932c9a245 --- /dev/null +++ b/Puffin-7B/.ipynb_checkpoints/Untitled-checkpoint.ipynb @@ -0,0 +1,6 @@ +{ + "cells": [], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Puffin-7B/Untitled.ipynb b/Puffin-7B/Untitled.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..ac30a892d35d10b408ec85910f00f169c7ffa5c0 --- /dev/null +++ b/Puffin-7B/Untitled.ipynb @@ -0,0 +1,125 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "670a4958-8306-4a10-a51c-01eb2764f6fe", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "009ab51eb1164706bca69f75874d064e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "adapter_model.bin: 0%| | 0.00/80.1M [00:00": 32000 +} diff --git a/Puffin-7B/checkpoint-20/README.md b/Puffin-7B/checkpoint-20/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-20/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-20/adapter_config.json b/Puffin-7B/checkpoint-20/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-20/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-20/adapter_model.bin b/Puffin-7B/checkpoint-20/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..87b48269fdd97f78341213d9f99b0b82afcf4ccb --- /dev/null +++ b/Puffin-7B/checkpoint-20/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21e5f4dcceb76ea3181f0c5f052515f8d8733911db0785eb6c7ae5e10a7e796 +size 80114765 diff --git a/Puffin-7B/checkpoint-20/adapter_model/README.md b/Puffin-7B/checkpoint-20/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-20/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-20/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..87b48269fdd97f78341213d9f99b0b82afcf4ccb --- /dev/null +++ b/Puffin-7B/checkpoint-20/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21e5f4dcceb76ea3181f0c5f052515f8d8733911db0785eb6c7ae5e10a7e796 +size 80114765 diff --git a/Puffin-7B/checkpoint-20/optimizer.pt b/Puffin-7B/checkpoint-20/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a36d94dc4db5f4fb4c728c7ba291bbf71652e551 --- /dev/null +++ b/Puffin-7B/checkpoint-20/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e6d3149d2f727915dab9db573761ad95065bb59ba1704fe9e52b6ba8e976f6e +size 40569887 diff --git a/Puffin-7B/checkpoint-20/rng_state.pth b/Puffin-7B/checkpoint-20/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4925620c36ffe5fc57c2bd6064bae1c8d45e3c9b --- /dev/null +++ b/Puffin-7B/checkpoint-20/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:930027ef671facab47ddd6f3b220cff979254f311abb138cf064cb376f5d5918 +size 14575 diff --git a/Puffin-7B/checkpoint-20/scheduler.pt b/Puffin-7B/checkpoint-20/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c8925cc0ec3b18a7ee772557d3761b93ac696c91 --- /dev/null +++ b/Puffin-7B/checkpoint-20/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a343d51a1518966727eef74c11b5d5eaa20f6293d7bc972d7025f1daada4663e +size 627 diff --git a/Puffin-7B/checkpoint-20/trainer_state.json b/Puffin-7B/checkpoint-20/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d5d7e88851863a922318c1fde683380479584d3f --- /dev/null +++ b/Puffin-7B/checkpoint-20/trainer_state.json @@ -0,0 +1,147 @@ +{ + "best_metric": 8.979241371154785, + "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20", + "epoch": 0.37735849056603776, + "eval_steps": 20, + "global_step": 20, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.9753, + "step": 1 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011999999999999999, + "loss": 0.9631, + "step": 2 + }, + { + "epoch": 0.06, + "learning_rate": 0.00017999999999999998, + "loss": 0.952, + "step": 3 + }, + { + "epoch": 0.08, + "learning_rate": 0.00023999999999999998, + "loss": 0.8687, + "step": 4 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003, + "loss": 1.0306, + "step": 5 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029996878922838096, + "loss": 0.9029, + "step": 6 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998751699016874, + "loss": 0.8488, + "step": 7 + }, + { + "epoch": 0.15, + "learning_rate": 0.000299719180979005, + "loss": 0.8167, + "step": 8 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029950088737412895, + "loss": 0.7058, + "step": 9 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002992203799285506, + "loss": 0.8208, + "step": 10 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029887777537365414, + "loss": 0.752, + "step": 11 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002984732162821399, + "loss": 0.725, + "step": 12 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002980068710086933, + "loss": 0.7936, + "step": 13 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002974789336199254, + "loss": 0.7811, + "step": 14 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002968896238136131, + "loss": 0.7519, + "step": 15 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002962391868272735, + "loss": 0.7475, + "step": 16 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029552789333610964, + "loss": 0.8075, + "step": 17 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029475603934037094, + "loss": 0.7297, + "step": 18 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002939239460421746, + "loss": 0.7071, + "step": 19 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002930319597118391, + "loss": 0.7873, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 8.979241371154785, + "eval_runtime": 28.0141, + "eval_samples_per_second": 4.176, + "eval_steps_per_second": 1.071, + "step": 20 + } + ], + "logging_steps": 1, + "max_steps": 159, + "num_train_epochs": 3, + "save_steps": 20, + "total_flos": 2.5549826310537216e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Puffin-7B/checkpoint-20/training_args.bin b/Puffin-7B/checkpoint-20/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e --- /dev/null +++ b/Puffin-7B/checkpoint-20/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a +size 4219 diff --git a/Puffin-7B/checkpoint-40/README.md b/Puffin-7B/checkpoint-40/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-40/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-40/adapter_config.json b/Puffin-7B/checkpoint-40/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-40/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-40/adapter_model.bin b/Puffin-7B/checkpoint-40/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b88672488afb1065f43e9d924a68d1bc1e94e85 --- /dev/null +++ b/Puffin-7B/checkpoint-40/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3384337d94297fbe43261e2bb46308c88a16e31a50e2ca6ddfbfa95c354f29b +size 80114765 diff --git a/Puffin-7B/checkpoint-40/adapter_model/README.md b/Puffin-7B/checkpoint-40/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-40/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-40/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b88672488afb1065f43e9d924a68d1bc1e94e85 --- /dev/null +++ b/Puffin-7B/checkpoint-40/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3384337d94297fbe43261e2bb46308c88a16e31a50e2ca6ddfbfa95c354f29b +size 80114765 diff --git a/Puffin-7B/checkpoint-40/optimizer.pt b/Puffin-7B/checkpoint-40/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..86a61e0b1ccd96bad6635c17852d122b2829b4ad --- /dev/null +++ b/Puffin-7B/checkpoint-40/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0d06227b84729f3d74f91c96021133a82e9e1f93e98bc39dd405b3583230d26 +size 40569887 diff --git a/Puffin-7B/checkpoint-40/rng_state.pth b/Puffin-7B/checkpoint-40/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..55d90be29d8c7acf13da3bd73cbab706536c5c3b --- /dev/null +++ b/Puffin-7B/checkpoint-40/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88442e1e885b1eeeceda92b84921b5637ebf6bdc3862d9116b67ed421cdba32e +size 14575 diff --git a/Puffin-7B/checkpoint-40/scheduler.pt b/Puffin-7B/checkpoint-40/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45258ed4e1219fb88c433f514845c3539d79db5b --- /dev/null +++ b/Puffin-7B/checkpoint-40/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e62b6523d0cf9d571fdfab3999f41d0efc40f6522e500e52bde40b3fa57de48 +size 627 diff --git a/Puffin-7B/checkpoint-40/trainer_state.json b/Puffin-7B/checkpoint-40/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..300f6c5ace9c143185775e83ee33097cb16295cd --- /dev/null +++ b/Puffin-7B/checkpoint-40/trainer_state.json @@ -0,0 +1,275 @@ +{ + "best_metric": 8.979241371154785, + "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20", + "epoch": 0.7547169811320755, + "eval_steps": 20, + "global_step": 40, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.9753, + "step": 1 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011999999999999999, + "loss": 0.9631, + "step": 2 + }, + { + "epoch": 0.06, + "learning_rate": 0.00017999999999999998, + "loss": 0.952, + "step": 3 + }, + { + "epoch": 0.08, + "learning_rate": 0.00023999999999999998, + "loss": 0.8687, + "step": 4 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003, + "loss": 1.0306, + "step": 5 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029996878922838096, + "loss": 0.9029, + "step": 6 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998751699016874, + "loss": 0.8488, + "step": 7 + }, + { + "epoch": 0.15, + "learning_rate": 0.000299719180979005, + "loss": 0.8167, + "step": 8 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029950088737412895, + "loss": 0.7058, + "step": 9 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002992203799285506, + "loss": 0.8208, + "step": 10 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029887777537365414, + "loss": 0.752, + "step": 11 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002984732162821399, + "loss": 0.725, + "step": 12 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002980068710086933, + "loss": 0.7936, + "step": 13 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002974789336199254, + "loss": 0.7811, + "step": 14 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002968896238136131, + "loss": 0.7519, + "step": 15 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002962391868272735, + "loss": 0.7475, + "step": 16 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029552789333610964, + "loss": 0.8075, + "step": 17 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029475603934037094, + "loss": 0.7297, + "step": 18 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002939239460421746, + "loss": 0.7071, + "step": 19 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002930319597118391, + "loss": 0.7873, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 8.979241371154785, + "eval_runtime": 28.0141, + "eval_samples_per_second": 4.176, + "eval_steps_per_second": 1.071, + "step": 20 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002920804515437865, + "loss": 0.7057, + "step": 21 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002910698175020717, + "loss": 0.7695, + "step": 22 + }, + { + "epoch": 0.43, + "learning_rate": 0.000290000478155605, + "loss": 0.7348, + "step": 23 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002888728785031347, + "loss": 0.72, + "step": 24 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028768748778806386, + "loss": 0.742, + "step": 25 + }, + { + "epoch": 0.49, + "learning_rate": 0.00028644479930317775, + "loss": 0.7085, + "step": 26 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002851453301853628, + "loss": 0.7044, + "step": 27 + }, + { + "epoch": 0.53, + "learning_rate": 0.00028378962120040405, + "loss": 0.7065, + "step": 28 + }, + { + "epoch": 0.55, + "learning_rate": 0.00028237823651794814, + "loss": 0.7105, + "step": 29 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002809117634767284, + "loss": 0.6517, + "step": 30 + }, + { + "epoch": 0.58, + "learning_rate": 0.00027939081234014705, + "loss": 0.7091, + "step": 31 + }, + { + "epoch": 0.6, + "learning_rate": 0.00027781601604231847, + "loss": 0.7677, + "step": 32 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002761880299246772, + "loss": 0.6455, + "step": 33 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002745075314632621, + "loss": 0.7224, + "step": 34 + }, + { + "epoch": 0.66, + "learning_rate": 0.000272775219986789, + "loss": 0.6218, + "step": 35 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002709918163856295, + "loss": 0.6965, + "step": 36 + }, + { + "epoch": 0.7, + "learning_rate": 0.00026915806281181686, + "loss": 0.6512, + "step": 37 + }, + { + "epoch": 0.72, + "learning_rate": 0.00026727472237020447, + "loss": 0.746, + "step": 38 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002653425788009043, + "loss": 0.7126, + "step": 39 + }, + { + "epoch": 0.75, + "learning_rate": 0.00026336243615313873, + "loss": 0.6657, + "step": 40 + }, + { + "epoch": 0.75, + "eval_loss": 9.270380973815918, + "eval_runtime": 28.0073, + "eval_samples_per_second": 4.177, + "eval_steps_per_second": 1.071, + "step": 40 + } + ], + "logging_steps": 1, + "max_steps": 159, + "num_train_epochs": 3, + "save_steps": 20, + "total_flos": 5.050356375905894e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Puffin-7B/checkpoint-40/training_args.bin b/Puffin-7B/checkpoint-40/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e --- /dev/null +++ b/Puffin-7B/checkpoint-40/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a +size 4219 diff --git a/Puffin-7B/checkpoint-60/README.md b/Puffin-7B/checkpoint-60/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-60/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-60/adapter_config.json b/Puffin-7B/checkpoint-60/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-60/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-60/adapter_model.bin b/Puffin-7B/checkpoint-60/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..22af2cc9b8330c05882b0ad00588be8e0373c9d9 --- /dev/null +++ b/Puffin-7B/checkpoint-60/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28eaf819a76beb056936cedc370545d758e16961dae5b4a0c8376b1b139996c8 +size 80114765 diff --git a/Puffin-7B/checkpoint-60/adapter_model/README.md b/Puffin-7B/checkpoint-60/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-60/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-60/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..22af2cc9b8330c05882b0ad00588be8e0373c9d9 --- /dev/null +++ b/Puffin-7B/checkpoint-60/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28eaf819a76beb056936cedc370545d758e16961dae5b4a0c8376b1b139996c8 +size 80114765 diff --git a/Puffin-7B/checkpoint-60/optimizer.pt b/Puffin-7B/checkpoint-60/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..580dfe06fb5978e8d1327d7e877bbbc6b197d95c --- /dev/null +++ b/Puffin-7B/checkpoint-60/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:695fc79fb9df7aaa302b9dc715ae5e30dc4e234feb66e4d818437bc9996eccdc +size 40569887 diff --git a/Puffin-7B/checkpoint-60/rng_state.pth b/Puffin-7B/checkpoint-60/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3bdae08984eda60050aa921cc54e43ea292c8846 --- /dev/null +++ b/Puffin-7B/checkpoint-60/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77cb4cbfb7b181923b75cbee905a10468e8293562c12029dd61d7d9f6c46f32c +size 14575 diff --git a/Puffin-7B/checkpoint-60/scheduler.pt b/Puffin-7B/checkpoint-60/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fba444037085fcecc8bf1025f71f9e0212faa4fc --- /dev/null +++ b/Puffin-7B/checkpoint-60/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04135f2b8153c7b36bfd155069e4a5c7f8acfedd0e84a78c0f2a68acec1181c9 +size 627 diff --git a/Puffin-7B/checkpoint-60/trainer_state.json b/Puffin-7B/checkpoint-60/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8bd5f6121d3bc973cc7aa56b26e66d8728d8ec56 --- /dev/null +++ b/Puffin-7B/checkpoint-60/trainer_state.json @@ -0,0 +1,403 @@ +{ + "best_metric": 8.979241371154785, + "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20", + "epoch": 1.1320754716981132, + "eval_steps": 20, + "global_step": 60, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.9753, + "step": 1 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011999999999999999, + "loss": 0.9631, + "step": 2 + }, + { + "epoch": 0.06, + "learning_rate": 0.00017999999999999998, + "loss": 0.952, + "step": 3 + }, + { + "epoch": 0.08, + "learning_rate": 0.00023999999999999998, + "loss": 0.8687, + "step": 4 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003, + "loss": 1.0306, + "step": 5 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029996878922838096, + "loss": 0.9029, + "step": 6 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998751699016874, + "loss": 0.8488, + "step": 7 + }, + { + "epoch": 0.15, + "learning_rate": 0.000299719180979005, + "loss": 0.8167, + "step": 8 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029950088737412895, + "loss": 0.7058, + "step": 9 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002992203799285506, + "loss": 0.8208, + "step": 10 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029887777537365414, + "loss": 0.752, + "step": 11 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002984732162821399, + "loss": 0.725, + "step": 12 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002980068710086933, + "loss": 0.7936, + "step": 13 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002974789336199254, + "loss": 0.7811, + "step": 14 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002968896238136131, + "loss": 0.7519, + "step": 15 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002962391868272735, + "loss": 0.7475, + "step": 16 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029552789333610964, + "loss": 0.8075, + "step": 17 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029475603934037094, + "loss": 0.7297, + "step": 18 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002939239460421746, + "loss": 0.7071, + "step": 19 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002930319597118391, + "loss": 0.7873, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 8.979241371154785, + "eval_runtime": 28.0141, + "eval_samples_per_second": 4.176, + "eval_steps_per_second": 1.071, + "step": 20 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002920804515437865, + "loss": 0.7057, + "step": 21 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002910698175020717, + "loss": 0.7695, + "step": 22 + }, + { + "epoch": 0.43, + "learning_rate": 0.000290000478155605, + "loss": 0.7348, + "step": 23 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002888728785031347, + "loss": 0.72, + "step": 24 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028768748778806386, + "loss": 0.742, + "step": 25 + }, + { + "epoch": 0.49, + "learning_rate": 0.00028644479930317775, + "loss": 0.7085, + "step": 26 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002851453301853628, + "loss": 0.7044, + "step": 27 + }, + { + "epoch": 0.53, + "learning_rate": 0.00028378962120040405, + "loss": 0.7065, + "step": 28 + }, + { + "epoch": 0.55, + "learning_rate": 0.00028237823651794814, + "loss": 0.7105, + "step": 29 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002809117634767284, + "loss": 0.6517, + "step": 30 + }, + { + "epoch": 0.58, + "learning_rate": 0.00027939081234014705, + "loss": 0.7091, + "step": 31 + }, + { + "epoch": 0.6, + "learning_rate": 0.00027781601604231847, + "loss": 0.7677, + "step": 32 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002761880299246772, + "loss": 0.6455, + "step": 33 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002745075314632621, + "loss": 0.7224, + "step": 34 + }, + { + "epoch": 0.66, + "learning_rate": 0.000272775219986789, + "loss": 0.6218, + "step": 35 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002709918163856295, + "loss": 0.6965, + "step": 36 + }, + { + "epoch": 0.7, + "learning_rate": 0.00026915806281181686, + "loss": 0.6512, + "step": 37 + }, + { + "epoch": 0.72, + "learning_rate": 0.00026727472237020447, + "loss": 0.746, + "step": 38 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002653425788009043, + "loss": 0.7126, + "step": 39 + }, + { + "epoch": 0.75, + "learning_rate": 0.00026336243615313873, + "loss": 0.6657, + "step": 40 + }, + { + "epoch": 0.75, + "eval_loss": 9.270380973815918, + "eval_runtime": 28.0073, + "eval_samples_per_second": 4.177, + "eval_steps_per_second": 1.071, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002613351184506405, + "loss": 0.6897, + "step": 41 + }, + { + "epoch": 0.79, + "learning_rate": 0.00025926146934874037, + "loss": 0.6771, + "step": 42 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002571423517832855, + "loss": 0.6954, + "step": 43 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002549786476115343, + "loss": 0.7264, + "step": 44 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002527712572451766, + "loss": 0.6308, + "step": 45 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002505210992756339, + "loss": 0.6424, + "step": 46 + }, + { + "epoch": 0.89, + "learning_rate": 0.00024822911009179276, + "loss": 0.6904, + "step": 47 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002458962434903327, + "loss": 0.8218, + "step": 48 + }, + { + "epoch": 0.92, + "learning_rate": 0.00024352347027881003, + "loss": 0.6698, + "step": 49 + }, + { + "epoch": 0.94, + "learning_rate": 0.00024111177787166212, + "loss": 0.7299, + "step": 50 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002386621698793015, + "loss": 0.6973, + "step": 51 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002361756656904695, + "loss": 0.7159, + "step": 52 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002336533000480244, + "loss": 0.703, + "step": 53 + }, + { + "epoch": 1.02, + "learning_rate": 0.00023109612261833963, + "loss": 0.7033, + "step": 54 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002285051975544918, + "loss": 0.6758, + "step": 55 + }, + { + "epoch": 1.06, + "learning_rate": 0.00022588160305342023, + "loss": 0.6828, + "step": 56 + }, + { + "epoch": 1.08, + "learning_rate": 0.00022322643090724216, + "loss": 0.6433, + "step": 57 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002205407860489105, + "loss": 0.6495, + "step": 58 + }, + { + "epoch": 1.11, + "learning_rate": 0.00021782578609240284, + "loss": 0.654, + "step": 59 + }, + { + "epoch": 1.13, + "learning_rate": 0.00021508256086763368, + "loss": 0.6364, + "step": 60 + }, + { + "epoch": 1.13, + "eval_loss": 9.256454467773438, + "eval_runtime": 28.0034, + "eval_samples_per_second": 4.178, + "eval_steps_per_second": 1.071, + "step": 60 + } + ], + "logging_steps": 1, + "max_steps": 159, + "num_train_epochs": 3, + "save_steps": 20, + "total_flos": 7.573191856540877e+16, + "trial_name": null, + "trial_params": null +} diff --git a/Puffin-7B/checkpoint-60/training_args.bin b/Puffin-7B/checkpoint-60/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e --- /dev/null +++ b/Puffin-7B/checkpoint-60/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a +size 4219 diff --git a/Puffin-7B/checkpoint-80/README.md b/Puffin-7B/checkpoint-80/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-80/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-80/adapter_config.json b/Puffin-7B/checkpoint-80/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-80/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-80/adapter_model.bin b/Puffin-7B/checkpoint-80/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..aed06167ab488d62bd44dfe43529b4be650bc5ec --- /dev/null +++ b/Puffin-7B/checkpoint-80/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3534aa08e9f113c39c8290b11f3113c96d9537bd94a2d545ee2617cab7fb21b +size 80114765 diff --git a/Puffin-7B/checkpoint-80/adapter_model/README.md b/Puffin-7B/checkpoint-80/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f397922221c4a2f56d632b66d68ab92408f4d0f6 --- /dev/null +++ b/Puffin-7B/checkpoint-80/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: bfloat16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json b/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8a1cd75bf98835983a780077bf012648484e8285 --- /dev/null +++ b/Puffin-7B/checkpoint-80/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "down_proj", + "up_proj", + "v_proj", + "o_proj", + "q_proj", + "gate_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin b/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..aed06167ab488d62bd44dfe43529b4be650bc5ec --- /dev/null +++ b/Puffin-7B/checkpoint-80/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3534aa08e9f113c39c8290b11f3113c96d9537bd94a2d545ee2617cab7fb21b +size 80114765 diff --git a/Puffin-7B/checkpoint-80/optimizer.pt b/Puffin-7B/checkpoint-80/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ee03c93b90bd36f39f25b90aeb83416b976f938 --- /dev/null +++ b/Puffin-7B/checkpoint-80/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d96f18bc030cc5de4b5f67fd24e51077b6f36a002049405a7aae8c1a8412eef9 +size 40569887 diff --git a/Puffin-7B/checkpoint-80/rng_state.pth b/Puffin-7B/checkpoint-80/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e2c37f5d3b5a7da301c5f21cff14af000c24460 --- /dev/null +++ b/Puffin-7B/checkpoint-80/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b25d04b74f535bace2746d0e525fef4336cb3eafa1df5d97916bc3a61217f08a +size 14575 diff --git a/Puffin-7B/checkpoint-80/scheduler.pt b/Puffin-7B/checkpoint-80/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..098ed5bd5a6302861b68d6f63a05067f39a9ee71 --- /dev/null +++ b/Puffin-7B/checkpoint-80/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d834acafa9bc3ca9adcc7c685156c02a8dd422cea7a30a83d3ec21d63724de75 +size 627 diff --git a/Puffin-7B/checkpoint-80/trainer_state.json b/Puffin-7B/checkpoint-80/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..aacf30e1ecf737a9cdaa911364fe06bd7b95b2c1 --- /dev/null +++ b/Puffin-7B/checkpoint-80/trainer_state.json @@ -0,0 +1,531 @@ +{ + "best_metric": 8.979241371154785, + "best_model_checkpoint": "./qlora-out-Puffin/checkpoint-20", + "epoch": 1.509433962264151, + "eval_steps": 20, + "global_step": 80, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.02, + "learning_rate": 5.9999999999999995e-05, + "loss": 0.9753, + "step": 1 + }, + { + "epoch": 0.04, + "learning_rate": 0.00011999999999999999, + "loss": 0.9631, + "step": 2 + }, + { + "epoch": 0.06, + "learning_rate": 0.00017999999999999998, + "loss": 0.952, + "step": 3 + }, + { + "epoch": 0.08, + "learning_rate": 0.00023999999999999998, + "loss": 0.8687, + "step": 4 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003, + "loss": 1.0306, + "step": 5 + }, + { + "epoch": 0.11, + "learning_rate": 0.00029996878922838096, + "loss": 0.9029, + "step": 6 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002998751699016874, + "loss": 0.8488, + "step": 7 + }, + { + "epoch": 0.15, + "learning_rate": 0.000299719180979005, + "loss": 0.8167, + "step": 8 + }, + { + "epoch": 0.17, + "learning_rate": 0.00029950088737412895, + "loss": 0.7058, + "step": 9 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002992203799285506, + "loss": 0.8208, + "step": 10 + }, + { + "epoch": 0.21, + "learning_rate": 0.00029887777537365414, + "loss": 0.752, + "step": 11 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002984732162821399, + "loss": 0.725, + "step": 12 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002980068710086933, + "loss": 0.7936, + "step": 13 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002974789336199254, + "loss": 0.7811, + "step": 14 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002968896238136131, + "loss": 0.7519, + "step": 15 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002962391868272735, + "loss": 0.7475, + "step": 16 + }, + { + "epoch": 0.32, + "learning_rate": 0.00029552789333610964, + "loss": 0.8075, + "step": 17 + }, + { + "epoch": 0.34, + "learning_rate": 0.00029475603934037094, + "loss": 0.7297, + "step": 18 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002939239460421746, + "loss": 0.7071, + "step": 19 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002930319597118391, + "loss": 0.7873, + "step": 20 + }, + { + "epoch": 0.38, + "eval_loss": 8.979241371154785, + "eval_runtime": 28.0141, + "eval_samples_per_second": 4.176, + "eval_steps_per_second": 1.071, + "step": 20 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002920804515437865, + "loss": 0.7057, + "step": 21 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002910698175020717, + "loss": 0.7695, + "step": 22 + }, + { + "epoch": 0.43, + "learning_rate": 0.000290000478155605, + "loss": 0.7348, + "step": 23 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002888728785031347, + "loss": 0.72, + "step": 24 + }, + { + "epoch": 0.47, + "learning_rate": 0.00028768748778806386, + "loss": 0.742, + "step": 25 + }, + { + "epoch": 0.49, + "learning_rate": 0.00028644479930317775, + "loss": 0.7085, + "step": 26 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002851453301853628, + "loss": 0.7044, + "step": 27 + }, + { + "epoch": 0.53, + "learning_rate": 0.00028378962120040405, + "loss": 0.7065, + "step": 28 + }, + { + "epoch": 0.55, + "learning_rate": 0.00028237823651794814, + "loss": 0.7105, + "step": 29 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002809117634767284, + "loss": 0.6517, + "step": 30 + }, + { + "epoch": 0.58, + "learning_rate": 0.00027939081234014705, + "loss": 0.7091, + "step": 31 + }, + { + "epoch": 0.6, + "learning_rate": 0.00027781601604231847, + "loss": 0.7677, + "step": 32 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002761880299246772, + "loss": 0.6455, + "step": 33 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002745075314632621, + "loss": 0.7224, + "step": 34 + }, + { + "epoch": 0.66, + "learning_rate": 0.000272775219986789, + "loss": 0.6218, + "step": 35 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002709918163856295, + "loss": 0.6965, + "step": 36 + }, + { + "epoch": 0.7, + "learning_rate": 0.00026915806281181686, + "loss": 0.6512, + "step": 37 + }, + { + "epoch": 0.72, + "learning_rate": 0.00026727472237020447, + "loss": 0.746, + "step": 38 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002653425788009043, + "loss": 0.7126, + "step": 39 + }, + { + "epoch": 0.75, + "learning_rate": 0.00026336243615313873, + "loss": 0.6657, + "step": 40 + }, + { + "epoch": 0.75, + "eval_loss": 9.270380973815918, + "eval_runtime": 28.0073, + "eval_samples_per_second": 4.177, + "eval_steps_per_second": 1.071, + "step": 40 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002613351184506405, + "loss": 0.6897, + "step": 41 + }, + { + "epoch": 0.79, + "learning_rate": 0.00025926146934874037, + "loss": 0.6771, + "step": 42 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002571423517832855, + "loss": 0.6954, + "step": 43 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002549786476115343, + "loss": 0.7264, + "step": 44 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002527712572451766, + "loss": 0.6308, + "step": 45 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002505210992756339, + "loss": 0.6424, + "step": 46 + }, + { + "epoch": 0.89, + "learning_rate": 0.00024822911009179276, + "loss": 0.6904, + "step": 47 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002458962434903327, + "loss": 0.8218, + "step": 48 + }, + { + "epoch": 0.92, + "learning_rate": 0.00024352347027881003, + "loss": 0.6698, + "step": 49 + }, + { + "epoch": 0.94, + "learning_rate": 0.00024111177787166212, + "loss": 0.7299, + "step": 50 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002386621698793015, + "loss": 0.6973, + "step": 51 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002361756656904695, + "loss": 0.7159, + "step": 52 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002336533000480244, + "loss": 0.703, + "step": 53 + }, + { + "epoch": 1.02, + "learning_rate": 0.00023109612261833963, + "loss": 0.7033, + "step": 54 + }, + { + "epoch": 1.04, + "learning_rate": 0.0002285051975544918, + "loss": 0.6758, + "step": 55 + }, + { + "epoch": 1.06, + "learning_rate": 0.00022588160305342023, + "loss": 0.6828, + "step": 56 + }, + { + "epoch": 1.08, + "learning_rate": 0.00022322643090724216, + "loss": 0.6433, + "step": 57 + }, + { + "epoch": 1.09, + "learning_rate": 0.0002205407860489105, + "loss": 0.6495, + "step": 58 + }, + { + "epoch": 1.11, + "learning_rate": 0.00021782578609240284, + "loss": 0.654, + "step": 59 + }, + { + "epoch": 1.13, + "learning_rate": 0.00021508256086763368, + "loss": 0.6364, + "step": 60 + }, + { + "epoch": 1.13, + "eval_loss": 9.256454467773438, + "eval_runtime": 28.0034, + "eval_samples_per_second": 4.178, + "eval_steps_per_second": 1.071, + "step": 60 + }, + { + "epoch": 1.15, + "learning_rate": 0.00021231225195028297, + "loss": 0.6944, + "step": 61 + }, + { + "epoch": 1.17, + "learning_rate": 0.00020951601218673635, + "loss": 0.6534, + "step": 62 + }, + { + "epoch": 1.19, + "learning_rate": 0.00020669500521433597, + "loss": 0.7002, + "step": 63 + }, + { + "epoch": 1.21, + "learning_rate": 0.00020385040497713976, + "loss": 0.6381, + "step": 64 + }, + { + "epoch": 1.23, + "learning_rate": 0.00020098339523739247, + "loss": 0.6451, + "step": 65 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001980951690829103, + "loss": 0.6308, + "step": 66 + }, + { + "epoch": 1.26, + "learning_rate": 0.00019518692843058512, + "loss": 0.6586, + "step": 67 + }, + { + "epoch": 1.28, + "learning_rate": 0.00019225988352621445, + "loss": 0.7115, + "step": 68 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001893152524408653, + "loss": 0.5739, + "step": 69 + }, + { + "epoch": 1.32, + "learning_rate": 0.00018635426056398186, + "loss": 0.6538, + "step": 70 + }, + { + "epoch": 1.34, + "learning_rate": 0.00018337814009344714, + "loss": 0.5868, + "step": 71 + }, + { + "epoch": 1.36, + "learning_rate": 0.00018038812952281212, + "loss": 0.6573, + "step": 72 + }, + { + "epoch": 1.38, + "learning_rate": 0.00017738547312590424, + "loss": 0.6931, + "step": 73 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001743714204390309, + "loss": 0.6873, + "step": 74 + }, + { + "epoch": 1.42, + "learning_rate": 0.00017134722574099276, + "loss": 0.6295, + "step": 75 + }, + { + "epoch": 1.43, + "learning_rate": 0.00016831414753112398, + "loss": 0.6832, + "step": 76 + }, + { + "epoch": 1.45, + "learning_rate": 0.00016527344800557533, + "loss": 0.646, + "step": 77 + }, + { + "epoch": 1.47, + "learning_rate": 0.00016222639253205947, + "loss": 0.6658, + "step": 78 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015917424912327641, + "loss": 0.6123, + "step": 79 + }, + { + "epoch": 1.51, + "learning_rate": 0.00015611828790923786, + "loss": 0.6698, + "step": 80 + }, + { + "epoch": 1.51, + "eval_loss": 9.175628662109375, + "eval_runtime": 28.001, + "eval_samples_per_second": 4.178, + "eval_steps_per_second": 1.071, + "step": 80 + } + ], + "logging_steps": 1, + "max_steps": 159, + "num_train_epochs": 3, + "save_steps": 20, + "total_flos": 1.0081710792454963e+17, + "trial_name": null, + "trial_params": null +} diff --git a/Puffin-7B/checkpoint-80/training_args.bin b/Puffin-7B/checkpoint-80/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..c04455771af5c94d4ad83dddf97f19e3f60cc49e --- /dev/null +++ b/Puffin-7B/checkpoint-80/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cedb1881c5272bd443403b1508825590117bc6ccfd192e967d653552a0caa6a +size 4219 diff --git a/Puffin-7B/special_tokens_map.json b/Puffin-7B/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/Puffin-7B/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/Puffin-7B/tokenizer.model b/Puffin-7B/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/Puffin-7B/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/Puffin-7B/tokenizer_config.json b/Puffin-7B/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26 --- /dev/null +++ b/Puffin-7B/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +} diff --git a/ShareGPT-cleaned/README.md b/ShareGPT-cleaned/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/adapter_config.json b/ShareGPT-cleaned/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/adapter_model.bin b/ShareGPT-cleaned/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a --- /dev/null +++ b/ShareGPT-cleaned/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53 +size 80114765 diff --git a/ShareGPT-cleaned/added_tokens.json b/ShareGPT-cleaned/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..9c16aa4be022f03ad001b006fba14dfb73a1929c --- /dev/null +++ b/ShareGPT-cleaned/added_tokens.json @@ -0,0 +1,3 @@ +{ + "": 32000 +} diff --git a/ShareGPT-cleaned/checkpoint-12/README.md b/ShareGPT-cleaned/checkpoint-12/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_config.json b/ShareGPT-cleaned/checkpoint-12/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model.bin b/ShareGPT-cleaned/checkpoint-12/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fb11ccb8c8d8ef1303e6963928ae852a3e624ad --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07c1b713f6eba443b872849b924f9c77fe69485dbedeed2095c4c74f3f1e5c8 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fb11ccb8c8d8ef1303e6963928ae852a3e624ad --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d07c1b713f6eba443b872849b924f9c77fe69485dbedeed2095c4c74f3f1e5c8 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-12/optimizer.pt b/ShareGPT-cleaned/checkpoint-12/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..72d5178ec523b7438ade0202541e2dc86a2ad0c4 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75261ac468437b0174757c6bced7a51cf80bcc4ee928cebf31ca3891deaddd49 +size 40569887 diff --git a/ShareGPT-cleaned/checkpoint-12/rng_state.pth b/ShareGPT-cleaned/checkpoint-12/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7e96a293c9bbccc9bdfd0dce6f567bf271db959f --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3106e70c084cf6d9b3133067f57d635d31ce5e67e314582db71d4b3ce4ec446c +size 14575 diff --git a/ShareGPT-cleaned/checkpoint-12/scheduler.pt b/ShareGPT-cleaned/checkpoint-12/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..542d577812371b57a2b87f48bd64167ec8959ef7 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25262a79f04fd972af27a73324b2eb39f5732b3adbb3489d877e0cbceab7224e +size 627 diff --git a/ShareGPT-cleaned/checkpoint-12/trainer_state.json b/ShareGPT-cleaned/checkpoint-12/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4a36aa0f56046181dd3bd4c77cb4360f85a5fab4 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/trainer_state.json @@ -0,0 +1,91 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9795918367346939, + "eval_steps": 500, + "global_step": 12, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 0.0003, + "loss": 1.0275, + "step": 1 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029939614409928584, + "loss": 1.0557, + "step": 2 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029758943828979444, + "loss": 1.0341, + "step": 3 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029459442910437797, + "loss": 1.0866, + "step": 4 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029043523059596053, + "loss": 1.0544, + "step": 5 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002851453301853628, + "loss": 0.9875, + "step": 6 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002787673190402799, + "loss": 1.0672, + "step": 7 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002713525491562421, + "loss": 1.0344, + "step": 8 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026296071990054165, + "loss": 1.0481, + "step": 9 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002536593973480297, + "loss": 0.8523, + "step": 10 + }, + { + "epoch": 0.9, + "learning_rate": 0.00024352347027881003, + "loss": 1.019, + "step": 11 + }, + { + "epoch": 0.98, + "learning_rate": 0.00023263454721781537, + "loss": 0.9123, + "step": 12 + } + ], + "logging_steps": 1, + "max_steps": 36, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 1.5882254111735808e+16, + "trial_name": null, + "trial_params": null +} diff --git a/ShareGPT-cleaned/checkpoint-12/training_args.bin b/ShareGPT-cleaned/checkpoint-12/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-12/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023 +size 4219 diff --git a/ShareGPT-cleaned/checkpoint-24/README.md b/ShareGPT-cleaned/checkpoint-24/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_config.json b/ShareGPT-cleaned/checkpoint-24/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model.bin b/ShareGPT-cleaned/checkpoint-24/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3f09d2aa3f9c98c1a43049a5a7a69576362884 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc64c7985ff45ee7f80d6cb8dd7eb22ebaa66ad796f2ebb5c9b59181b699e997 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e3f09d2aa3f9c98c1a43049a5a7a69576362884 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc64c7985ff45ee7f80d6cb8dd7eb22ebaa66ad796f2ebb5c9b59181b699e997 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-24/optimizer.pt b/ShareGPT-cleaned/checkpoint-24/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0d3aafc5b6595dc9d682b32d43bbbb2a47b91b9 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e5b505beeefeeec841267f568c495e8f92a3771e5f5b41903be4d415fb47efa +size 40569887 diff --git a/ShareGPT-cleaned/checkpoint-24/rng_state.pth b/ShareGPT-cleaned/checkpoint-24/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed769759652568720df6202b19479ab0d8b02493 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3e20b6e5968ea92d3ef28f695a6de364e7d3ba9d5545e08a5e373a05a7fee4c +size 14575 diff --git a/ShareGPT-cleaned/checkpoint-24/scheduler.pt b/ShareGPT-cleaned/checkpoint-24/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..474c893b363aeaf8ea20ccfc6808ed4bacc2adfa --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9083b16933f40bfbf9b5ea98facc24202d2fd767e5924b71286e71821404c5d6 +size 627 diff --git a/ShareGPT-cleaned/checkpoint-24/trainer_state.json b/ShareGPT-cleaned/checkpoint-24/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0b28c0b9a21e66c72b8811c52754dc955903d4a0 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/trainer_state.json @@ -0,0 +1,163 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9591836734693877, + "eval_steps": 500, + "global_step": 24, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 0.0003, + "loss": 1.0275, + "step": 1 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029939614409928584, + "loss": 1.0557, + "step": 2 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029758943828979444, + "loss": 1.0341, + "step": 3 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029459442910437797, + "loss": 1.0866, + "step": 4 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029043523059596053, + "loss": 1.0544, + "step": 5 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002851453301853628, + "loss": 0.9875, + "step": 6 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002787673190402799, + "loss": 1.0672, + "step": 7 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002713525491562421, + "loss": 1.0344, + "step": 8 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026296071990054165, + "loss": 1.0481, + "step": 9 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002536593973480297, + "loss": 0.8523, + "step": 10 + }, + { + "epoch": 0.9, + "learning_rate": 0.00024352347027881003, + "loss": 1.019, + "step": 11 + }, + { + "epoch": 0.98, + "learning_rate": 0.00023263454721781537, + "loss": 0.9123, + "step": 12 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002210802993709498, + "loss": 0.9131, + "step": 13 + }, + { + "epoch": 1.14, + "learning_rate": 0.00020895375474808852, + "loss": 0.9333, + "step": 14 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001963525491562421, + "loss": 0.84, + "step": 15 + }, + { + "epoch": 1.31, + "learning_rate": 0.00018337814009344714, + "loss": 0.9898, + "step": 16 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017013498987264832, + "loss": 0.9718, + "step": 17 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015672972455257723, + "loss": 0.9909, + "step": 18 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001432702754474228, + "loss": 0.8745, + "step": 19 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001298650101273517, + "loss": 0.8947, + "step": 20 + }, + { + "epoch": 1.71, + "learning_rate": 0.00011662185990655284, + "loss": 0.9917, + "step": 21 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001036474508437579, + "loss": 0.9463, + "step": 22 + }, + { + "epoch": 1.88, + "learning_rate": 9.104624525191145e-05, + "loss": 0.9384, + "step": 23 + }, + { + "epoch": 1.96, + "learning_rate": 7.89197006290502e-05, + "loss": 0.89, + "step": 24 + } + ], + "logging_steps": 1, + "max_steps": 36, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 3.1718955581177856e+16, + "trial_name": null, + "trial_params": null +} diff --git a/ShareGPT-cleaned/checkpoint-24/training_args.bin b/ShareGPT-cleaned/checkpoint-24/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-24/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023 +size 4219 diff --git a/ShareGPT-cleaned/checkpoint-36/README.md b/ShareGPT-cleaned/checkpoint-36/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_config.json b/ShareGPT-cleaned/checkpoint-36/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model.bin b/ShareGPT-cleaned/checkpoint-36/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md b/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..6c245dd4d0d9c42982f595cb3f63bc42afcddbb3 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/README.md @@ -0,0 +1,21 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- quant_method: bitsandbytes +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.5.0.dev0 diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f035b69c0f76b657a7ea6a70e8e929f6301cea47 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_config.json @@ -0,0 +1,26 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "abhishek/llama-2-7b-hf-small-shards", + "bias": "none", + "fan_in_fan_out": null, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 16, + "lora_dropout": 0.05, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "k_proj", + "v_proj", + "q_proj", + "up_proj", + "o_proj", + "gate_proj", + "down_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..15489d429b58fad9a4354febce043a14fd1d094a --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/adapter_model/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c45f3436fbeb782012ba6c0a7e855a1a891e5939a2279bf712d192d1d8c83a53 +size 80114765 diff --git a/ShareGPT-cleaned/checkpoint-36/optimizer.pt b/ShareGPT-cleaned/checkpoint-36/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c91e27e179e85bb902f05e4c69c411abe6776700 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff220621b882760c63f76d483365c31639b794b48befa77de9de572fcce64b47 +size 40569887 diff --git a/ShareGPT-cleaned/checkpoint-36/rng_state.pth b/ShareGPT-cleaned/checkpoint-36/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8e04fdea8e021a54d1b0efb8045bf8cb251bcb30 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d11ee642e6d300c4abb8c42b5efb5b0fcb1863dd61632b68c2a149bd1b50af5 +size 14575 diff --git a/ShareGPT-cleaned/checkpoint-36/scheduler.pt b/ShareGPT-cleaned/checkpoint-36/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e3ef78fa129be9ba4ff9c23d2297f6d63d3af2a0 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e465c4ecfe5cb05ad694da4e0166e9c5a9e2ca60e69ac108c0469c49a9b6b3ca +size 627 diff --git a/ShareGPT-cleaned/checkpoint-36/trainer_state.json b/ShareGPT-cleaned/checkpoint-36/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..47dca7b011ce07e5cce1b53e8e7af91e945f9b67 --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/trainer_state.json @@ -0,0 +1,235 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.938775510204082, + "eval_steps": 500, + "global_step": 36, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.08, + "learning_rate": 0.0003, + "loss": 1.0275, + "step": 1 + }, + { + "epoch": 0.16, + "learning_rate": 0.00029939614409928584, + "loss": 1.0557, + "step": 2 + }, + { + "epoch": 0.24, + "learning_rate": 0.00029758943828979444, + "loss": 1.0341, + "step": 3 + }, + { + "epoch": 0.33, + "learning_rate": 0.00029459442910437797, + "loss": 1.0866, + "step": 4 + }, + { + "epoch": 0.41, + "learning_rate": 0.00029043523059596053, + "loss": 1.0544, + "step": 5 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002851453301853628, + "loss": 0.9875, + "step": 6 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002787673190402799, + "loss": 1.0672, + "step": 7 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002713525491562421, + "loss": 1.0344, + "step": 8 + }, + { + "epoch": 0.73, + "learning_rate": 0.00026296071990054165, + "loss": 1.0481, + "step": 9 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002536593973480297, + "loss": 0.8523, + "step": 10 + }, + { + "epoch": 0.9, + "learning_rate": 0.00024352347027881003, + "loss": 1.019, + "step": 11 + }, + { + "epoch": 0.98, + "learning_rate": 0.00023263454721781537, + "loss": 0.9123, + "step": 12 + }, + { + "epoch": 1.06, + "learning_rate": 0.0002210802993709498, + "loss": 0.9131, + "step": 13 + }, + { + "epoch": 1.14, + "learning_rate": 0.00020895375474808852, + "loss": 0.9333, + "step": 14 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001963525491562421, + "loss": 0.84, + "step": 15 + }, + { + "epoch": 1.31, + "learning_rate": 0.00018337814009344714, + "loss": 0.9898, + "step": 16 + }, + { + "epoch": 1.39, + "learning_rate": 0.00017013498987264832, + "loss": 0.9718, + "step": 17 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015672972455257723, + "loss": 0.9909, + "step": 18 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001432702754474228, + "loss": 0.8745, + "step": 19 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001298650101273517, + "loss": 0.8947, + "step": 20 + }, + { + "epoch": 1.71, + "learning_rate": 0.00011662185990655284, + "loss": 0.9917, + "step": 21 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001036474508437579, + "loss": 0.9463, + "step": 22 + }, + { + "epoch": 1.88, + "learning_rate": 9.104624525191145e-05, + "loss": 0.9384, + "step": 23 + }, + { + "epoch": 1.96, + "learning_rate": 7.89197006290502e-05, + "loss": 0.89, + "step": 24 + }, + { + "epoch": 2.04, + "learning_rate": 6.736545278218463e-05, + "loss": 0.9943, + "step": 25 + }, + { + "epoch": 2.12, + "learning_rate": 5.6476529721189974e-05, + "loss": 0.8828, + "step": 26 + }, + { + "epoch": 2.2, + "learning_rate": 4.63406026519703e-05, + "loss": 0.884, + "step": 27 + }, + { + "epoch": 2.29, + "learning_rate": 3.7039280099458366e-05, + "loss": 0.8695, + "step": 28 + }, + { + "epoch": 2.37, + "learning_rate": 2.8647450843757897e-05, + "loss": 0.9088, + "step": 29 + }, + { + "epoch": 2.45, + "learning_rate": 2.1232680959720082e-05, + "loss": 0.9084, + "step": 30 + }, + { + "epoch": 2.53, + "learning_rate": 1.4854669814637143e-05, + "loss": 0.8754, + "step": 31 + }, + { + "epoch": 2.61, + "learning_rate": 9.564769404039419e-06, + "loss": 0.8326, + "step": 32 + }, + { + "epoch": 2.69, + "learning_rate": 5.405570895622013e-06, + "loss": 0.8982, + "step": 33 + }, + { + "epoch": 2.78, + "learning_rate": 2.4105617102055496e-06, + "loss": 1.0135, + "step": 34 + }, + { + "epoch": 2.86, + "learning_rate": 6.038559007141397e-07, + "loss": 0.9641, + "step": 35 + }, + { + "epoch": 2.94, + "learning_rate": 0.0, + "loss": 0.8868, + "step": 36 + } + ], + "logging_steps": 1, + "max_steps": 36, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 4.666672977385882e+16, + "trial_name": null, + "trial_params": null +} diff --git a/ShareGPT-cleaned/checkpoint-36/training_args.bin b/ShareGPT-cleaned/checkpoint-36/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..00e89e1c931b56d90542ad868dbc0e31f257ec1a --- /dev/null +++ b/ShareGPT-cleaned/checkpoint-36/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44a629ad453bf2898430a28c0829ede2a81c7366c7bf8749dc4057027d66b023 +size 4219 diff --git a/ShareGPT-cleaned/special_tokens_map.json b/ShareGPT-cleaned/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..3f58a5e115855c6ea3cec98accae196ad927222e --- /dev/null +++ b/ShareGPT-cleaned/special_tokens_map.json @@ -0,0 +1,6 @@ +{ + "bos_token": "", + "eos_token": "", + "pad_token": "[PAD]", + "unk_token": "" +} diff --git a/ShareGPT-cleaned/tokenizer.model b/ShareGPT-cleaned/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..6c00c742ce03c627d6cd5b795984876fa49fa899 --- /dev/null +++ b/ShareGPT-cleaned/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 +size 499723 diff --git a/ShareGPT-cleaned/tokenizer_config.json b/ShareGPT-cleaned/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..65532c2074f7f407d2d801a559f3d90aa5137f26 --- /dev/null +++ b/ShareGPT-cleaned/tokenizer_config.json @@ -0,0 +1,38 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "bos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "clean_up_tokenization_spaces": false, + "eos_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "legacy": false, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": { + "__type": "AddedToken", + "content": "", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false + }, + "use_default_system_prompt": true, + "use_fast": true +}