diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..ba58ce7ac27cf656a0c2a6f16aee5e73678fff2c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +checkpoint-138/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-276/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-414/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-552/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-690/tokenizer.json filter=lfs diff=lfs merge=lfs -text +checkpoint-828/tokenizer.json filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/adapter_model.safetensors b/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1ecb1cb90b4c9a55e6948a3d7e811dd5f8bb8f7 --- /dev/null +++ b/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d075579a534a1d4a45a32603660edecb6740758c1bbc080e4012de433623d5e4 +size 12603848 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-138/README.md b/checkpoint-138/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-138/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-138/adapter_config.json b/checkpoint-138/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-138/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-138/adapter_model.safetensors b/checkpoint-138/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7721e2fd58c85bf6fb2db918511750fffe441589 --- /dev/null +++ b/checkpoint-138/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a7755c03037c9f31df4903fcb8e90efa96bc47d5ce6af67acda272d880df40 +size 12603848 diff --git a/checkpoint-138/chat_template.jinja b/checkpoint-138/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-138/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-138/optimizer.pt b/checkpoint-138/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..af9392c26a580539073b1433ea504c7d56413777 --- /dev/null +++ b/checkpoint-138/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f9e8658919ec83d372c14a1f908a820529ee7b5052631dfb8d597f18bd417b4 +size 25246667 diff --git a/checkpoint-138/rng_state.pth b/checkpoint-138/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b79e0a333286a4cff79a730d488bdd3226db5dc0 --- /dev/null +++ b/checkpoint-138/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:097fc9e407a419ddf28f3b853d450a43384d68e36e08e05b71f4c0a434df65b0 +size 14645 diff --git a/checkpoint-138/scheduler.pt b/checkpoint-138/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7441c727f9cb1c4968a8083ba627841ec358be0f --- /dev/null +++ b/checkpoint-138/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93eb7fd4a08aff2d138b02664f700445517a5d28c2e4178bd23a13a1600e1dd2 +size 1465 diff --git a/checkpoint-138/tokenizer.json b/checkpoint-138/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-138/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-138/tokenizer_config.json b/checkpoint-138/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-138/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-138/trainer_state.json b/checkpoint-138/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9e07b3bc9b205b5ad8467282f0f2dafeab57458 --- /dev/null +++ b/checkpoint-138/trainer_state.json @@ -0,0 +1,56 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 500, + "global_step": 138, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4977144119808000.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-138/training_args.bin b/checkpoint-138/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-138/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/checkpoint-276/README.md b/checkpoint-276/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-276/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-276/adapter_config.json b/checkpoint-276/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-276/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-276/adapter_model.safetensors b/checkpoint-276/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..315919641c869215a91452520d8161c9b6f2a11d --- /dev/null +++ b/checkpoint-276/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c6ded5ba416b29a39409dadce0593ae89ad8745ffd49e026eebac7fd6a2418 +size 12603848 diff --git a/checkpoint-276/chat_template.jinja b/checkpoint-276/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-276/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-276/optimizer.pt b/checkpoint-276/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5fe093a6353c6cf08bc7a82559a70dfb7968797e --- /dev/null +++ b/checkpoint-276/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce21ffd5a7c5857b062193fe0933bfffa42a995c930787eb31aac23a793c7017 +size 25246667 diff --git a/checkpoint-276/rng_state.pth b/checkpoint-276/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..46fb50fe41dae7cad48db337b3a96ba480974f58 --- /dev/null +++ b/checkpoint-276/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ba65ae9c9e67ea17aba4e7a084c9af58ba8127153fbbf45dc7355c9e1bfa49 +size 14645 diff --git a/checkpoint-276/scheduler.pt b/checkpoint-276/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..431ec5e25d388299b9df2f00d5facde327d9d266 --- /dev/null +++ b/checkpoint-276/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1e711516c1ca8ebd1dc2ed174db5a989057bb5fee53204461ae96fd052b31d +size 1465 diff --git a/checkpoint-276/tokenizer.json b/checkpoint-276/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-276/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-276/tokenizer_config.json b/checkpoint-276/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-276/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-276/trainer_state.json b/checkpoint-276/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1482c2306484dd8b550fb15e2d75b53d9c7408b8 --- /dev/null +++ b/checkpoint-276/trainer_state.json @@ -0,0 +1,85 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 276, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 16.91636085510254, + "learning_rate": 8.20048309178744e-06, + "loss": 2.2990234375, + "step": 150 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 13.255877494812012, + "learning_rate": 7.596618357487924e-06, + "loss": 2.378984375, + "step": 200 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 18.71368980407715, + "learning_rate": 6.992753623188407e-06, + "loss": 2.15296875, + "step": 250 + }, + { + "epoch": 2.0, + "eval_loss": 0.5362404584884644, + "eval_runtime": 27.8676, + "eval_samples_per_second": 17.583, + "eval_steps_per_second": 2.225, + "step": 276 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0001921782173696e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-276/training_args.bin b/checkpoint-276/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-276/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/checkpoint-414/README.md b/checkpoint-414/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-414/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-414/adapter_config.json b/checkpoint-414/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-414/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-414/adapter_model.safetensors b/checkpoint-414/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..759d11e8175bc631812febac5b3cb1a7b6235ef0 --- /dev/null +++ b/checkpoint-414/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b41017ab6b0d4427509ea94161f0af3040548b002e820f30b9335297f1016b8 +size 12603848 diff --git a/checkpoint-414/chat_template.jinja b/checkpoint-414/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-414/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-414/optimizer.pt b/checkpoint-414/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..921ecf6cb835d91e9eb48d50bbddf4dd77b38a72 --- /dev/null +++ b/checkpoint-414/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b12f5c97844b923904162a7022aab604e03843bf1564c70629e3d3a4021f49f +size 25246667 diff --git a/checkpoint-414/rng_state.pth b/checkpoint-414/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..032bbe0eec0ad0ddbda241143152779be39851eb --- /dev/null +++ b/checkpoint-414/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6bc5073c8303a0cad198d2112cdec53793d9b4bb882654a273cfd73c3944d75 +size 14645 diff --git a/checkpoint-414/scheduler.pt b/checkpoint-414/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6488b890c67d4280f950ac35e5b456f236192f25 --- /dev/null +++ b/checkpoint-414/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb5f5537b0c69c2f67d8b43747fec7176271cf94315dbef082100a0915554593 +size 1465 diff --git a/checkpoint-414/tokenizer.json b/checkpoint-414/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-414/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-414/tokenizer_config.json b/checkpoint-414/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-414/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-414/trainer_state.json b/checkpoint-414/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..135ea9fa3a817ad0fb73224a0aae412d83eee78a --- /dev/null +++ b/checkpoint-414/trainer_state.json @@ -0,0 +1,114 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 414, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 16.91636085510254, + "learning_rate": 8.20048309178744e-06, + "loss": 2.2990234375, + "step": 150 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 13.255877494812012, + "learning_rate": 7.596618357487924e-06, + "loss": 2.378984375, + "step": 200 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 18.71368980407715, + "learning_rate": 6.992753623188407e-06, + "loss": 2.15296875, + "step": 250 + }, + { + "epoch": 2.0, + "eval_loss": 0.5362404584884644, + "eval_runtime": 27.8676, + "eval_samples_per_second": 17.583, + "eval_steps_per_second": 2.225, + "step": 276 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 9.45117473602295, + "learning_rate": 6.3888888888888885e-06, + "loss": 2.009921875, + "step": 300 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 8.40969467163086, + "learning_rate": 5.785024154589373e-06, + "loss": 2.07037109375, + "step": 350 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 10.270116806030273, + "learning_rate": 5.181159420289855e-06, + "loss": 1.952861328125, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 0.47528699040412903, + "eval_runtime": 27.7881, + "eval_samples_per_second": 17.633, + "eval_steps_per_second": 2.231, + "step": 414 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.4994156951207936e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-414/training_args.bin b/checkpoint-414/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-414/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/checkpoint-552/README.md b/checkpoint-552/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-552/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-552/adapter_config.json b/checkpoint-552/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-552/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-552/adapter_model.safetensors b/checkpoint-552/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29ab7a87cc48bf102d05767ed44f93ce481045e4 --- /dev/null +++ b/checkpoint-552/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dacd0d6979261464b07e15948cc6fdd581bd750fb7d7c57aca0ad0e4b950ba7e +size 12603848 diff --git a/checkpoint-552/chat_template.jinja b/checkpoint-552/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-552/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-552/optimizer.pt b/checkpoint-552/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..897d62c40522b0549f2d6cf44ee4e61c0900915a --- /dev/null +++ b/checkpoint-552/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecfa1c3281277e1473512d0dc93126f4d0329444a30025f98e736b3ad571ddf8 +size 25246667 diff --git a/checkpoint-552/rng_state.pth b/checkpoint-552/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d7bb4799624878f28ee0b4457a40c1e926363e37 --- /dev/null +++ b/checkpoint-552/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd750b00e420d79940704088c0b007122910045476fa6ec2e20d1312d1dce295 +size 14645 diff --git a/checkpoint-552/scheduler.pt b/checkpoint-552/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..55bb689ddb493920d687c0336df0aac14e07ffd6 --- /dev/null +++ b/checkpoint-552/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6eedc4cfb80ab99aa1713d3d8b96c4e095ada96a544f8ee08a1cc99f6b1a8c4 +size 1465 diff --git a/checkpoint-552/tokenizer.json b/checkpoint-552/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-552/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-552/tokenizer_config.json b/checkpoint-552/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-552/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-552/trainer_state.json b/checkpoint-552/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..45a3ed33a1b0947af957936656d43b39da14b11f --- /dev/null +++ b/checkpoint-552/trainer_state.json @@ -0,0 +1,143 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "eval_steps": 500, + "global_step": 552, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 16.91636085510254, + "learning_rate": 8.20048309178744e-06, + "loss": 2.2990234375, + "step": 150 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 13.255877494812012, + "learning_rate": 7.596618357487924e-06, + "loss": 2.378984375, + "step": 200 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 18.71368980407715, + "learning_rate": 6.992753623188407e-06, + "loss": 2.15296875, + "step": 250 + }, + { + "epoch": 2.0, + "eval_loss": 0.5362404584884644, + "eval_runtime": 27.8676, + "eval_samples_per_second": 17.583, + "eval_steps_per_second": 2.225, + "step": 276 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 9.45117473602295, + "learning_rate": 6.3888888888888885e-06, + "loss": 2.009921875, + "step": 300 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 8.40969467163086, + "learning_rate": 5.785024154589373e-06, + "loss": 2.07037109375, + "step": 350 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 10.270116806030273, + "learning_rate": 5.181159420289855e-06, + "loss": 1.952861328125, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 0.47528699040412903, + "eval_runtime": 27.7881, + "eval_samples_per_second": 17.633, + "eval_steps_per_second": 2.231, + "step": 414 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 17.692113876342773, + "learning_rate": 4.5772946859903385e-06, + "loss": 1.79087890625, + "step": 450 + }, + { + "epoch": 3.6231884057971016, + "grad_norm": 13.763245582580566, + "learning_rate": 3.973429951690821e-06, + "loss": 1.650498046875, + "step": 500 + }, + { + "epoch": 3.9855072463768115, + "grad_norm": 33.43427276611328, + "learning_rate": 3.3695652173913045e-06, + "loss": 1.610087890625, + "step": 550 + }, + { + "epoch": 4.0, + "eval_loss": 0.410451203584671, + "eval_runtime": 27.8228, + "eval_samples_per_second": 17.611, + "eval_steps_per_second": 2.228, + "step": 552 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.003460608777011e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-552/training_args.bin b/checkpoint-552/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-552/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/checkpoint-690/README.md b/checkpoint-690/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-690/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-690/adapter_config.json b/checkpoint-690/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-690/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-690/adapter_model.safetensors b/checkpoint-690/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70b3df0b36cba5b5e1cb037ab97a462e91cb8a73 --- /dev/null +++ b/checkpoint-690/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2794d903c1cbd94e2bba0319f8192757a96d582c707a5c961fea609b52020a9f +size 12603848 diff --git a/checkpoint-690/chat_template.jinja b/checkpoint-690/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-690/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-690/optimizer.pt b/checkpoint-690/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a698feb4705abca25e0d9b92d41f5491e3813f54 --- /dev/null +++ b/checkpoint-690/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38e5374d04c8a42750eebdc061404656338af5ce33418e0850c75a16cc54f38 +size 25246667 diff --git a/checkpoint-690/rng_state.pth b/checkpoint-690/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..502a66d35a9e33abf08a2d1145499dfc89ff9a92 --- /dev/null +++ b/checkpoint-690/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55d1a6531f1cad7cbc10ca169a19e64db822406a42e3eb0d1c0dbcb2171b791c +size 14645 diff --git a/checkpoint-690/scheduler.pt b/checkpoint-690/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..43d28ae4f70d721578ba73bd239abe986d84bf96 --- /dev/null +++ b/checkpoint-690/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a0943a79c4d72f7d717a68cee1dbf9a58982caea14f8a3dafb158b8c83312b56 +size 1465 diff --git a/checkpoint-690/tokenizer.json b/checkpoint-690/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-690/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-690/tokenizer_config.json b/checkpoint-690/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-690/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-690/trainer_state.json b/checkpoint-690/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1572b06bdbb516ae2e09287f103939594b005d26 --- /dev/null +++ b/checkpoint-690/trainer_state.json @@ -0,0 +1,165 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 500, + "global_step": 690, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 16.91636085510254, + "learning_rate": 8.20048309178744e-06, + "loss": 2.2990234375, + "step": 150 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 13.255877494812012, + "learning_rate": 7.596618357487924e-06, + "loss": 2.378984375, + "step": 200 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 18.71368980407715, + "learning_rate": 6.992753623188407e-06, + "loss": 2.15296875, + "step": 250 + }, + { + "epoch": 2.0, + "eval_loss": 0.5362404584884644, + "eval_runtime": 27.8676, + "eval_samples_per_second": 17.583, + "eval_steps_per_second": 2.225, + "step": 276 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 9.45117473602295, + "learning_rate": 6.3888888888888885e-06, + "loss": 2.009921875, + "step": 300 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 8.40969467163086, + "learning_rate": 5.785024154589373e-06, + "loss": 2.07037109375, + "step": 350 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 10.270116806030273, + "learning_rate": 5.181159420289855e-06, + "loss": 1.952861328125, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 0.47528699040412903, + "eval_runtime": 27.7881, + "eval_samples_per_second": 17.633, + "eval_steps_per_second": 2.231, + "step": 414 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 17.692113876342773, + "learning_rate": 4.5772946859903385e-06, + "loss": 1.79087890625, + "step": 450 + }, + { + "epoch": 3.6231884057971016, + "grad_norm": 13.763245582580566, + "learning_rate": 3.973429951690821e-06, + "loss": 1.650498046875, + "step": 500 + }, + { + "epoch": 3.9855072463768115, + "grad_norm": 33.43427276611328, + "learning_rate": 3.3695652173913045e-06, + "loss": 1.610087890625, + "step": 550 + }, + { + "epoch": 4.0, + "eval_loss": 0.410451203584671, + "eval_runtime": 27.8228, + "eval_samples_per_second": 17.611, + "eval_steps_per_second": 2.228, + "step": 552 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 8.163750648498535, + "learning_rate": 2.7657004830917877e-06, + "loss": 1.44470703125, + "step": 600 + }, + { + "epoch": 4.710144927536232, + "grad_norm": 17.915592193603516, + "learning_rate": 2.1618357487922704e-06, + "loss": 1.423818359375, + "step": 650 + }, + { + "epoch": 5.0, + "eval_loss": 0.3703005313873291, + "eval_runtime": 27.9318, + "eval_samples_per_second": 17.543, + "eval_steps_per_second": 2.22, + "step": 690 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.493846188133581e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-690/training_args.bin b/checkpoint-690/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-690/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/checkpoint-828/README.md b/checkpoint-828/README.md new file mode 100644 index 0000000000000000000000000000000000000000..dfd85d29159e61f8c090feb3d219c62a0c01c56b --- /dev/null +++ b/checkpoint-828/README.md @@ -0,0 +1,206 @@ +--- +base_model: microsoft/Phi-4-mini-instruct +library_name: peft +tags: +- base_model:adapter:microsoft/Phi-4-mini-instruct +- lora +- transformers +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoint-828/adapter_config.json b/checkpoint-828/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..675b31bd9d8a4c2efda38f5392b48daf033fe669 --- /dev/null +++ b/checkpoint-828/adapter_config.json @@ -0,0 +1,48 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "microsoft/Phi-4-mini-instruct", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [ + "classifier", + "score" + ], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "o_proj", + "q_proj", + "k_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": "SEQ_CLS", + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoint-828/adapter_model.safetensors b/checkpoint-828/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c1ecb1cb90b4c9a55e6948a3d7e811dd5f8bb8f7 --- /dev/null +++ b/checkpoint-828/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d075579a534a1d4a45a32603660edecb6740758c1bbc080e4012de433623d5e4 +size 12603848 diff --git a/checkpoint-828/chat_template.jinja b/checkpoint-828/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..a9c00dd9bbd97e117371168e9d62af65b9f0e725 --- /dev/null +++ b/checkpoint-828/chat_template.jinja @@ -0,0 +1 @@ +{% for message in messages %}{% if message['role'] == 'system' and 'tools' in message and message['tools'] is not none %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|tool|>' + message['tools'] + '<|/tool|>' + '<|end|>' }}{% else %}{{ '<|' + message['role'] + '|>' + message['content'] + '<|end|>' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|assistant|>' }}{% else %}{{ eos_token }}{% endif %} \ No newline at end of file diff --git a/checkpoint-828/optimizer.pt b/checkpoint-828/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2ac4eb43ea0243a1a36a651e333feccf4c122412 --- /dev/null +++ b/checkpoint-828/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f1025419199037fd5dba875df2ed69253e370e6e7cf57e0725477a29bb3d4eb +size 25246667 diff --git a/checkpoint-828/rng_state.pth b/checkpoint-828/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..93ef028e9ee8dc1648cf817282962d8a2d185348 --- /dev/null +++ b/checkpoint-828/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a7fe73d179f99ae6dc92c440369719ab7067c11bcc01d4e90c49866b2d2eea7 +size 14645 diff --git a/checkpoint-828/scheduler.pt b/checkpoint-828/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..72ea129a1cc8fd7cab5742cb5e2005ea9a1219f9 --- /dev/null +++ b/checkpoint-828/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dd0e92d55cdd722e4de685551b7dc9c6e87bd7ff9ce03d5b98abc8046dc40d4 +size 1465 diff --git a/checkpoint-828/tokenizer.json b/checkpoint-828/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/checkpoint-828/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/checkpoint-828/tokenizer_config.json b/checkpoint-828/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/checkpoint-828/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +} diff --git a/checkpoint-828/trainer_state.json b/checkpoint-828/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..263a7cc3a68a7d36c3a4c7a1d5bbf4de273d1980 --- /dev/null +++ b/checkpoint-828/trainer_state.json @@ -0,0 +1,194 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.0, + "eval_steps": 500, + "global_step": 828, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.36231884057971014, + "grad_norm": 11.473546981811523, + "learning_rate": 9.408212560386473e-06, + "loss": 2.518828125, + "step": 50 + }, + { + "epoch": 0.7246376811594203, + "grad_norm": 19.91433334350586, + "learning_rate": 8.804347826086957e-06, + "loss": 2.4250390625, + "step": 100 + }, + { + "epoch": 1.0, + "eval_loss": 0.5803093314170837, + "eval_runtime": 29.4101, + "eval_samples_per_second": 16.661, + "eval_steps_per_second": 2.108, + "step": 138 + }, + { + "epoch": 1.0869565217391304, + "grad_norm": 16.91636085510254, + "learning_rate": 8.20048309178744e-06, + "loss": 2.2990234375, + "step": 150 + }, + { + "epoch": 1.4492753623188406, + "grad_norm": 13.255877494812012, + "learning_rate": 7.596618357487924e-06, + "loss": 2.378984375, + "step": 200 + }, + { + "epoch": 1.8115942028985508, + "grad_norm": 18.71368980407715, + "learning_rate": 6.992753623188407e-06, + "loss": 2.15296875, + "step": 250 + }, + { + "epoch": 2.0, + "eval_loss": 0.5362404584884644, + "eval_runtime": 27.8676, + "eval_samples_per_second": 17.583, + "eval_steps_per_second": 2.225, + "step": 276 + }, + { + "epoch": 2.1739130434782608, + "grad_norm": 9.45117473602295, + "learning_rate": 6.3888888888888885e-06, + "loss": 2.009921875, + "step": 300 + }, + { + "epoch": 2.536231884057971, + "grad_norm": 8.40969467163086, + "learning_rate": 5.785024154589373e-06, + "loss": 2.07037109375, + "step": 350 + }, + { + "epoch": 2.898550724637681, + "grad_norm": 10.270116806030273, + "learning_rate": 5.181159420289855e-06, + "loss": 1.952861328125, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 0.47528699040412903, + "eval_runtime": 27.7881, + "eval_samples_per_second": 17.633, + "eval_steps_per_second": 2.231, + "step": 414 + }, + { + "epoch": 3.260869565217391, + "grad_norm": 17.692113876342773, + "learning_rate": 4.5772946859903385e-06, + "loss": 1.79087890625, + "step": 450 + }, + { + "epoch": 3.6231884057971016, + "grad_norm": 13.763245582580566, + "learning_rate": 3.973429951690821e-06, + "loss": 1.650498046875, + "step": 500 + }, + { + "epoch": 3.9855072463768115, + "grad_norm": 33.43427276611328, + "learning_rate": 3.3695652173913045e-06, + "loss": 1.610087890625, + "step": 550 + }, + { + "epoch": 4.0, + "eval_loss": 0.410451203584671, + "eval_runtime": 27.8228, + "eval_samples_per_second": 17.611, + "eval_steps_per_second": 2.228, + "step": 552 + }, + { + "epoch": 4.3478260869565215, + "grad_norm": 8.163750648498535, + "learning_rate": 2.7657004830917877e-06, + "loss": 1.44470703125, + "step": 600 + }, + { + "epoch": 4.710144927536232, + "grad_norm": 17.915592193603516, + "learning_rate": 2.1618357487922704e-06, + "loss": 1.423818359375, + "step": 650 + }, + { + "epoch": 5.0, + "eval_loss": 0.3703005313873291, + "eval_runtime": 27.9318, + "eval_samples_per_second": 17.543, + "eval_steps_per_second": 2.22, + "step": 690 + }, + { + "epoch": 5.072463768115942, + "grad_norm": 22.16204071044922, + "learning_rate": 1.5579710144927536e-06, + "loss": 1.38701171875, + "step": 700 + }, + { + "epoch": 5.434782608695652, + "grad_norm": 23.789710998535156, + "learning_rate": 9.541062801932368e-07, + "loss": 1.331396484375, + "step": 750 + }, + { + "epoch": 5.797101449275362, + "grad_norm": 29.811561584472656, + "learning_rate": 3.5024154589371985e-07, + "loss": 1.2618359375, + "step": 800 + }, + { + "epoch": 6.0, + "eval_loss": 0.35662469267845154, + "eval_runtime": 27.9183, + "eval_samples_per_second": 17.551, + "eval_steps_per_second": 2.221, + "step": 828 + } + ], + "logging_steps": 50, + "max_steps": 828, + "num_input_tokens_seen": 0, + "num_train_epochs": 6, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 2.9920558960889856e+16, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-828/training_args.bin b/checkpoint-828/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..41879da9e32eb23f5013e1a62452ec18a7e8995e --- /dev/null +++ b/checkpoint-828/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:033534501afcab0521199d3c0685a5e811f2a297a318bad85be0999d18aac32c +size 5137 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..8655fee00020e3140fc51416dac7cb0b9a8e4c45 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ea8bdf68c3e7549a3fb4342523288ce628f6ab56a618f9a4dfb234a0b4d46a8 +size 15524476 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..007b4fee54acc99b382393f6900ba96e673f8b49 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,12 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "<|endoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|endoftext|>", + "is_local": false, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "tokenizer_class": "TokenizersBackend", + "unk_token": "<|endoftext|>" +}