diff --git a/.gitattributes b/.gitattributes index 295d77e2899184fa0a855f0100e981338d9fa84e..c74f7f4e7ff10bd761c36e2e0661a2bab6c84666 100644 --- a/.gitattributes +++ b/.gitattributes @@ -767,3 +767,12 @@ gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs- gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-15098-sd-1/checkpoint-6512/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-15098-sd-1/checkpoint-814/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-15098-sd-1/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer.json filter=lfs diff=lfs merge=lfs -text +Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08683fb978187499f5a5a94f1ef7270659f69cbe --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3197d67f8761102919f87e8312b13d7ed1575c87e89b969c1714ee45600b400 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2efa23f3125b5816fccb026dc8e61e9f4a5339b8 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ab773b370f12bb18297890c042a85454c96ca35f6aa5a4b467bd9ded4ac5d89 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..289e881df7f565f2201579bdb4454bdf61ecb726 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19936997f3a76e5655f1a44c66dace94ad45c91db3aeba758da0191f75bad6a2 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..12a5755bfdeb55f461bd77107620533d675bf6f8 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def37be7123c682afaa0e2890fa5608fcc367b17e565c5cd23b68dd5cdaba255 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f0b81e4fe23b2c108d7106fd67ac3f3aa0aa155 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:748d461c2ecd47f12cfa8309c4a2efcea7427f60a4c658152cceff8d9d347066 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5ed9cdd9f0930a86404d055ee3403cd9aaf55915 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/trainer_state.json @@ -0,0 +1,912 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 1210, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + }, + { + "epoch": 3.0082644628099175, + "grad_norm": 0.4399570822715759, + "learning_rate": 0.0002, + "loss": 1.6173, + "step": 910 + }, + { + "epoch": 3.041322314049587, + "grad_norm": 0.5338484644889832, + "learning_rate": 0.0002, + "loss": 1.5578, + "step": 920 + }, + { + "epoch": 3.074380165289256, + "grad_norm": 0.6154358983039856, + "learning_rate": 0.0002, + "loss": 1.5507, + "step": 930 + }, + { + "epoch": 3.1074380165289255, + "grad_norm": 0.6429790258407593, + "learning_rate": 0.0002, + "loss": 1.6189, + "step": 940 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 0.5375680923461914, + "learning_rate": 0.0002, + "loss": 1.5866, + "step": 950 + }, + { + "epoch": 3.1735537190082646, + "grad_norm": 0.5594999194145203, + "learning_rate": 0.0002, + "loss": 1.5119, + "step": 960 + }, + { + "epoch": 3.206611570247934, + "grad_norm": 0.6742738485336304, + "learning_rate": 0.0002, + "loss": 1.5096, + "step": 970 + }, + { + "epoch": 3.239669421487603, + "grad_norm": 0.563497006893158, + "learning_rate": 0.0002, + "loss": 1.5477, + "step": 980 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.6521140933036804, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 990 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 0.6016622185707092, + "learning_rate": 0.0002, + "loss": 1.4905, + "step": 1000 + }, + { + "epoch": 3.3388429752066116, + "grad_norm": 0.6564913988113403, + "learning_rate": 0.0002, + "loss": 1.5307, + "step": 1010 + }, + { + "epoch": 3.371900826446281, + "grad_norm": 0.6528742909431458, + "learning_rate": 0.0002, + "loss": 1.4595, + "step": 1020 + }, + { + "epoch": 3.4049586776859506, + "grad_norm": 0.5843546390533447, + "learning_rate": 0.0002, + "loss": 1.518, + "step": 1030 + }, + { + "epoch": 3.43801652892562, + "grad_norm": 0.5892922282218933, + "learning_rate": 0.0002, + "loss": 1.5148, + "step": 1040 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 0.6217362284660339, + "learning_rate": 0.0002, + "loss": 1.5125, + "step": 1050 + }, + { + "epoch": 3.5041322314049586, + "grad_norm": 0.5837283134460449, + "learning_rate": 0.0002, + "loss": 1.526, + "step": 1060 + }, + { + "epoch": 3.537190082644628, + "grad_norm": 0.6369057893753052, + "learning_rate": 0.0002, + "loss": 1.5776, + "step": 1070 + }, + { + "epoch": 3.5702479338842976, + "grad_norm": 0.632115364074707, + "learning_rate": 0.0002, + "loss": 1.4758, + "step": 1080 + }, + { + "epoch": 3.603305785123967, + "grad_norm": 0.6364002823829651, + "learning_rate": 0.0002, + "loss": 1.5604, + "step": 1090 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.550032377243042, + "learning_rate": 0.0002, + "loss": 1.508, + "step": 1100 + }, + { + "epoch": 3.669421487603306, + "grad_norm": 0.6106863617897034, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 1110 + }, + { + "epoch": 3.7024793388429753, + "grad_norm": 0.635955274105072, + "learning_rate": 0.0002, + "loss": 1.5237, + "step": 1120 + }, + { + "epoch": 3.7355371900826446, + "grad_norm": 0.615804135799408, + "learning_rate": 0.0002, + "loss": 1.5698, + "step": 1130 + }, + { + "epoch": 3.768595041322314, + "grad_norm": 0.5769386887550354, + "learning_rate": 0.0002, + "loss": 1.6068, + "step": 1140 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 0.5938104391098022, + "learning_rate": 0.0002, + "loss": 1.5262, + "step": 1150 + }, + { + "epoch": 3.834710743801653, + "grad_norm": 0.6149733066558838, + "learning_rate": 0.0002, + "loss": 1.5236, + "step": 1160 + }, + { + "epoch": 3.8677685950413223, + "grad_norm": 0.6228950023651123, + "learning_rate": 0.0002, + "loss": 1.5585, + "step": 1170 + }, + { + "epoch": 3.9008264462809916, + "grad_norm": 0.6196513175964355, + "learning_rate": 0.0002, + "loss": 1.5815, + "step": 1180 + }, + { + "epoch": 3.9338842975206614, + "grad_norm": 0.5946677327156067, + "learning_rate": 0.0002, + "loss": 1.5894, + "step": 1190 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 0.5882220268249512, + "learning_rate": 0.0002, + "loss": 1.5451, + "step": 1200 + }, + { + "epoch": 4.0, + "grad_norm": 0.6291728019714355, + "learning_rate": 0.0002, + "loss": 1.6202, + "step": 1210 + }, + { + "epoch": 4.0, + "eval_loss": 1.8943731784820557, + "eval_runtime": 38.826, + "eval_samples_per_second": 13.264, + "eval_steps_per_second": 1.674, + "step": 1210 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.599609070026752e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1210/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89be817e269b9022b2699a6978eeb1991ee7eb95 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81034895c30917aec337505b171255850355460761c941c8f0526f7b4a5f3bb +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8081389d2ba9fe593079515eaef171523b3a55c9 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eccdd75478481c091050ecd935fb88913e3642436b461bae07e14e79343e348 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..259e270e5c62d38c7d92edd007ac8b4d5b6ae9da --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7462d7da945cfc3bb68e586bdef3173390117746953420d9a316ddcea754e4d4 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d54de962b38db7b79c29cf78e6f5fb6d2644035 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766e233135c4d4aeeee602a00039be8d7a10cecb09d2120fb926c12f65d57943 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cb4218ccc4a5c69edc212a44ff39b84dd9586c01 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/trainer_state.json @@ -0,0 +1,1130 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 4.998347107438017, + "eval_steps": 10, + "global_step": 1512, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + }, + { + "epoch": 3.0082644628099175, + "grad_norm": 0.4399570822715759, + "learning_rate": 0.0002, + "loss": 1.6173, + "step": 910 + }, + { + "epoch": 3.041322314049587, + "grad_norm": 0.5338484644889832, + "learning_rate": 0.0002, + "loss": 1.5578, + "step": 920 + }, + { + "epoch": 3.074380165289256, + "grad_norm": 0.6154358983039856, + "learning_rate": 0.0002, + "loss": 1.5507, + "step": 930 + }, + { + "epoch": 3.1074380165289255, + "grad_norm": 0.6429790258407593, + "learning_rate": 0.0002, + "loss": 1.6189, + "step": 940 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 0.5375680923461914, + "learning_rate": 0.0002, + "loss": 1.5866, + "step": 950 + }, + { + "epoch": 3.1735537190082646, + "grad_norm": 0.5594999194145203, + "learning_rate": 0.0002, + "loss": 1.5119, + "step": 960 + }, + { + "epoch": 3.206611570247934, + "grad_norm": 0.6742738485336304, + "learning_rate": 0.0002, + "loss": 1.5096, + "step": 970 + }, + { + "epoch": 3.239669421487603, + "grad_norm": 0.563497006893158, + "learning_rate": 0.0002, + "loss": 1.5477, + "step": 980 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.6521140933036804, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 990 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 0.6016622185707092, + "learning_rate": 0.0002, + "loss": 1.4905, + "step": 1000 + }, + { + "epoch": 3.3388429752066116, + "grad_norm": 0.6564913988113403, + "learning_rate": 0.0002, + "loss": 1.5307, + "step": 1010 + }, + { + "epoch": 3.371900826446281, + "grad_norm": 0.6528742909431458, + "learning_rate": 0.0002, + "loss": 1.4595, + "step": 1020 + }, + { + "epoch": 3.4049586776859506, + "grad_norm": 0.5843546390533447, + "learning_rate": 0.0002, + "loss": 1.518, + "step": 1030 + }, + { + "epoch": 3.43801652892562, + "grad_norm": 0.5892922282218933, + "learning_rate": 0.0002, + "loss": 1.5148, + "step": 1040 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 0.6217362284660339, + "learning_rate": 0.0002, + "loss": 1.5125, + "step": 1050 + }, + { + "epoch": 3.5041322314049586, + "grad_norm": 0.5837283134460449, + "learning_rate": 0.0002, + "loss": 1.526, + "step": 1060 + }, + { + "epoch": 3.537190082644628, + "grad_norm": 0.6369057893753052, + "learning_rate": 0.0002, + "loss": 1.5776, + "step": 1070 + }, + { + "epoch": 3.5702479338842976, + "grad_norm": 0.632115364074707, + "learning_rate": 0.0002, + "loss": 1.4758, + "step": 1080 + }, + { + "epoch": 3.603305785123967, + "grad_norm": 0.6364002823829651, + "learning_rate": 0.0002, + "loss": 1.5604, + "step": 1090 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.550032377243042, + "learning_rate": 0.0002, + "loss": 1.508, + "step": 1100 + }, + { + "epoch": 3.669421487603306, + "grad_norm": 0.6106863617897034, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 1110 + }, + { + "epoch": 3.7024793388429753, + "grad_norm": 0.635955274105072, + "learning_rate": 0.0002, + "loss": 1.5237, + "step": 1120 + }, + { + "epoch": 3.7355371900826446, + "grad_norm": 0.615804135799408, + "learning_rate": 0.0002, + "loss": 1.5698, + "step": 1130 + }, + { + "epoch": 3.768595041322314, + "grad_norm": 0.5769386887550354, + "learning_rate": 0.0002, + "loss": 1.6068, + "step": 1140 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 0.5938104391098022, + "learning_rate": 0.0002, + "loss": 1.5262, + "step": 1150 + }, + { + "epoch": 3.834710743801653, + "grad_norm": 0.6149733066558838, + "learning_rate": 0.0002, + "loss": 1.5236, + "step": 1160 + }, + { + "epoch": 3.8677685950413223, + "grad_norm": 0.6228950023651123, + "learning_rate": 0.0002, + "loss": 1.5585, + "step": 1170 + }, + { + "epoch": 3.9008264462809916, + "grad_norm": 0.6196513175964355, + "learning_rate": 0.0002, + "loss": 1.5815, + "step": 1180 + }, + { + "epoch": 3.9338842975206614, + "grad_norm": 0.5946677327156067, + "learning_rate": 0.0002, + "loss": 1.5894, + "step": 1190 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 0.5882220268249512, + "learning_rate": 0.0002, + "loss": 1.5451, + "step": 1200 + }, + { + "epoch": 4.0, + "grad_norm": 0.6291728019714355, + "learning_rate": 0.0002, + "loss": 1.6202, + "step": 1210 + }, + { + "epoch": 4.0, + "eval_loss": 1.8943731784820557, + "eval_runtime": 38.826, + "eval_samples_per_second": 13.264, + "eval_steps_per_second": 1.674, + "step": 1210 + }, + { + "epoch": 4.033057851239669, + "grad_norm": 1.0843605995178223, + "learning_rate": 0.0002, + "loss": 1.3944, + "step": 1220 + }, + { + "epoch": 4.066115702479339, + "grad_norm": 0.6460382342338562, + "learning_rate": 0.0002, + "loss": 1.3453, + "step": 1230 + }, + { + "epoch": 4.099173553719008, + "grad_norm": 0.7872665524482727, + "learning_rate": 0.0002, + "loss": 1.3714, + "step": 1240 + }, + { + "epoch": 4.132231404958677, + "grad_norm": 0.7585243582725525, + "learning_rate": 0.0002, + "loss": 1.3247, + "step": 1250 + }, + { + "epoch": 4.1652892561983474, + "grad_norm": 0.7955290079116821, + "learning_rate": 0.0002, + "loss": 1.4162, + "step": 1260 + }, + { + "epoch": 4.198347107438017, + "grad_norm": 0.8847756385803223, + "learning_rate": 0.0002, + "loss": 1.4127, + "step": 1270 + }, + { + "epoch": 4.231404958677686, + "grad_norm": 0.7897582650184631, + "learning_rate": 0.0002, + "loss": 1.3972, + "step": 1280 + }, + { + "epoch": 4.264462809917355, + "grad_norm": 0.755404531955719, + "learning_rate": 0.0002, + "loss": 1.3631, + "step": 1290 + }, + { + "epoch": 4.297520661157025, + "grad_norm": 0.7718978524208069, + "learning_rate": 0.0002, + "loss": 1.4219, + "step": 1300 + }, + { + "epoch": 4.330578512396694, + "grad_norm": 0.8073238134384155, + "learning_rate": 0.0002, + "loss": 1.3832, + "step": 1310 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.8661217093467712, + "learning_rate": 0.0002, + "loss": 1.3968, + "step": 1320 + }, + { + "epoch": 4.3966942148760335, + "grad_norm": 0.8859766721725464, + "learning_rate": 0.0002, + "loss": 1.3809, + "step": 1330 + }, + { + "epoch": 4.429752066115703, + "grad_norm": 0.8635476231575012, + "learning_rate": 0.0002, + "loss": 1.3779, + "step": 1340 + }, + { + "epoch": 4.462809917355372, + "grad_norm": 0.7376685738563538, + "learning_rate": 0.0002, + "loss": 1.403, + "step": 1350 + }, + { + "epoch": 4.4958677685950414, + "grad_norm": 0.7924236059188843, + "learning_rate": 0.0002, + "loss": 1.4346, + "step": 1360 + }, + { + "epoch": 4.528925619834711, + "grad_norm": 0.6969273686408997, + "learning_rate": 0.0002, + "loss": 1.3205, + "step": 1370 + }, + { + "epoch": 4.56198347107438, + "grad_norm": 0.7346147894859314, + "learning_rate": 0.0002, + "loss": 1.399, + "step": 1380 + }, + { + "epoch": 4.595041322314049, + "grad_norm": 0.8515401482582092, + "learning_rate": 0.0002, + "loss": 1.4308, + "step": 1390 + }, + { + "epoch": 4.628099173553719, + "grad_norm": 0.8154449462890625, + "learning_rate": 0.0002, + "loss": 1.407, + "step": 1400 + }, + { + "epoch": 4.661157024793388, + "grad_norm": 0.8922461271286011, + "learning_rate": 0.0002, + "loss": 1.4182, + "step": 1410 + }, + { + "epoch": 4.694214876033058, + "grad_norm": 0.8835586309432983, + "learning_rate": 0.0002, + "loss": 1.3894, + "step": 1420 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 0.7689077258110046, + "learning_rate": 0.0002, + "loss": 1.411, + "step": 1430 + }, + { + "epoch": 4.760330578512397, + "grad_norm": 0.7515250444412231, + "learning_rate": 0.0002, + "loss": 1.4083, + "step": 1440 + }, + { + "epoch": 4.793388429752066, + "grad_norm": 0.7655003070831299, + "learning_rate": 0.0002, + "loss": 1.4004, + "step": 1450 + }, + { + "epoch": 4.8264462809917354, + "grad_norm": 0.7187207341194153, + "learning_rate": 0.0002, + "loss": 1.3633, + "step": 1460 + }, + { + "epoch": 4.859504132231405, + "grad_norm": 0.7122251987457275, + "learning_rate": 0.0002, + "loss": 1.3647, + "step": 1470 + }, + { + "epoch": 4.892561983471074, + "grad_norm": 0.7744072675704956, + "learning_rate": 0.0002, + "loss": 1.4481, + "step": 1480 + }, + { + "epoch": 4.925619834710744, + "grad_norm": 0.8202858567237854, + "learning_rate": 0.0002, + "loss": 1.3959, + "step": 1490 + }, + { + "epoch": 4.958677685950414, + "grad_norm": 0.7144979238510132, + "learning_rate": 0.0002, + "loss": 1.4176, + "step": 1500 + }, + { + "epoch": 4.991735537190083, + "grad_norm": 0.7824931144714355, + "learning_rate": 0.0002, + "loss": 1.4398, + "step": 1510 + }, + { + "epoch": 4.998347107438017, + "eval_loss": 1.9822860956192017, + "eval_runtime": 38.8296, + "eval_samples_per_second": 13.263, + "eval_steps_per_second": 1.674, + "step": 1512 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.99951133753344e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1512/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..489cd716e6fff3e7034fab8b249923116ba3efdc --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95c74d31ae09269b989b222b09b4b87e25198e440a2fa64a5369acae83ab8dfa +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c24b118ecf419e14ff5223187107940e14e63cb8 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c590f7132a757513177578e59f14de27d51696e9077ddd48745ecc318a5b1ebf +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0829f6d14c06ce7d9665665aace3e1fe2ec3aa88 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef9a8154bf364cac160ae878a4d1a988d37de57e9341d3d661ffea70deb59928 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d094ec9f5d48c716dccc3213db60a2707eccaaee --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a24502682d1a45ce8943321a1a2bb9bba39f2b539357efafd0334139cc4eb441 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ae5098136635d1b2c554bc086da4facc9847b6c5 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/trainer_state.json @@ -0,0 +1,1348 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 1815, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + }, + { + "epoch": 3.0082644628099175, + "grad_norm": 0.4399570822715759, + "learning_rate": 0.0002, + "loss": 1.6173, + "step": 910 + }, + { + "epoch": 3.041322314049587, + "grad_norm": 0.5338484644889832, + "learning_rate": 0.0002, + "loss": 1.5578, + "step": 920 + }, + { + "epoch": 3.074380165289256, + "grad_norm": 0.6154358983039856, + "learning_rate": 0.0002, + "loss": 1.5507, + "step": 930 + }, + { + "epoch": 3.1074380165289255, + "grad_norm": 0.6429790258407593, + "learning_rate": 0.0002, + "loss": 1.6189, + "step": 940 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 0.5375680923461914, + "learning_rate": 0.0002, + "loss": 1.5866, + "step": 950 + }, + { + "epoch": 3.1735537190082646, + "grad_norm": 0.5594999194145203, + "learning_rate": 0.0002, + "loss": 1.5119, + "step": 960 + }, + { + "epoch": 3.206611570247934, + "grad_norm": 0.6742738485336304, + "learning_rate": 0.0002, + "loss": 1.5096, + "step": 970 + }, + { + "epoch": 3.239669421487603, + "grad_norm": 0.563497006893158, + "learning_rate": 0.0002, + "loss": 1.5477, + "step": 980 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.6521140933036804, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 990 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 0.6016622185707092, + "learning_rate": 0.0002, + "loss": 1.4905, + "step": 1000 + }, + { + "epoch": 3.3388429752066116, + "grad_norm": 0.6564913988113403, + "learning_rate": 0.0002, + "loss": 1.5307, + "step": 1010 + }, + { + "epoch": 3.371900826446281, + "grad_norm": 0.6528742909431458, + "learning_rate": 0.0002, + "loss": 1.4595, + "step": 1020 + }, + { + "epoch": 3.4049586776859506, + "grad_norm": 0.5843546390533447, + "learning_rate": 0.0002, + "loss": 1.518, + "step": 1030 + }, + { + "epoch": 3.43801652892562, + "grad_norm": 0.5892922282218933, + "learning_rate": 0.0002, + "loss": 1.5148, + "step": 1040 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 0.6217362284660339, + "learning_rate": 0.0002, + "loss": 1.5125, + "step": 1050 + }, + { + "epoch": 3.5041322314049586, + "grad_norm": 0.5837283134460449, + "learning_rate": 0.0002, + "loss": 1.526, + "step": 1060 + }, + { + "epoch": 3.537190082644628, + "grad_norm": 0.6369057893753052, + "learning_rate": 0.0002, + "loss": 1.5776, + "step": 1070 + }, + { + "epoch": 3.5702479338842976, + "grad_norm": 0.632115364074707, + "learning_rate": 0.0002, + "loss": 1.4758, + "step": 1080 + }, + { + "epoch": 3.603305785123967, + "grad_norm": 0.6364002823829651, + "learning_rate": 0.0002, + "loss": 1.5604, + "step": 1090 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.550032377243042, + "learning_rate": 0.0002, + "loss": 1.508, + "step": 1100 + }, + { + "epoch": 3.669421487603306, + "grad_norm": 0.6106863617897034, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 1110 + }, + { + "epoch": 3.7024793388429753, + "grad_norm": 0.635955274105072, + "learning_rate": 0.0002, + "loss": 1.5237, + "step": 1120 + }, + { + "epoch": 3.7355371900826446, + "grad_norm": 0.615804135799408, + "learning_rate": 0.0002, + "loss": 1.5698, + "step": 1130 + }, + { + "epoch": 3.768595041322314, + "grad_norm": 0.5769386887550354, + "learning_rate": 0.0002, + "loss": 1.6068, + "step": 1140 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 0.5938104391098022, + "learning_rate": 0.0002, + "loss": 1.5262, + "step": 1150 + }, + { + "epoch": 3.834710743801653, + "grad_norm": 0.6149733066558838, + "learning_rate": 0.0002, + "loss": 1.5236, + "step": 1160 + }, + { + "epoch": 3.8677685950413223, + "grad_norm": 0.6228950023651123, + "learning_rate": 0.0002, + "loss": 1.5585, + "step": 1170 + }, + { + "epoch": 3.9008264462809916, + "grad_norm": 0.6196513175964355, + "learning_rate": 0.0002, + "loss": 1.5815, + "step": 1180 + }, + { + "epoch": 3.9338842975206614, + "grad_norm": 0.5946677327156067, + "learning_rate": 0.0002, + "loss": 1.5894, + "step": 1190 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 0.5882220268249512, + "learning_rate": 0.0002, + "loss": 1.5451, + "step": 1200 + }, + { + "epoch": 4.0, + "grad_norm": 0.6291728019714355, + "learning_rate": 0.0002, + "loss": 1.6202, + "step": 1210 + }, + { + "epoch": 4.0, + "eval_loss": 1.8943731784820557, + "eval_runtime": 38.826, + "eval_samples_per_second": 13.264, + "eval_steps_per_second": 1.674, + "step": 1210 + }, + { + "epoch": 4.033057851239669, + "grad_norm": 1.0843605995178223, + "learning_rate": 0.0002, + "loss": 1.3944, + "step": 1220 + }, + { + "epoch": 4.066115702479339, + "grad_norm": 0.6460382342338562, + "learning_rate": 0.0002, + "loss": 1.3453, + "step": 1230 + }, + { + "epoch": 4.099173553719008, + "grad_norm": 0.7872665524482727, + "learning_rate": 0.0002, + "loss": 1.3714, + "step": 1240 + }, + { + "epoch": 4.132231404958677, + "grad_norm": 0.7585243582725525, + "learning_rate": 0.0002, + "loss": 1.3247, + "step": 1250 + }, + { + "epoch": 4.1652892561983474, + "grad_norm": 0.7955290079116821, + "learning_rate": 0.0002, + "loss": 1.4162, + "step": 1260 + }, + { + "epoch": 4.198347107438017, + "grad_norm": 0.8847756385803223, + "learning_rate": 0.0002, + "loss": 1.4127, + "step": 1270 + }, + { + "epoch": 4.231404958677686, + "grad_norm": 0.7897582650184631, + "learning_rate": 0.0002, + "loss": 1.3972, + "step": 1280 + }, + { + "epoch": 4.264462809917355, + "grad_norm": 0.755404531955719, + "learning_rate": 0.0002, + "loss": 1.3631, + "step": 1290 + }, + { + "epoch": 4.297520661157025, + "grad_norm": 0.7718978524208069, + "learning_rate": 0.0002, + "loss": 1.4219, + "step": 1300 + }, + { + "epoch": 4.330578512396694, + "grad_norm": 0.8073238134384155, + "learning_rate": 0.0002, + "loss": 1.3832, + "step": 1310 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.8661217093467712, + "learning_rate": 0.0002, + "loss": 1.3968, + "step": 1320 + }, + { + "epoch": 4.3966942148760335, + "grad_norm": 0.8859766721725464, + "learning_rate": 0.0002, + "loss": 1.3809, + "step": 1330 + }, + { + "epoch": 4.429752066115703, + "grad_norm": 0.8635476231575012, + "learning_rate": 0.0002, + "loss": 1.3779, + "step": 1340 + }, + { + "epoch": 4.462809917355372, + "grad_norm": 0.7376685738563538, + "learning_rate": 0.0002, + "loss": 1.403, + "step": 1350 + }, + { + "epoch": 4.4958677685950414, + "grad_norm": 0.7924236059188843, + "learning_rate": 0.0002, + "loss": 1.4346, + "step": 1360 + }, + { + "epoch": 4.528925619834711, + "grad_norm": 0.6969273686408997, + "learning_rate": 0.0002, + "loss": 1.3205, + "step": 1370 + }, + { + "epoch": 4.56198347107438, + "grad_norm": 0.7346147894859314, + "learning_rate": 0.0002, + "loss": 1.399, + "step": 1380 + }, + { + "epoch": 4.595041322314049, + "grad_norm": 0.8515401482582092, + "learning_rate": 0.0002, + "loss": 1.4308, + "step": 1390 + }, + { + "epoch": 4.628099173553719, + "grad_norm": 0.8154449462890625, + "learning_rate": 0.0002, + "loss": 1.407, + "step": 1400 + }, + { + "epoch": 4.661157024793388, + "grad_norm": 0.8922461271286011, + "learning_rate": 0.0002, + "loss": 1.4182, + "step": 1410 + }, + { + "epoch": 4.694214876033058, + "grad_norm": 0.8835586309432983, + "learning_rate": 0.0002, + "loss": 1.3894, + "step": 1420 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 0.7689077258110046, + "learning_rate": 0.0002, + "loss": 1.411, + "step": 1430 + }, + { + "epoch": 4.760330578512397, + "grad_norm": 0.7515250444412231, + "learning_rate": 0.0002, + "loss": 1.4083, + "step": 1440 + }, + { + "epoch": 4.793388429752066, + "grad_norm": 0.7655003070831299, + "learning_rate": 0.0002, + "loss": 1.4004, + "step": 1450 + }, + { + "epoch": 4.8264462809917354, + "grad_norm": 0.7187207341194153, + "learning_rate": 0.0002, + "loss": 1.3633, + "step": 1460 + }, + { + "epoch": 4.859504132231405, + "grad_norm": 0.7122251987457275, + "learning_rate": 0.0002, + "loss": 1.3647, + "step": 1470 + }, + { + "epoch": 4.892561983471074, + "grad_norm": 0.7744072675704956, + "learning_rate": 0.0002, + "loss": 1.4481, + "step": 1480 + }, + { + "epoch": 4.925619834710744, + "grad_norm": 0.8202858567237854, + "learning_rate": 0.0002, + "loss": 1.3959, + "step": 1490 + }, + { + "epoch": 4.958677685950414, + "grad_norm": 0.7144979238510132, + "learning_rate": 0.0002, + "loss": 1.4176, + "step": 1500 + }, + { + "epoch": 4.991735537190083, + "grad_norm": 0.7824931144714355, + "learning_rate": 0.0002, + "loss": 1.4398, + "step": 1510 + }, + { + "epoch": 4.998347107438017, + "eval_loss": 1.9822860956192017, + "eval_runtime": 38.8296, + "eval_samples_per_second": 13.263, + "eval_steps_per_second": 1.674, + "step": 1512 + }, + { + "epoch": 5.024793388429752, + "grad_norm": 1.0307862758636475, + "learning_rate": 0.0002, + "loss": 1.3009, + "step": 1520 + }, + { + "epoch": 5.0578512396694215, + "grad_norm": 0.9152393341064453, + "learning_rate": 0.0002, + "loss": 1.1822, + "step": 1530 + }, + { + "epoch": 5.090909090909091, + "grad_norm": 0.9560136198997498, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 1540 + }, + { + "epoch": 5.12396694214876, + "grad_norm": 0.8285775184631348, + "learning_rate": 0.0002, + "loss": 1.2552, + "step": 1550 + }, + { + "epoch": 5.1570247933884295, + "grad_norm": 0.9479135870933533, + "learning_rate": 0.0002, + "loss": 1.1603, + "step": 1560 + }, + { + "epoch": 5.190082644628099, + "grad_norm": 0.9731078743934631, + "learning_rate": 0.0002, + "loss": 1.229, + "step": 1570 + }, + { + "epoch": 5.223140495867769, + "grad_norm": 0.8167943358421326, + "learning_rate": 0.0002, + "loss": 1.2084, + "step": 1580 + }, + { + "epoch": 5.256198347107438, + "grad_norm": 1.1679469347000122, + "learning_rate": 0.0002, + "loss": 1.1937, + "step": 1590 + }, + { + "epoch": 5.289256198347108, + "grad_norm": 0.9156213402748108, + "learning_rate": 0.0002, + "loss": 1.1662, + "step": 1600 + }, + { + "epoch": 5.322314049586777, + "grad_norm": 1.082939624786377, + "learning_rate": 0.0002, + "loss": 1.2014, + "step": 1610 + }, + { + "epoch": 5.355371900826446, + "grad_norm": 1.0271905660629272, + "learning_rate": 0.0002, + "loss": 1.2701, + "step": 1620 + }, + { + "epoch": 5.3884297520661155, + "grad_norm": 1.2237807512283325, + "learning_rate": 0.0002, + "loss": 1.2275, + "step": 1630 + }, + { + "epoch": 5.421487603305785, + "grad_norm": 1.1419697999954224, + "learning_rate": 0.0002, + "loss": 1.267, + "step": 1640 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 1.4427895545959473, + "learning_rate": 0.0002, + "loss": 1.2424, + "step": 1650 + }, + { + "epoch": 5.487603305785124, + "grad_norm": 1.117572546005249, + "learning_rate": 0.0002, + "loss": 1.24, + "step": 1660 + }, + { + "epoch": 5.520661157024794, + "grad_norm": 1.1476300954818726, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 1670 + }, + { + "epoch": 5.553719008264463, + "grad_norm": 1.1372056007385254, + "learning_rate": 0.0002, + "loss": 1.2397, + "step": 1680 + }, + { + "epoch": 5.586776859504132, + "grad_norm": 1.0415048599243164, + "learning_rate": 0.0002, + "loss": 1.2875, + "step": 1690 + }, + { + "epoch": 5.619834710743802, + "grad_norm": 0.9535173177719116, + "learning_rate": 0.0002, + "loss": 1.2062, + "step": 1700 + }, + { + "epoch": 5.652892561983471, + "grad_norm": 0.9918773174285889, + "learning_rate": 0.0002, + "loss": 1.2528, + "step": 1710 + }, + { + "epoch": 5.68595041322314, + "grad_norm": 0.9184247255325317, + "learning_rate": 0.0002, + "loss": 1.2443, + "step": 1720 + }, + { + "epoch": 5.7190082644628095, + "grad_norm": 0.9416358470916748, + "learning_rate": 0.0002, + "loss": 1.2273, + "step": 1730 + }, + { + "epoch": 5.75206611570248, + "grad_norm": 1.0027815103530884, + "learning_rate": 0.0002, + "loss": 1.2815, + "step": 1740 + }, + { + "epoch": 5.785123966942149, + "grad_norm": 1.0766979455947876, + "learning_rate": 0.0002, + "loss": 1.2261, + "step": 1750 + }, + { + "epoch": 5.818181818181818, + "grad_norm": 0.9244554042816162, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1760 + }, + { + "epoch": 5.851239669421488, + "grad_norm": 1.2514721155166626, + "learning_rate": 0.0002, + "loss": 1.312, + "step": 1770 + }, + { + "epoch": 5.884297520661157, + "grad_norm": 1.0198537111282349, + "learning_rate": 0.0002, + "loss": 1.3042, + "step": 1780 + }, + { + "epoch": 5.917355371900826, + "grad_norm": 0.9569677114486694, + "learning_rate": 0.0002, + "loss": 1.2032, + "step": 1790 + }, + { + "epoch": 5.950413223140496, + "grad_norm": 0.9748323559761047, + "learning_rate": 0.0002, + "loss": 1.23, + "step": 1800 + }, + { + "epoch": 5.983471074380166, + "grad_norm": 1.0731725692749023, + "learning_rate": 0.0002, + "loss": 1.2167, + "step": 1810 + }, + { + "epoch": 6.0, + "eval_loss": 2.0875232219696045, + "eval_runtime": 38.8414, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.673, + "step": 1815 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8.399413605040128e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-1815/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e66276cac58178cd7b91c44e4ab81970229f71da --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc221925e8596f0c163e26f7a7b39d01ca593b5aae775954f7359acb0b531e44 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1f65b5ee90c033c17c7d076ee6898ffda18f39d0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:197df53fc78468649bfe3f6972061aea7735ffdda3b45f95094c0f8cc959ad34 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bbae5aaf38758a79efb9b1b766b7a901e8d38459 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fac8bc31919c5973fd124c26aba10331a7a96f82fd00fad11742b92113345f72 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..77bb3d5e5721199431b5a0835f9aa24d5a260942 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:17a911c4efa94710db06e75585864cc129af0a338e17fe7ab9cf875c17482c0d +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e14e49f883dff22b8d0dbac71862b6f5a9c08ae --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/trainer_state.json @@ -0,0 +1,1566 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 6.998347107438017, + "eval_steps": 10, + "global_step": 2117, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + }, + { + "epoch": 3.0082644628099175, + "grad_norm": 0.4399570822715759, + "learning_rate": 0.0002, + "loss": 1.6173, + "step": 910 + }, + { + "epoch": 3.041322314049587, + "grad_norm": 0.5338484644889832, + "learning_rate": 0.0002, + "loss": 1.5578, + "step": 920 + }, + { + "epoch": 3.074380165289256, + "grad_norm": 0.6154358983039856, + "learning_rate": 0.0002, + "loss": 1.5507, + "step": 930 + }, + { + "epoch": 3.1074380165289255, + "grad_norm": 0.6429790258407593, + "learning_rate": 0.0002, + "loss": 1.6189, + "step": 940 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 0.5375680923461914, + "learning_rate": 0.0002, + "loss": 1.5866, + "step": 950 + }, + { + "epoch": 3.1735537190082646, + "grad_norm": 0.5594999194145203, + "learning_rate": 0.0002, + "loss": 1.5119, + "step": 960 + }, + { + "epoch": 3.206611570247934, + "grad_norm": 0.6742738485336304, + "learning_rate": 0.0002, + "loss": 1.5096, + "step": 970 + }, + { + "epoch": 3.239669421487603, + "grad_norm": 0.563497006893158, + "learning_rate": 0.0002, + "loss": 1.5477, + "step": 980 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.6521140933036804, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 990 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 0.6016622185707092, + "learning_rate": 0.0002, + "loss": 1.4905, + "step": 1000 + }, + { + "epoch": 3.3388429752066116, + "grad_norm": 0.6564913988113403, + "learning_rate": 0.0002, + "loss": 1.5307, + "step": 1010 + }, + { + "epoch": 3.371900826446281, + "grad_norm": 0.6528742909431458, + "learning_rate": 0.0002, + "loss": 1.4595, + "step": 1020 + }, + { + "epoch": 3.4049586776859506, + "grad_norm": 0.5843546390533447, + "learning_rate": 0.0002, + "loss": 1.518, + "step": 1030 + }, + { + "epoch": 3.43801652892562, + "grad_norm": 0.5892922282218933, + "learning_rate": 0.0002, + "loss": 1.5148, + "step": 1040 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 0.6217362284660339, + "learning_rate": 0.0002, + "loss": 1.5125, + "step": 1050 + }, + { + "epoch": 3.5041322314049586, + "grad_norm": 0.5837283134460449, + "learning_rate": 0.0002, + "loss": 1.526, + "step": 1060 + }, + { + "epoch": 3.537190082644628, + "grad_norm": 0.6369057893753052, + "learning_rate": 0.0002, + "loss": 1.5776, + "step": 1070 + }, + { + "epoch": 3.5702479338842976, + "grad_norm": 0.632115364074707, + "learning_rate": 0.0002, + "loss": 1.4758, + "step": 1080 + }, + { + "epoch": 3.603305785123967, + "grad_norm": 0.6364002823829651, + "learning_rate": 0.0002, + "loss": 1.5604, + "step": 1090 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.550032377243042, + "learning_rate": 0.0002, + "loss": 1.508, + "step": 1100 + }, + { + "epoch": 3.669421487603306, + "grad_norm": 0.6106863617897034, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 1110 + }, + { + "epoch": 3.7024793388429753, + "grad_norm": 0.635955274105072, + "learning_rate": 0.0002, + "loss": 1.5237, + "step": 1120 + }, + { + "epoch": 3.7355371900826446, + "grad_norm": 0.615804135799408, + "learning_rate": 0.0002, + "loss": 1.5698, + "step": 1130 + }, + { + "epoch": 3.768595041322314, + "grad_norm": 0.5769386887550354, + "learning_rate": 0.0002, + "loss": 1.6068, + "step": 1140 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 0.5938104391098022, + "learning_rate": 0.0002, + "loss": 1.5262, + "step": 1150 + }, + { + "epoch": 3.834710743801653, + "grad_norm": 0.6149733066558838, + "learning_rate": 0.0002, + "loss": 1.5236, + "step": 1160 + }, + { + "epoch": 3.8677685950413223, + "grad_norm": 0.6228950023651123, + "learning_rate": 0.0002, + "loss": 1.5585, + "step": 1170 + }, + { + "epoch": 3.9008264462809916, + "grad_norm": 0.6196513175964355, + "learning_rate": 0.0002, + "loss": 1.5815, + "step": 1180 + }, + { + "epoch": 3.9338842975206614, + "grad_norm": 0.5946677327156067, + "learning_rate": 0.0002, + "loss": 1.5894, + "step": 1190 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 0.5882220268249512, + "learning_rate": 0.0002, + "loss": 1.5451, + "step": 1200 + }, + { + "epoch": 4.0, + "grad_norm": 0.6291728019714355, + "learning_rate": 0.0002, + "loss": 1.6202, + "step": 1210 + }, + { + "epoch": 4.0, + "eval_loss": 1.8943731784820557, + "eval_runtime": 38.826, + "eval_samples_per_second": 13.264, + "eval_steps_per_second": 1.674, + "step": 1210 + }, + { + "epoch": 4.033057851239669, + "grad_norm": 1.0843605995178223, + "learning_rate": 0.0002, + "loss": 1.3944, + "step": 1220 + }, + { + "epoch": 4.066115702479339, + "grad_norm": 0.6460382342338562, + "learning_rate": 0.0002, + "loss": 1.3453, + "step": 1230 + }, + { + "epoch": 4.099173553719008, + "grad_norm": 0.7872665524482727, + "learning_rate": 0.0002, + "loss": 1.3714, + "step": 1240 + }, + { + "epoch": 4.132231404958677, + "grad_norm": 0.7585243582725525, + "learning_rate": 0.0002, + "loss": 1.3247, + "step": 1250 + }, + { + "epoch": 4.1652892561983474, + "grad_norm": 0.7955290079116821, + "learning_rate": 0.0002, + "loss": 1.4162, + "step": 1260 + }, + { + "epoch": 4.198347107438017, + "grad_norm": 0.8847756385803223, + "learning_rate": 0.0002, + "loss": 1.4127, + "step": 1270 + }, + { + "epoch": 4.231404958677686, + "grad_norm": 0.7897582650184631, + "learning_rate": 0.0002, + "loss": 1.3972, + "step": 1280 + }, + { + "epoch": 4.264462809917355, + "grad_norm": 0.755404531955719, + "learning_rate": 0.0002, + "loss": 1.3631, + "step": 1290 + }, + { + "epoch": 4.297520661157025, + "grad_norm": 0.7718978524208069, + "learning_rate": 0.0002, + "loss": 1.4219, + "step": 1300 + }, + { + "epoch": 4.330578512396694, + "grad_norm": 0.8073238134384155, + "learning_rate": 0.0002, + "loss": 1.3832, + "step": 1310 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.8661217093467712, + "learning_rate": 0.0002, + "loss": 1.3968, + "step": 1320 + }, + { + "epoch": 4.3966942148760335, + "grad_norm": 0.8859766721725464, + "learning_rate": 0.0002, + "loss": 1.3809, + "step": 1330 + }, + { + "epoch": 4.429752066115703, + "grad_norm": 0.8635476231575012, + "learning_rate": 0.0002, + "loss": 1.3779, + "step": 1340 + }, + { + "epoch": 4.462809917355372, + "grad_norm": 0.7376685738563538, + "learning_rate": 0.0002, + "loss": 1.403, + "step": 1350 + }, + { + "epoch": 4.4958677685950414, + "grad_norm": 0.7924236059188843, + "learning_rate": 0.0002, + "loss": 1.4346, + "step": 1360 + }, + { + "epoch": 4.528925619834711, + "grad_norm": 0.6969273686408997, + "learning_rate": 0.0002, + "loss": 1.3205, + "step": 1370 + }, + { + "epoch": 4.56198347107438, + "grad_norm": 0.7346147894859314, + "learning_rate": 0.0002, + "loss": 1.399, + "step": 1380 + }, + { + "epoch": 4.595041322314049, + "grad_norm": 0.8515401482582092, + "learning_rate": 0.0002, + "loss": 1.4308, + "step": 1390 + }, + { + "epoch": 4.628099173553719, + "grad_norm": 0.8154449462890625, + "learning_rate": 0.0002, + "loss": 1.407, + "step": 1400 + }, + { + "epoch": 4.661157024793388, + "grad_norm": 0.8922461271286011, + "learning_rate": 0.0002, + "loss": 1.4182, + "step": 1410 + }, + { + "epoch": 4.694214876033058, + "grad_norm": 0.8835586309432983, + "learning_rate": 0.0002, + "loss": 1.3894, + "step": 1420 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 0.7689077258110046, + "learning_rate": 0.0002, + "loss": 1.411, + "step": 1430 + }, + { + "epoch": 4.760330578512397, + "grad_norm": 0.7515250444412231, + "learning_rate": 0.0002, + "loss": 1.4083, + "step": 1440 + }, + { + "epoch": 4.793388429752066, + "grad_norm": 0.7655003070831299, + "learning_rate": 0.0002, + "loss": 1.4004, + "step": 1450 + }, + { + "epoch": 4.8264462809917354, + "grad_norm": 0.7187207341194153, + "learning_rate": 0.0002, + "loss": 1.3633, + "step": 1460 + }, + { + "epoch": 4.859504132231405, + "grad_norm": 0.7122251987457275, + "learning_rate": 0.0002, + "loss": 1.3647, + "step": 1470 + }, + { + "epoch": 4.892561983471074, + "grad_norm": 0.7744072675704956, + "learning_rate": 0.0002, + "loss": 1.4481, + "step": 1480 + }, + { + "epoch": 4.925619834710744, + "grad_norm": 0.8202858567237854, + "learning_rate": 0.0002, + "loss": 1.3959, + "step": 1490 + }, + { + "epoch": 4.958677685950414, + "grad_norm": 0.7144979238510132, + "learning_rate": 0.0002, + "loss": 1.4176, + "step": 1500 + }, + { + "epoch": 4.991735537190083, + "grad_norm": 0.7824931144714355, + "learning_rate": 0.0002, + "loss": 1.4398, + "step": 1510 + }, + { + "epoch": 4.998347107438017, + "eval_loss": 1.9822860956192017, + "eval_runtime": 38.8296, + "eval_samples_per_second": 13.263, + "eval_steps_per_second": 1.674, + "step": 1512 + }, + { + "epoch": 5.024793388429752, + "grad_norm": 1.0307862758636475, + "learning_rate": 0.0002, + "loss": 1.3009, + "step": 1520 + }, + { + "epoch": 5.0578512396694215, + "grad_norm": 0.9152393341064453, + "learning_rate": 0.0002, + "loss": 1.1822, + "step": 1530 + }, + { + "epoch": 5.090909090909091, + "grad_norm": 0.9560136198997498, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 1540 + }, + { + "epoch": 5.12396694214876, + "grad_norm": 0.8285775184631348, + "learning_rate": 0.0002, + "loss": 1.2552, + "step": 1550 + }, + { + "epoch": 5.1570247933884295, + "grad_norm": 0.9479135870933533, + "learning_rate": 0.0002, + "loss": 1.1603, + "step": 1560 + }, + { + "epoch": 5.190082644628099, + "grad_norm": 0.9731078743934631, + "learning_rate": 0.0002, + "loss": 1.229, + "step": 1570 + }, + { + "epoch": 5.223140495867769, + "grad_norm": 0.8167943358421326, + "learning_rate": 0.0002, + "loss": 1.2084, + "step": 1580 + }, + { + "epoch": 5.256198347107438, + "grad_norm": 1.1679469347000122, + "learning_rate": 0.0002, + "loss": 1.1937, + "step": 1590 + }, + { + "epoch": 5.289256198347108, + "grad_norm": 0.9156213402748108, + "learning_rate": 0.0002, + "loss": 1.1662, + "step": 1600 + }, + { + "epoch": 5.322314049586777, + "grad_norm": 1.082939624786377, + "learning_rate": 0.0002, + "loss": 1.2014, + "step": 1610 + }, + { + "epoch": 5.355371900826446, + "grad_norm": 1.0271905660629272, + "learning_rate": 0.0002, + "loss": 1.2701, + "step": 1620 + }, + { + "epoch": 5.3884297520661155, + "grad_norm": 1.2237807512283325, + "learning_rate": 0.0002, + "loss": 1.2275, + "step": 1630 + }, + { + "epoch": 5.421487603305785, + "grad_norm": 1.1419697999954224, + "learning_rate": 0.0002, + "loss": 1.267, + "step": 1640 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 1.4427895545959473, + "learning_rate": 0.0002, + "loss": 1.2424, + "step": 1650 + }, + { + "epoch": 5.487603305785124, + "grad_norm": 1.117572546005249, + "learning_rate": 0.0002, + "loss": 1.24, + "step": 1660 + }, + { + "epoch": 5.520661157024794, + "grad_norm": 1.1476300954818726, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 1670 + }, + { + "epoch": 5.553719008264463, + "grad_norm": 1.1372056007385254, + "learning_rate": 0.0002, + "loss": 1.2397, + "step": 1680 + }, + { + "epoch": 5.586776859504132, + "grad_norm": 1.0415048599243164, + "learning_rate": 0.0002, + "loss": 1.2875, + "step": 1690 + }, + { + "epoch": 5.619834710743802, + "grad_norm": 0.9535173177719116, + "learning_rate": 0.0002, + "loss": 1.2062, + "step": 1700 + }, + { + "epoch": 5.652892561983471, + "grad_norm": 0.9918773174285889, + "learning_rate": 0.0002, + "loss": 1.2528, + "step": 1710 + }, + { + "epoch": 5.68595041322314, + "grad_norm": 0.9184247255325317, + "learning_rate": 0.0002, + "loss": 1.2443, + "step": 1720 + }, + { + "epoch": 5.7190082644628095, + "grad_norm": 0.9416358470916748, + "learning_rate": 0.0002, + "loss": 1.2273, + "step": 1730 + }, + { + "epoch": 5.75206611570248, + "grad_norm": 1.0027815103530884, + "learning_rate": 0.0002, + "loss": 1.2815, + "step": 1740 + }, + { + "epoch": 5.785123966942149, + "grad_norm": 1.0766979455947876, + "learning_rate": 0.0002, + "loss": 1.2261, + "step": 1750 + }, + { + "epoch": 5.818181818181818, + "grad_norm": 0.9244554042816162, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1760 + }, + { + "epoch": 5.851239669421488, + "grad_norm": 1.2514721155166626, + "learning_rate": 0.0002, + "loss": 1.312, + "step": 1770 + }, + { + "epoch": 5.884297520661157, + "grad_norm": 1.0198537111282349, + "learning_rate": 0.0002, + "loss": 1.3042, + "step": 1780 + }, + { + "epoch": 5.917355371900826, + "grad_norm": 0.9569677114486694, + "learning_rate": 0.0002, + "loss": 1.2032, + "step": 1790 + }, + { + "epoch": 5.950413223140496, + "grad_norm": 0.9748323559761047, + "learning_rate": 0.0002, + "loss": 1.23, + "step": 1800 + }, + { + "epoch": 5.983471074380166, + "grad_norm": 1.0731725692749023, + "learning_rate": 0.0002, + "loss": 1.2167, + "step": 1810 + }, + { + "epoch": 6.0, + "eval_loss": 2.0875232219696045, + "eval_runtime": 38.8414, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.673, + "step": 1815 + }, + { + "epoch": 6.016528925619835, + "grad_norm": 1.1357579231262207, + "learning_rate": 0.0002, + "loss": 1.2001, + "step": 1820 + }, + { + "epoch": 6.049586776859504, + "grad_norm": 1.1450963020324707, + "learning_rate": 0.0002, + "loss": 1.0238, + "step": 1830 + }, + { + "epoch": 6.082644628099174, + "grad_norm": 1.2671376466751099, + "learning_rate": 0.0002, + "loss": 1.0099, + "step": 1840 + }, + { + "epoch": 6.115702479338843, + "grad_norm": 1.1405659914016724, + "learning_rate": 0.0002, + "loss": 1.07, + "step": 1850 + }, + { + "epoch": 6.148760330578512, + "grad_norm": 1.176552176475525, + "learning_rate": 0.0002, + "loss": 1.0456, + "step": 1860 + }, + { + "epoch": 6.181818181818182, + "grad_norm": 1.2722952365875244, + "learning_rate": 0.0002, + "loss": 1.0804, + "step": 1870 + }, + { + "epoch": 6.214876033057851, + "grad_norm": 1.2505744695663452, + "learning_rate": 0.0002, + "loss": 1.0305, + "step": 1880 + }, + { + "epoch": 6.24793388429752, + "grad_norm": 1.388776183128357, + "learning_rate": 0.0002, + "loss": 1.0496, + "step": 1890 + }, + { + "epoch": 6.2809917355371905, + "grad_norm": 1.3574049472808838, + "learning_rate": 0.0002, + "loss": 1.0727, + "step": 1900 + }, + { + "epoch": 6.31404958677686, + "grad_norm": 1.15278160572052, + "learning_rate": 0.0002, + "loss": 1.0142, + "step": 1910 + }, + { + "epoch": 6.347107438016529, + "grad_norm": 1.280260682106018, + "learning_rate": 0.0002, + "loss": 1.0977, + "step": 1920 + }, + { + "epoch": 6.380165289256198, + "grad_norm": 1.3323947191238403, + "learning_rate": 0.0002, + "loss": 1.0319, + "step": 1930 + }, + { + "epoch": 6.413223140495868, + "grad_norm": 1.2422343492507935, + "learning_rate": 0.0002, + "loss": 1.0816, + "step": 1940 + }, + { + "epoch": 6.446280991735537, + "grad_norm": 1.485025405883789, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1950 + }, + { + "epoch": 6.479338842975206, + "grad_norm": 1.132170557975769, + "learning_rate": 0.0002, + "loss": 1.0489, + "step": 1960 + }, + { + "epoch": 6.512396694214876, + "grad_norm": 1.1854133605957031, + "learning_rate": 0.0002, + "loss": 1.0551, + "step": 1970 + }, + { + "epoch": 6.545454545454545, + "grad_norm": 1.2570922374725342, + "learning_rate": 0.0002, + "loss": 1.0353, + "step": 1980 + }, + { + "epoch": 6.578512396694215, + "grad_norm": 1.1590516567230225, + "learning_rate": 0.0002, + "loss": 1.0693, + "step": 1990 + }, + { + "epoch": 6.6115702479338845, + "grad_norm": 1.3472840785980225, + "learning_rate": 0.0002, + "loss": 1.0899, + "step": 2000 + }, + { + "epoch": 6.644628099173554, + "grad_norm": 1.4928734302520752, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 2010 + }, + { + "epoch": 6.677685950413223, + "grad_norm": 1.243243932723999, + "learning_rate": 0.0002, + "loss": 1.0448, + "step": 2020 + }, + { + "epoch": 6.710743801652892, + "grad_norm": 1.6677647829055786, + "learning_rate": 0.0002, + "loss": 1.0557, + "step": 2030 + }, + { + "epoch": 6.743801652892562, + "grad_norm": 1.1295818090438843, + "learning_rate": 0.0002, + "loss": 1.1326, + "step": 2040 + }, + { + "epoch": 6.776859504132231, + "grad_norm": 1.2596524953842163, + "learning_rate": 0.0002, + "loss": 1.0889, + "step": 2050 + }, + { + "epoch": 6.809917355371901, + "grad_norm": 1.2924189567565918, + "learning_rate": 0.0002, + "loss": 1.1344, + "step": 2060 + }, + { + "epoch": 6.8429752066115705, + "grad_norm": 1.1653043031692505, + "learning_rate": 0.0002, + "loss": 1.1409, + "step": 2070 + }, + { + "epoch": 6.87603305785124, + "grad_norm": 1.3316930532455444, + "learning_rate": 0.0002, + "loss": 1.0911, + "step": 2080 + }, + { + "epoch": 6.909090909090909, + "grad_norm": 1.2001112699508667, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 2090 + }, + { + "epoch": 6.9421487603305785, + "grad_norm": 1.1454474925994873, + "learning_rate": 0.0002, + "loss": 1.0538, + "step": 2100 + }, + { + "epoch": 6.975206611570248, + "grad_norm": 1.1415315866470337, + "learning_rate": 0.0002, + "loss": 1.0955, + "step": 2110 + }, + { + "epoch": 6.998347107438017, + "eval_loss": 2.2608585357666016, + "eval_runtime": 38.8512, + "eval_samples_per_second": 13.256, + "eval_steps_per_second": 1.673, + "step": 2117 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.799315872546816e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2117/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5ddc1c71ca88d5112b7b09f2bac8a2c3b4d47214 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b126b7d713a5042ac87c308a270c55b2e928a6cade9a0a8695c2c22ef4c6d3af +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4e54b7f6e0e1a6500e310ef2fbe2b385a372edb --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bedb321561d090cbfb9be8c2e1dfbeef508af29acd4f673cb65205610234345e +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d54050abf4c6dd4a6501a14fc0f2c6802bfdfe05 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb375654d65137c39ea9ee3acb38be6283fc1998b21a10ca793992a44f1852f6 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..edabb4141748a26c7a6e63169d76fd47743d86e8 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:44592ef56a66bf6abda893d22dc5ce2e24c042ae82369d710004806da182f4da +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1644c2781b55aa1db1cc1c04ea35a0ee7e886bda --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/trainer_state.json @@ -0,0 +1,1784 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 7.986776859504133, + "eval_steps": 10, + "global_step": 2416, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + }, + { + "epoch": 3.0082644628099175, + "grad_norm": 0.4399570822715759, + "learning_rate": 0.0002, + "loss": 1.6173, + "step": 910 + }, + { + "epoch": 3.041322314049587, + "grad_norm": 0.5338484644889832, + "learning_rate": 0.0002, + "loss": 1.5578, + "step": 920 + }, + { + "epoch": 3.074380165289256, + "grad_norm": 0.6154358983039856, + "learning_rate": 0.0002, + "loss": 1.5507, + "step": 930 + }, + { + "epoch": 3.1074380165289255, + "grad_norm": 0.6429790258407593, + "learning_rate": 0.0002, + "loss": 1.6189, + "step": 940 + }, + { + "epoch": 3.1404958677685952, + "grad_norm": 0.5375680923461914, + "learning_rate": 0.0002, + "loss": 1.5866, + "step": 950 + }, + { + "epoch": 3.1735537190082646, + "grad_norm": 0.5594999194145203, + "learning_rate": 0.0002, + "loss": 1.5119, + "step": 960 + }, + { + "epoch": 3.206611570247934, + "grad_norm": 0.6742738485336304, + "learning_rate": 0.0002, + "loss": 1.5096, + "step": 970 + }, + { + "epoch": 3.239669421487603, + "grad_norm": 0.563497006893158, + "learning_rate": 0.0002, + "loss": 1.5477, + "step": 980 + }, + { + "epoch": 3.2727272727272725, + "grad_norm": 0.6521140933036804, + "learning_rate": 0.0002, + "loss": 1.5559, + "step": 990 + }, + { + "epoch": 3.3057851239669422, + "grad_norm": 0.6016622185707092, + "learning_rate": 0.0002, + "loss": 1.4905, + "step": 1000 + }, + { + "epoch": 3.3388429752066116, + "grad_norm": 0.6564913988113403, + "learning_rate": 0.0002, + "loss": 1.5307, + "step": 1010 + }, + { + "epoch": 3.371900826446281, + "grad_norm": 0.6528742909431458, + "learning_rate": 0.0002, + "loss": 1.4595, + "step": 1020 + }, + { + "epoch": 3.4049586776859506, + "grad_norm": 0.5843546390533447, + "learning_rate": 0.0002, + "loss": 1.518, + "step": 1030 + }, + { + "epoch": 3.43801652892562, + "grad_norm": 0.5892922282218933, + "learning_rate": 0.0002, + "loss": 1.5148, + "step": 1040 + }, + { + "epoch": 3.4710743801652892, + "grad_norm": 0.6217362284660339, + "learning_rate": 0.0002, + "loss": 1.5125, + "step": 1050 + }, + { + "epoch": 3.5041322314049586, + "grad_norm": 0.5837283134460449, + "learning_rate": 0.0002, + "loss": 1.526, + "step": 1060 + }, + { + "epoch": 3.537190082644628, + "grad_norm": 0.6369057893753052, + "learning_rate": 0.0002, + "loss": 1.5776, + "step": 1070 + }, + { + "epoch": 3.5702479338842976, + "grad_norm": 0.632115364074707, + "learning_rate": 0.0002, + "loss": 1.4758, + "step": 1080 + }, + { + "epoch": 3.603305785123967, + "grad_norm": 0.6364002823829651, + "learning_rate": 0.0002, + "loss": 1.5604, + "step": 1090 + }, + { + "epoch": 3.6363636363636362, + "grad_norm": 0.550032377243042, + "learning_rate": 0.0002, + "loss": 1.508, + "step": 1100 + }, + { + "epoch": 3.669421487603306, + "grad_norm": 0.6106863617897034, + "learning_rate": 0.0002, + "loss": 1.5548, + "step": 1110 + }, + { + "epoch": 3.7024793388429753, + "grad_norm": 0.635955274105072, + "learning_rate": 0.0002, + "loss": 1.5237, + "step": 1120 + }, + { + "epoch": 3.7355371900826446, + "grad_norm": 0.615804135799408, + "learning_rate": 0.0002, + "loss": 1.5698, + "step": 1130 + }, + { + "epoch": 3.768595041322314, + "grad_norm": 0.5769386887550354, + "learning_rate": 0.0002, + "loss": 1.6068, + "step": 1140 + }, + { + "epoch": 3.8016528925619832, + "grad_norm": 0.5938104391098022, + "learning_rate": 0.0002, + "loss": 1.5262, + "step": 1150 + }, + { + "epoch": 3.834710743801653, + "grad_norm": 0.6149733066558838, + "learning_rate": 0.0002, + "loss": 1.5236, + "step": 1160 + }, + { + "epoch": 3.8677685950413223, + "grad_norm": 0.6228950023651123, + "learning_rate": 0.0002, + "loss": 1.5585, + "step": 1170 + }, + { + "epoch": 3.9008264462809916, + "grad_norm": 0.6196513175964355, + "learning_rate": 0.0002, + "loss": 1.5815, + "step": 1180 + }, + { + "epoch": 3.9338842975206614, + "grad_norm": 0.5946677327156067, + "learning_rate": 0.0002, + "loss": 1.5894, + "step": 1190 + }, + { + "epoch": 3.9669421487603307, + "grad_norm": 0.5882220268249512, + "learning_rate": 0.0002, + "loss": 1.5451, + "step": 1200 + }, + { + "epoch": 4.0, + "grad_norm": 0.6291728019714355, + "learning_rate": 0.0002, + "loss": 1.6202, + "step": 1210 + }, + { + "epoch": 4.0, + "eval_loss": 1.8943731784820557, + "eval_runtime": 38.826, + "eval_samples_per_second": 13.264, + "eval_steps_per_second": 1.674, + "step": 1210 + }, + { + "epoch": 4.033057851239669, + "grad_norm": 1.0843605995178223, + "learning_rate": 0.0002, + "loss": 1.3944, + "step": 1220 + }, + { + "epoch": 4.066115702479339, + "grad_norm": 0.6460382342338562, + "learning_rate": 0.0002, + "loss": 1.3453, + "step": 1230 + }, + { + "epoch": 4.099173553719008, + "grad_norm": 0.7872665524482727, + "learning_rate": 0.0002, + "loss": 1.3714, + "step": 1240 + }, + { + "epoch": 4.132231404958677, + "grad_norm": 0.7585243582725525, + "learning_rate": 0.0002, + "loss": 1.3247, + "step": 1250 + }, + { + "epoch": 4.1652892561983474, + "grad_norm": 0.7955290079116821, + "learning_rate": 0.0002, + "loss": 1.4162, + "step": 1260 + }, + { + "epoch": 4.198347107438017, + "grad_norm": 0.8847756385803223, + "learning_rate": 0.0002, + "loss": 1.4127, + "step": 1270 + }, + { + "epoch": 4.231404958677686, + "grad_norm": 0.7897582650184631, + "learning_rate": 0.0002, + "loss": 1.3972, + "step": 1280 + }, + { + "epoch": 4.264462809917355, + "grad_norm": 0.755404531955719, + "learning_rate": 0.0002, + "loss": 1.3631, + "step": 1290 + }, + { + "epoch": 4.297520661157025, + "grad_norm": 0.7718978524208069, + "learning_rate": 0.0002, + "loss": 1.4219, + "step": 1300 + }, + { + "epoch": 4.330578512396694, + "grad_norm": 0.8073238134384155, + "learning_rate": 0.0002, + "loss": 1.3832, + "step": 1310 + }, + { + "epoch": 4.363636363636363, + "grad_norm": 0.8661217093467712, + "learning_rate": 0.0002, + "loss": 1.3968, + "step": 1320 + }, + { + "epoch": 4.3966942148760335, + "grad_norm": 0.8859766721725464, + "learning_rate": 0.0002, + "loss": 1.3809, + "step": 1330 + }, + { + "epoch": 4.429752066115703, + "grad_norm": 0.8635476231575012, + "learning_rate": 0.0002, + "loss": 1.3779, + "step": 1340 + }, + { + "epoch": 4.462809917355372, + "grad_norm": 0.7376685738563538, + "learning_rate": 0.0002, + "loss": 1.403, + "step": 1350 + }, + { + "epoch": 4.4958677685950414, + "grad_norm": 0.7924236059188843, + "learning_rate": 0.0002, + "loss": 1.4346, + "step": 1360 + }, + { + "epoch": 4.528925619834711, + "grad_norm": 0.6969273686408997, + "learning_rate": 0.0002, + "loss": 1.3205, + "step": 1370 + }, + { + "epoch": 4.56198347107438, + "grad_norm": 0.7346147894859314, + "learning_rate": 0.0002, + "loss": 1.399, + "step": 1380 + }, + { + "epoch": 4.595041322314049, + "grad_norm": 0.8515401482582092, + "learning_rate": 0.0002, + "loss": 1.4308, + "step": 1390 + }, + { + "epoch": 4.628099173553719, + "grad_norm": 0.8154449462890625, + "learning_rate": 0.0002, + "loss": 1.407, + "step": 1400 + }, + { + "epoch": 4.661157024793388, + "grad_norm": 0.8922461271286011, + "learning_rate": 0.0002, + "loss": 1.4182, + "step": 1410 + }, + { + "epoch": 4.694214876033058, + "grad_norm": 0.8835586309432983, + "learning_rate": 0.0002, + "loss": 1.3894, + "step": 1420 + }, + { + "epoch": 4.7272727272727275, + "grad_norm": 0.7689077258110046, + "learning_rate": 0.0002, + "loss": 1.411, + "step": 1430 + }, + { + "epoch": 4.760330578512397, + "grad_norm": 0.7515250444412231, + "learning_rate": 0.0002, + "loss": 1.4083, + "step": 1440 + }, + { + "epoch": 4.793388429752066, + "grad_norm": 0.7655003070831299, + "learning_rate": 0.0002, + "loss": 1.4004, + "step": 1450 + }, + { + "epoch": 4.8264462809917354, + "grad_norm": 0.7187207341194153, + "learning_rate": 0.0002, + "loss": 1.3633, + "step": 1460 + }, + { + "epoch": 4.859504132231405, + "grad_norm": 0.7122251987457275, + "learning_rate": 0.0002, + "loss": 1.3647, + "step": 1470 + }, + { + "epoch": 4.892561983471074, + "grad_norm": 0.7744072675704956, + "learning_rate": 0.0002, + "loss": 1.4481, + "step": 1480 + }, + { + "epoch": 4.925619834710744, + "grad_norm": 0.8202858567237854, + "learning_rate": 0.0002, + "loss": 1.3959, + "step": 1490 + }, + { + "epoch": 4.958677685950414, + "grad_norm": 0.7144979238510132, + "learning_rate": 0.0002, + "loss": 1.4176, + "step": 1500 + }, + { + "epoch": 4.991735537190083, + "grad_norm": 0.7824931144714355, + "learning_rate": 0.0002, + "loss": 1.4398, + "step": 1510 + }, + { + "epoch": 4.998347107438017, + "eval_loss": 1.9822860956192017, + "eval_runtime": 38.8296, + "eval_samples_per_second": 13.263, + "eval_steps_per_second": 1.674, + "step": 1512 + }, + { + "epoch": 5.024793388429752, + "grad_norm": 1.0307862758636475, + "learning_rate": 0.0002, + "loss": 1.3009, + "step": 1520 + }, + { + "epoch": 5.0578512396694215, + "grad_norm": 0.9152393341064453, + "learning_rate": 0.0002, + "loss": 1.1822, + "step": 1530 + }, + { + "epoch": 5.090909090909091, + "grad_norm": 0.9560136198997498, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 1540 + }, + { + "epoch": 5.12396694214876, + "grad_norm": 0.8285775184631348, + "learning_rate": 0.0002, + "loss": 1.2552, + "step": 1550 + }, + { + "epoch": 5.1570247933884295, + "grad_norm": 0.9479135870933533, + "learning_rate": 0.0002, + "loss": 1.1603, + "step": 1560 + }, + { + "epoch": 5.190082644628099, + "grad_norm": 0.9731078743934631, + "learning_rate": 0.0002, + "loss": 1.229, + "step": 1570 + }, + { + "epoch": 5.223140495867769, + "grad_norm": 0.8167943358421326, + "learning_rate": 0.0002, + "loss": 1.2084, + "step": 1580 + }, + { + "epoch": 5.256198347107438, + "grad_norm": 1.1679469347000122, + "learning_rate": 0.0002, + "loss": 1.1937, + "step": 1590 + }, + { + "epoch": 5.289256198347108, + "grad_norm": 0.9156213402748108, + "learning_rate": 0.0002, + "loss": 1.1662, + "step": 1600 + }, + { + "epoch": 5.322314049586777, + "grad_norm": 1.082939624786377, + "learning_rate": 0.0002, + "loss": 1.2014, + "step": 1610 + }, + { + "epoch": 5.355371900826446, + "grad_norm": 1.0271905660629272, + "learning_rate": 0.0002, + "loss": 1.2701, + "step": 1620 + }, + { + "epoch": 5.3884297520661155, + "grad_norm": 1.2237807512283325, + "learning_rate": 0.0002, + "loss": 1.2275, + "step": 1630 + }, + { + "epoch": 5.421487603305785, + "grad_norm": 1.1419697999954224, + "learning_rate": 0.0002, + "loss": 1.267, + "step": 1640 + }, + { + "epoch": 5.454545454545454, + "grad_norm": 1.4427895545959473, + "learning_rate": 0.0002, + "loss": 1.2424, + "step": 1650 + }, + { + "epoch": 5.487603305785124, + "grad_norm": 1.117572546005249, + "learning_rate": 0.0002, + "loss": 1.24, + "step": 1660 + }, + { + "epoch": 5.520661157024794, + "grad_norm": 1.1476300954818726, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 1670 + }, + { + "epoch": 5.553719008264463, + "grad_norm": 1.1372056007385254, + "learning_rate": 0.0002, + "loss": 1.2397, + "step": 1680 + }, + { + "epoch": 5.586776859504132, + "grad_norm": 1.0415048599243164, + "learning_rate": 0.0002, + "loss": 1.2875, + "step": 1690 + }, + { + "epoch": 5.619834710743802, + "grad_norm": 0.9535173177719116, + "learning_rate": 0.0002, + "loss": 1.2062, + "step": 1700 + }, + { + "epoch": 5.652892561983471, + "grad_norm": 0.9918773174285889, + "learning_rate": 0.0002, + "loss": 1.2528, + "step": 1710 + }, + { + "epoch": 5.68595041322314, + "grad_norm": 0.9184247255325317, + "learning_rate": 0.0002, + "loss": 1.2443, + "step": 1720 + }, + { + "epoch": 5.7190082644628095, + "grad_norm": 0.9416358470916748, + "learning_rate": 0.0002, + "loss": 1.2273, + "step": 1730 + }, + { + "epoch": 5.75206611570248, + "grad_norm": 1.0027815103530884, + "learning_rate": 0.0002, + "loss": 1.2815, + "step": 1740 + }, + { + "epoch": 5.785123966942149, + "grad_norm": 1.0766979455947876, + "learning_rate": 0.0002, + "loss": 1.2261, + "step": 1750 + }, + { + "epoch": 5.818181818181818, + "grad_norm": 0.9244554042816162, + "learning_rate": 0.0002, + "loss": 1.2221, + "step": 1760 + }, + { + "epoch": 5.851239669421488, + "grad_norm": 1.2514721155166626, + "learning_rate": 0.0002, + "loss": 1.312, + "step": 1770 + }, + { + "epoch": 5.884297520661157, + "grad_norm": 1.0198537111282349, + "learning_rate": 0.0002, + "loss": 1.3042, + "step": 1780 + }, + { + "epoch": 5.917355371900826, + "grad_norm": 0.9569677114486694, + "learning_rate": 0.0002, + "loss": 1.2032, + "step": 1790 + }, + { + "epoch": 5.950413223140496, + "grad_norm": 0.9748323559761047, + "learning_rate": 0.0002, + "loss": 1.23, + "step": 1800 + }, + { + "epoch": 5.983471074380166, + "grad_norm": 1.0731725692749023, + "learning_rate": 0.0002, + "loss": 1.2167, + "step": 1810 + }, + { + "epoch": 6.0, + "eval_loss": 2.0875232219696045, + "eval_runtime": 38.8414, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.673, + "step": 1815 + }, + { + "epoch": 6.016528925619835, + "grad_norm": 1.1357579231262207, + "learning_rate": 0.0002, + "loss": 1.2001, + "step": 1820 + }, + { + "epoch": 6.049586776859504, + "grad_norm": 1.1450963020324707, + "learning_rate": 0.0002, + "loss": 1.0238, + "step": 1830 + }, + { + "epoch": 6.082644628099174, + "grad_norm": 1.2671376466751099, + "learning_rate": 0.0002, + "loss": 1.0099, + "step": 1840 + }, + { + "epoch": 6.115702479338843, + "grad_norm": 1.1405659914016724, + "learning_rate": 0.0002, + "loss": 1.07, + "step": 1850 + }, + { + "epoch": 6.148760330578512, + "grad_norm": 1.176552176475525, + "learning_rate": 0.0002, + "loss": 1.0456, + "step": 1860 + }, + { + "epoch": 6.181818181818182, + "grad_norm": 1.2722952365875244, + "learning_rate": 0.0002, + "loss": 1.0804, + "step": 1870 + }, + { + "epoch": 6.214876033057851, + "grad_norm": 1.2505744695663452, + "learning_rate": 0.0002, + "loss": 1.0305, + "step": 1880 + }, + { + "epoch": 6.24793388429752, + "grad_norm": 1.388776183128357, + "learning_rate": 0.0002, + "loss": 1.0496, + "step": 1890 + }, + { + "epoch": 6.2809917355371905, + "grad_norm": 1.3574049472808838, + "learning_rate": 0.0002, + "loss": 1.0727, + "step": 1900 + }, + { + "epoch": 6.31404958677686, + "grad_norm": 1.15278160572052, + "learning_rate": 0.0002, + "loss": 1.0142, + "step": 1910 + }, + { + "epoch": 6.347107438016529, + "grad_norm": 1.280260682106018, + "learning_rate": 0.0002, + "loss": 1.0977, + "step": 1920 + }, + { + "epoch": 6.380165289256198, + "grad_norm": 1.3323947191238403, + "learning_rate": 0.0002, + "loss": 1.0319, + "step": 1930 + }, + { + "epoch": 6.413223140495868, + "grad_norm": 1.2422343492507935, + "learning_rate": 0.0002, + "loss": 1.0816, + "step": 1940 + }, + { + "epoch": 6.446280991735537, + "grad_norm": 1.485025405883789, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 1950 + }, + { + "epoch": 6.479338842975206, + "grad_norm": 1.132170557975769, + "learning_rate": 0.0002, + "loss": 1.0489, + "step": 1960 + }, + { + "epoch": 6.512396694214876, + "grad_norm": 1.1854133605957031, + "learning_rate": 0.0002, + "loss": 1.0551, + "step": 1970 + }, + { + "epoch": 6.545454545454545, + "grad_norm": 1.2570922374725342, + "learning_rate": 0.0002, + "loss": 1.0353, + "step": 1980 + }, + { + "epoch": 6.578512396694215, + "grad_norm": 1.1590516567230225, + "learning_rate": 0.0002, + "loss": 1.0693, + "step": 1990 + }, + { + "epoch": 6.6115702479338845, + "grad_norm": 1.3472840785980225, + "learning_rate": 0.0002, + "loss": 1.0899, + "step": 2000 + }, + { + "epoch": 6.644628099173554, + "grad_norm": 1.4928734302520752, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 2010 + }, + { + "epoch": 6.677685950413223, + "grad_norm": 1.243243932723999, + "learning_rate": 0.0002, + "loss": 1.0448, + "step": 2020 + }, + { + "epoch": 6.710743801652892, + "grad_norm": 1.6677647829055786, + "learning_rate": 0.0002, + "loss": 1.0557, + "step": 2030 + }, + { + "epoch": 6.743801652892562, + "grad_norm": 1.1295818090438843, + "learning_rate": 0.0002, + "loss": 1.1326, + "step": 2040 + }, + { + "epoch": 6.776859504132231, + "grad_norm": 1.2596524953842163, + "learning_rate": 0.0002, + "loss": 1.0889, + "step": 2050 + }, + { + "epoch": 6.809917355371901, + "grad_norm": 1.2924189567565918, + "learning_rate": 0.0002, + "loss": 1.1344, + "step": 2060 + }, + { + "epoch": 6.8429752066115705, + "grad_norm": 1.1653043031692505, + "learning_rate": 0.0002, + "loss": 1.1409, + "step": 2070 + }, + { + "epoch": 6.87603305785124, + "grad_norm": 1.3316930532455444, + "learning_rate": 0.0002, + "loss": 1.0911, + "step": 2080 + }, + { + "epoch": 6.909090909090909, + "grad_norm": 1.2001112699508667, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 2090 + }, + { + "epoch": 6.9421487603305785, + "grad_norm": 1.1454474925994873, + "learning_rate": 0.0002, + "loss": 1.0538, + "step": 2100 + }, + { + "epoch": 6.975206611570248, + "grad_norm": 1.1415315866470337, + "learning_rate": 0.0002, + "loss": 1.0955, + "step": 2110 + }, + { + "epoch": 6.998347107438017, + "eval_loss": 2.2608585357666016, + "eval_runtime": 38.8512, + "eval_samples_per_second": 13.256, + "eval_steps_per_second": 1.673, + "step": 2117 + }, + { + "epoch": 7.008264462809917, + "grad_norm": 1.212228536605835, + "learning_rate": 0.0002, + "loss": 1.0526, + "step": 2120 + }, + { + "epoch": 7.041322314049586, + "grad_norm": 1.713937520980835, + "learning_rate": 0.0002, + "loss": 0.8893, + "step": 2130 + }, + { + "epoch": 7.074380165289257, + "grad_norm": 1.5111262798309326, + "learning_rate": 0.0002, + "loss": 0.8403, + "step": 2140 + }, + { + "epoch": 7.107438016528926, + "grad_norm": 1.3368730545043945, + "learning_rate": 0.0002, + "loss": 0.8349, + "step": 2150 + }, + { + "epoch": 7.140495867768595, + "grad_norm": 2.0835201740264893, + "learning_rate": 0.0002, + "loss": 0.921, + "step": 2160 + }, + { + "epoch": 7.1735537190082646, + "grad_norm": 1.7185221910476685, + "learning_rate": 0.0002, + "loss": 0.8702, + "step": 2170 + }, + { + "epoch": 7.206611570247934, + "grad_norm": 1.3082201480865479, + "learning_rate": 0.0002, + "loss": 0.8934, + "step": 2180 + }, + { + "epoch": 7.239669421487603, + "grad_norm": 1.6471620798110962, + "learning_rate": 0.0002, + "loss": 0.9314, + "step": 2190 + }, + { + "epoch": 7.2727272727272725, + "grad_norm": 1.3716152906417847, + "learning_rate": 0.0002, + "loss": 0.8917, + "step": 2200 + }, + { + "epoch": 7.305785123966942, + "grad_norm": 1.6379696130752563, + "learning_rate": 0.0002, + "loss": 0.9069, + "step": 2210 + }, + { + "epoch": 7.338842975206612, + "grad_norm": 1.3955202102661133, + "learning_rate": 0.0002, + "loss": 0.9263, + "step": 2220 + }, + { + "epoch": 7.371900826446281, + "grad_norm": 1.4794671535491943, + "learning_rate": 0.0002, + "loss": 0.8964, + "step": 2230 + }, + { + "epoch": 7.404958677685951, + "grad_norm": 1.5542606115341187, + "learning_rate": 0.0002, + "loss": 0.8703, + "step": 2240 + }, + { + "epoch": 7.43801652892562, + "grad_norm": 1.3853563070297241, + "learning_rate": 0.0002, + "loss": 0.8864, + "step": 2250 + }, + { + "epoch": 7.471074380165289, + "grad_norm": 1.428218126296997, + "learning_rate": 0.0002, + "loss": 0.9029, + "step": 2260 + }, + { + "epoch": 7.5041322314049586, + "grad_norm": 1.78374183177948, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 2270 + }, + { + "epoch": 7.537190082644628, + "grad_norm": 1.4092047214508057, + "learning_rate": 0.0002, + "loss": 0.9102, + "step": 2280 + }, + { + "epoch": 7.570247933884297, + "grad_norm": 1.601216197013855, + "learning_rate": 0.0002, + "loss": 0.8833, + "step": 2290 + }, + { + "epoch": 7.6033057851239665, + "grad_norm": 1.82637619972229, + "learning_rate": 0.0002, + "loss": 0.9367, + "step": 2300 + }, + { + "epoch": 7.636363636363637, + "grad_norm": 1.434897541999817, + "learning_rate": 0.0002, + "loss": 0.9162, + "step": 2310 + }, + { + "epoch": 7.669421487603306, + "grad_norm": 1.5651953220367432, + "learning_rate": 0.0002, + "loss": 0.8969, + "step": 2320 + }, + { + "epoch": 7.702479338842975, + "grad_norm": 1.793326735496521, + "learning_rate": 0.0002, + "loss": 0.9189, + "step": 2330 + }, + { + "epoch": 7.735537190082645, + "grad_norm": 1.3330620527267456, + "learning_rate": 0.0002, + "loss": 0.9133, + "step": 2340 + }, + { + "epoch": 7.768595041322314, + "grad_norm": 1.8976562023162842, + "learning_rate": 0.0002, + "loss": 0.9519, + "step": 2350 + }, + { + "epoch": 7.801652892561983, + "grad_norm": 1.7769376039505005, + "learning_rate": 0.0002, + "loss": 0.9381, + "step": 2360 + }, + { + "epoch": 7.8347107438016526, + "grad_norm": 1.607336163520813, + "learning_rate": 0.0002, + "loss": 0.9588, + "step": 2370 + }, + { + "epoch": 7.867768595041323, + "grad_norm": 1.807392954826355, + "learning_rate": 0.0002, + "loss": 0.9241, + "step": 2380 + }, + { + "epoch": 7.900826446280992, + "grad_norm": 1.501326322555542, + "learning_rate": 0.0002, + "loss": 0.9395, + "step": 2390 + }, + { + "epoch": 7.933884297520661, + "grad_norm": 1.473686695098877, + "learning_rate": 0.0002, + "loss": 0.9857, + "step": 2400 + }, + { + "epoch": 7.966942148760331, + "grad_norm": 1.8466233015060425, + "learning_rate": 0.0002, + "loss": 0.8913, + "step": 2410 + }, + { + "epoch": 7.986776859504133, + "eval_loss": 2.399585008621216, + "eval_runtime": 38.8435, + "eval_samples_per_second": 13.258, + "eval_steps_per_second": 1.673, + "step": 2416 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.118070703568978e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-2416/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e210dad2c72eaa5c931e0c5ea32def7e482c994 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bab7d4fbf773b3d6d58b6cedb794464827281ff2938bd1b22c579a37b90f82df +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..01de7d5082057ad3efc288399a0985efe5bd23aa --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca85378ba0fd9bbc0ad842e12a961b4c09f07a6155ecc004ae302146bc077dd8 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b167899af4ed8969eb42697df1d88cd6c7e707a6 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86d57fd5d95865b8f23416fb38184d55f00351f6e9f87f3e69828c5436088c80 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0404244065302d519d83156c47104f8752aed0c8 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0da3308f8a86795b057c964c0e312e52cbe39d012514bbd29842178d7064b6b2 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..09d6ab2edb21b1471adfc92815515e5dae7d1b1b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/trainer_state.json @@ -0,0 +1,251 @@ +{ + "best_metric": 1.8413277864456177, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302", + "epoch": 0.9983471074380166, + "eval_steps": 10, + "global_step": 302, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.399902267506688e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08683fb978187499f5a5a94f1ef7270659f69cbe --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3197d67f8761102919f87e8312b13d7ed1575c87e89b969c1714ee45600b400 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5dde839749c61802554864dd935c8613e887a9c2 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d6e0faada965c3067900872f1504eb5342127bc36605e12e1dc302017f2b365 +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6db4e3d3b4d824ad85d222aba0b1eb2d6dbf0755 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974e77b2760cd158cde4a21b3a72b73e8adacfadba82de71cf516a80fc514d54 +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d927ca296c27d438aeae34c09ceadcedef151637 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6c2c0b1b8bc33d346865ca0dab2ae1c17fb58e3ccd67bca381b1ce866a84ba0 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ff663b28e6204133a46314d7baa896817490accf --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/trainer_state.json @@ -0,0 +1,469 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 605, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.799804535013376e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/README.md b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/README.md new file mode 100644 index 0000000000000000000000000000000000000000..835e31ab8469ee39ddc8b2b6b2143a8c66dad510 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/README.md @@ -0,0 +1,202 @@ +--- +base_model: meta-llama/Meta-Llama-3-8B-Instruct +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..d63cb87eaccf2d81de3cdcfa11d2e99c440c0ea0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_model.safetensors b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a465c5fb80e4779b68fcd3438301e4f91528cc8c --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea9831e79ee5fef9cc82e1bb1e205e373bace6e07b7a96d6f9e58d1ee68c6083 +size 109069176 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/optimizer.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94d23ffcb103c15e31e64a0c97ab6d6a06a5a8ee --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40bbbefb17d70c433334f12c9eb4cadedcd3be5428f703450217bdc78a8ae4ae +size 55532666 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/rng_state.pth b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..34bd54d4052acdfe35111e8776fca445da578d9c --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e2e881c2ccb84bdab1c32627c27b24f6e871a2ebec9f91b1fc706a7be8e523f +size 14244 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/scheduler.pt b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c82fb1987f57c2837f52b21424a3c813f536e7ca --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0f32b6ab5ee208497111827e9bb96baaffffa4f2e5307f27252827597086c6 +size 1064 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/trainer_state.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7141d08517e2cf341bfba511ebd2e072e16b8b7 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/trainer_state.json @@ -0,0 +1,687 @@ +{ + "best_metric": 1.833760380744934, + "best_model_checkpoint": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", + "epoch": 2.9983471074380166, + "eval_steps": 10, + "global_step": 907, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.03305785123966942, + "grad_norm": 0.4384556710720062, + "learning_rate": 0.0002, + "loss": 2.528, + "step": 10 + }, + { + "epoch": 0.06611570247933884, + "grad_norm": 0.48312580585479736, + "learning_rate": 0.0002, + "loss": 2.2675, + "step": 20 + }, + { + "epoch": 0.09917355371900827, + "grad_norm": 0.6193496584892273, + "learning_rate": 0.0002, + "loss": 2.0439, + "step": 30 + }, + { + "epoch": 0.1322314049586777, + "grad_norm": 0.471858948469162, + "learning_rate": 0.0002, + "loss": 1.9649, + "step": 40 + }, + { + "epoch": 0.1652892561983471, + "grad_norm": 0.43199431896209717, + "learning_rate": 0.0002, + "loss": 1.9945, + "step": 50 + }, + { + "epoch": 0.19834710743801653, + "grad_norm": 0.5022176504135132, + "learning_rate": 0.0002, + "loss": 1.9409, + "step": 60 + }, + { + "epoch": 0.23140495867768596, + "grad_norm": 0.4934026300907135, + "learning_rate": 0.0002, + "loss": 1.89, + "step": 70 + }, + { + "epoch": 0.2644628099173554, + "grad_norm": 0.4313369691371918, + "learning_rate": 0.0002, + "loss": 1.9036, + "step": 80 + }, + { + "epoch": 0.2975206611570248, + "grad_norm": 0.48663529753685, + "learning_rate": 0.0002, + "loss": 1.8992, + "step": 90 + }, + { + "epoch": 0.3305785123966942, + "grad_norm": 0.47740036249160767, + "learning_rate": 0.0002, + "loss": 1.8655, + "step": 100 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 0.41685664653778076, + "learning_rate": 0.0002, + "loss": 1.8797, + "step": 110 + }, + { + "epoch": 0.39669421487603307, + "grad_norm": 2.368595838546753, + "learning_rate": 0.0002, + "loss": 1.8951, + "step": 120 + }, + { + "epoch": 0.4297520661157025, + "grad_norm": 0.4861043095588684, + "learning_rate": 0.0002, + "loss": 1.8753, + "step": 130 + }, + { + "epoch": 0.4628099173553719, + "grad_norm": 0.41848257184028625, + "learning_rate": 0.0002, + "loss": 1.8413, + "step": 140 + }, + { + "epoch": 0.49586776859504134, + "grad_norm": 0.38776087760925293, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 150 + }, + { + "epoch": 0.5289256198347108, + "grad_norm": 0.4095233380794525, + "learning_rate": 0.0002, + "loss": 1.8172, + "step": 160 + }, + { + "epoch": 0.5619834710743802, + "grad_norm": 0.4492895007133484, + "learning_rate": 0.0002, + "loss": 1.9401, + "step": 170 + }, + { + "epoch": 0.5950413223140496, + "grad_norm": 0.5678786039352417, + "learning_rate": 0.0002, + "loss": 1.8707, + "step": 180 + }, + { + "epoch": 0.628099173553719, + "grad_norm": 0.4926881492137909, + "learning_rate": 0.0002, + "loss": 1.8067, + "step": 190 + }, + { + "epoch": 0.6611570247933884, + "grad_norm": 0.3865489363670349, + "learning_rate": 0.0002, + "loss": 1.8567, + "step": 200 + }, + { + "epoch": 0.6942148760330579, + "grad_norm": 0.40578970313072205, + "learning_rate": 0.0002, + "loss": 1.7555, + "step": 210 + }, + { + "epoch": 0.7272727272727273, + "grad_norm": 0.3729846775531769, + "learning_rate": 0.0002, + "loss": 1.8192, + "step": 220 + }, + { + "epoch": 0.7603305785123967, + "grad_norm": 0.36989861726760864, + "learning_rate": 0.0002, + "loss": 1.8787, + "step": 230 + }, + { + "epoch": 0.7933884297520661, + "grad_norm": 0.3764864206314087, + "learning_rate": 0.0002, + "loss": 1.8254, + "step": 240 + }, + { + "epoch": 0.8264462809917356, + "grad_norm": 1.2193230390548706, + "learning_rate": 0.0002, + "loss": 1.8008, + "step": 250 + }, + { + "epoch": 0.859504132231405, + "grad_norm": 0.37381255626678467, + "learning_rate": 0.0002, + "loss": 1.8093, + "step": 260 + }, + { + "epoch": 0.8925619834710744, + "grad_norm": 0.35480767488479614, + "learning_rate": 0.0002, + "loss": 1.7911, + "step": 270 + }, + { + "epoch": 0.9256198347107438, + "grad_norm": 0.4945891201496124, + "learning_rate": 0.0002, + "loss": 1.7824, + "step": 280 + }, + { + "epoch": 0.9586776859504132, + "grad_norm": 0.39967674016952515, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 290 + }, + { + "epoch": 0.9917355371900827, + "grad_norm": 0.4257008135318756, + "learning_rate": 0.0002, + "loss": 1.8321, + "step": 300 + }, + { + "epoch": 0.9983471074380166, + "eval_loss": 1.8413277864456177, + "eval_runtime": 38.8241, + "eval_samples_per_second": 13.265, + "eval_steps_per_second": 1.674, + "step": 302 + }, + { + "epoch": 1.024793388429752, + "grad_norm": 0.4019509255886078, + "learning_rate": 0.0002, + "loss": 1.7265, + "step": 310 + }, + { + "epoch": 1.0578512396694215, + "grad_norm": 0.3439880311489105, + "learning_rate": 0.0002, + "loss": 1.7756, + "step": 320 + }, + { + "epoch": 1.0909090909090908, + "grad_norm": 0.4353587031364441, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 330 + }, + { + "epoch": 1.1239669421487604, + "grad_norm": 0.41257765889167786, + "learning_rate": 0.0002, + "loss": 1.7419, + "step": 340 + }, + { + "epoch": 1.1570247933884297, + "grad_norm": 0.4224575161933899, + "learning_rate": 0.0002, + "loss": 1.7774, + "step": 350 + }, + { + "epoch": 1.1900826446280992, + "grad_norm": 0.36395177245140076, + "learning_rate": 0.0002, + "loss": 1.7502, + "step": 360 + }, + { + "epoch": 1.2231404958677685, + "grad_norm": 0.4251839518547058, + "learning_rate": 0.0002, + "loss": 1.8064, + "step": 370 + }, + { + "epoch": 1.256198347107438, + "grad_norm": 0.43602821230888367, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 380 + }, + { + "epoch": 1.2892561983471074, + "grad_norm": 0.3940708637237549, + "learning_rate": 0.0002, + "loss": 1.8261, + "step": 390 + }, + { + "epoch": 1.322314049586777, + "grad_norm": 0.3626866042613983, + "learning_rate": 0.0002, + "loss": 1.7317, + "step": 400 + }, + { + "epoch": 1.3553719008264462, + "grad_norm": 0.40716150403022766, + "learning_rate": 0.0002, + "loss": 1.7493, + "step": 410 + }, + { + "epoch": 1.3884297520661157, + "grad_norm": 0.39323991537094116, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 420 + }, + { + "epoch": 1.421487603305785, + "grad_norm": 0.44480809569358826, + "learning_rate": 0.0002, + "loss": 1.7863, + "step": 430 + }, + { + "epoch": 1.4545454545454546, + "grad_norm": 0.4438270032405853, + "learning_rate": 0.0002, + "loss": 1.7477, + "step": 440 + }, + { + "epoch": 1.487603305785124, + "grad_norm": 0.3953928053379059, + "learning_rate": 0.0002, + "loss": 1.774, + "step": 450 + }, + { + "epoch": 1.5206611570247934, + "grad_norm": 0.4152870178222656, + "learning_rate": 0.0002, + "loss": 1.7162, + "step": 460 + }, + { + "epoch": 1.553719008264463, + "grad_norm": 0.45231857895851135, + "learning_rate": 0.0002, + "loss": 1.8176, + "step": 470 + }, + { + "epoch": 1.5867768595041323, + "grad_norm": 0.46560999751091003, + "learning_rate": 0.0002, + "loss": 1.7281, + "step": 480 + }, + { + "epoch": 1.6198347107438016, + "grad_norm": 0.3510372042655945, + "learning_rate": 0.0002, + "loss": 1.8047, + "step": 490 + }, + { + "epoch": 1.6528925619834711, + "grad_norm": 0.36788758635520935, + "learning_rate": 0.0002, + "loss": 1.7719, + "step": 500 + }, + { + "epoch": 1.6859504132231407, + "grad_norm": 0.3911917209625244, + "learning_rate": 0.0002, + "loss": 1.8287, + "step": 510 + }, + { + "epoch": 1.71900826446281, + "grad_norm": 0.440964937210083, + "learning_rate": 0.0002, + "loss": 1.7891, + "step": 520 + }, + { + "epoch": 1.7520661157024793, + "grad_norm": 0.36718201637268066, + "learning_rate": 0.0002, + "loss": 1.6858, + "step": 530 + }, + { + "epoch": 1.7851239669421488, + "grad_norm": 0.3927479088306427, + "learning_rate": 0.0002, + "loss": 1.7828, + "step": 540 + }, + { + "epoch": 1.8181818181818183, + "grad_norm": 0.4298672378063202, + "learning_rate": 0.0002, + "loss": 1.7406, + "step": 550 + }, + { + "epoch": 1.8512396694214877, + "grad_norm": 0.4257620871067047, + "learning_rate": 0.0002, + "loss": 1.7626, + "step": 560 + }, + { + "epoch": 1.884297520661157, + "grad_norm": 0.3743717670440674, + "learning_rate": 0.0002, + "loss": 1.7677, + "step": 570 + }, + { + "epoch": 1.9173553719008265, + "grad_norm": 0.4413471817970276, + "learning_rate": 0.0002, + "loss": 1.7263, + "step": 580 + }, + { + "epoch": 1.950413223140496, + "grad_norm": 0.41639673709869385, + "learning_rate": 0.0002, + "loss": 1.7528, + "step": 590 + }, + { + "epoch": 1.9834710743801653, + "grad_norm": 0.46319296956062317, + "learning_rate": 0.0002, + "loss": 1.7141, + "step": 600 + }, + { + "epoch": 2.0, + "eval_loss": 1.833760380744934, + "eval_runtime": 38.8469, + "eval_samples_per_second": 13.257, + "eval_steps_per_second": 1.673, + "step": 605 + }, + { + "epoch": 2.0165289256198347, + "grad_norm": 0.38033604621887207, + "learning_rate": 0.0002, + "loss": 1.7399, + "step": 610 + }, + { + "epoch": 2.049586776859504, + "grad_norm": 0.4522306025028229, + "learning_rate": 0.0002, + "loss": 1.6414, + "step": 620 + }, + { + "epoch": 2.0826446280991737, + "grad_norm": 0.41294756531715393, + "learning_rate": 0.0002, + "loss": 1.5976, + "step": 630 + }, + { + "epoch": 2.115702479338843, + "grad_norm": 0.5129091739654541, + "learning_rate": 0.0002, + "loss": 1.6664, + "step": 640 + }, + { + "epoch": 2.1487603305785123, + "grad_norm": 0.4630700647830963, + "learning_rate": 0.0002, + "loss": 1.7207, + "step": 650 + }, + { + "epoch": 2.1818181818181817, + "grad_norm": 0.4368151128292084, + "learning_rate": 0.0002, + "loss": 1.5884, + "step": 660 + }, + { + "epoch": 2.2148760330578514, + "grad_norm": 0.5266494154930115, + "learning_rate": 0.0002, + "loss": 1.7271, + "step": 670 + }, + { + "epoch": 2.2479338842975207, + "grad_norm": 0.4744901955127716, + "learning_rate": 0.0002, + "loss": 1.5749, + "step": 680 + }, + { + "epoch": 2.28099173553719, + "grad_norm": 0.5312414765357971, + "learning_rate": 0.0002, + "loss": 1.6512, + "step": 690 + }, + { + "epoch": 2.3140495867768593, + "grad_norm": 0.49116063117980957, + "learning_rate": 0.0002, + "loss": 1.6957, + "step": 700 + }, + { + "epoch": 2.347107438016529, + "grad_norm": 0.4626988172531128, + "learning_rate": 0.0002, + "loss": 1.646, + "step": 710 + }, + { + "epoch": 2.3801652892561984, + "grad_norm": 0.4851135015487671, + "learning_rate": 0.0002, + "loss": 1.6474, + "step": 720 + }, + { + "epoch": 2.4132231404958677, + "grad_norm": 0.4882378578186035, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 730 + }, + { + "epoch": 2.446280991735537, + "grad_norm": 0.4470290243625641, + "learning_rate": 0.0002, + "loss": 1.6588, + "step": 740 + }, + { + "epoch": 2.479338842975207, + "grad_norm": 0.5901731848716736, + "learning_rate": 0.0002, + "loss": 1.6419, + "step": 750 + }, + { + "epoch": 2.512396694214876, + "grad_norm": 0.48137718439102173, + "learning_rate": 0.0002, + "loss": 1.6756, + "step": 760 + }, + { + "epoch": 2.5454545454545454, + "grad_norm": 0.45636510848999023, + "learning_rate": 0.0002, + "loss": 1.6708, + "step": 770 + }, + { + "epoch": 2.5785123966942147, + "grad_norm": 0.48216402530670166, + "learning_rate": 0.0002, + "loss": 1.6693, + "step": 780 + }, + { + "epoch": 2.6115702479338845, + "grad_norm": 0.47188714146614075, + "learning_rate": 0.0002, + "loss": 1.664, + "step": 790 + }, + { + "epoch": 2.644628099173554, + "grad_norm": 0.44025519490242004, + "learning_rate": 0.0002, + "loss": 1.619, + "step": 800 + }, + { + "epoch": 2.677685950413223, + "grad_norm": 0.4918605387210846, + "learning_rate": 0.0002, + "loss": 1.6532, + "step": 810 + }, + { + "epoch": 2.7107438016528924, + "grad_norm": 0.5082308650016785, + "learning_rate": 0.0002, + "loss": 1.7513, + "step": 820 + }, + { + "epoch": 2.7438016528925617, + "grad_norm": 0.5610618591308594, + "learning_rate": 0.0002, + "loss": 1.7221, + "step": 830 + }, + { + "epoch": 2.7768595041322315, + "grad_norm": 0.540302574634552, + "learning_rate": 0.0002, + "loss": 1.7115, + "step": 840 + }, + { + "epoch": 2.809917355371901, + "grad_norm": 0.46016451716423035, + "learning_rate": 0.0002, + "loss": 1.659, + "step": 850 + }, + { + "epoch": 2.84297520661157, + "grad_norm": 0.45313313603401184, + "learning_rate": 0.0002, + "loss": 1.672, + "step": 860 + }, + { + "epoch": 2.87603305785124, + "grad_norm": 0.49267083406448364, + "learning_rate": 0.0002, + "loss": 1.6676, + "step": 870 + }, + { + "epoch": 2.909090909090909, + "grad_norm": 0.4506530463695526, + "learning_rate": 0.0002, + "loss": 1.6577, + "step": 880 + }, + { + "epoch": 2.9421487603305785, + "grad_norm": 0.4393260180950165, + "learning_rate": 0.0002, + "loss": 1.7059, + "step": 890 + }, + { + "epoch": 2.975206611570248, + "grad_norm": 0.438073068857193, + "learning_rate": 0.0002, + "loss": 1.7042, + "step": 900 + }, + { + "epoch": 2.9983471074380166, + "eval_loss": 1.853971004486084, + "eval_runtime": 38.8404, + "eval_samples_per_second": 13.259, + "eval_steps_per_second": 1.674, + "step": 907 + } + ], + "logging_steps": 10, + "max_steps": 2416, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.199706802520064e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-907/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/special_tokens_map.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..b43be96621d147110fb8a18b5776ec6e38516127 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/special_tokens_map.json @@ -0,0 +1,17 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..86a33946b0c77216d2cce91bb28c8fada4a5e80b --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer_config.json b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..061e40d9db3253624f86e8e364c15ef546527c9d --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/tokenizer_config.json @@ -0,0 +1,2063 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_args.bin b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..bdae94a3bb1cbb687094648bad0dbd67438f1665 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59d6797aee4f8f861b969f822d3a50bd2f069252a018ea198e60b03a920e2c9f +size 5560 diff --git a/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_log.jsonl b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..51e563c8e097bc4e263e5dde8fe424aeb545dcc0 --- /dev/null +++ b/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 0.9983471074380166, "step": 302, "epoch_duration": 324.91444635391235, "total_accumulated_duration": 324.91444635391235, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7672.4541015625}, "peak_memory_usage": {"GPU_0": 9688.99365234375}, "avg_memory_reserved": {"GPU_0": 10406.0}, "peak_memory_reserved": {"GPU_0": 10406.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}]} +{"epoch": 2.0, "step": 605, "epoch_duration": 324.62790083885193, "total_accumulated_duration": 649.5423471927643, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-302", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}]} +{"epoch": 2.9983471074380166, "step": 907, "epoch_duration": 324.92122411727905, "total_accumulated_duration": 974.4635713100433, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7672.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}]} +{"epoch": 4.0, "step": 1210, "epoch_duration": 325.15446400642395, "total_accumulated_duration": 1299.6180353164673, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}, {"eval_loss": 1.853971004486084, "eval_runtime": 38.8404, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.674, "epoch": 2.9983471074380166, "step": 907}, {"loss": 1.6173, "grad_norm": 0.4399570822715759, "learning_rate": 0.0002, "epoch": 3.0082644628099175, "step": 910}, {"loss": 1.5578, "grad_norm": 0.5338484644889832, "learning_rate": 0.0002, "epoch": 3.041322314049587, "step": 920}, {"loss": 1.5507, "grad_norm": 0.6154358983039856, "learning_rate": 0.0002, "epoch": 3.074380165289256, "step": 930}, {"loss": 1.6189, "grad_norm": 0.6429790258407593, "learning_rate": 0.0002, "epoch": 3.1074380165289255, "step": 940}, {"loss": 1.5866, "grad_norm": 0.5375680923461914, "learning_rate": 0.0002, "epoch": 3.1404958677685952, "step": 950}, {"loss": 1.5119, "grad_norm": 0.5594999194145203, "learning_rate": 0.0002, "epoch": 3.1735537190082646, "step": 960}, {"loss": 1.5096, "grad_norm": 0.6742738485336304, "learning_rate": 0.0002, "epoch": 3.206611570247934, "step": 970}, {"loss": 1.5477, "grad_norm": 0.563497006893158, "learning_rate": 0.0002, "epoch": 3.239669421487603, "step": 980}, {"loss": 1.5559, "grad_norm": 0.6521140933036804, "learning_rate": 0.0002, "epoch": 3.2727272727272725, "step": 990}, {"loss": 1.4905, "grad_norm": 0.6016622185707092, "learning_rate": 0.0002, "epoch": 3.3057851239669422, "step": 1000}, {"loss": 1.5307, "grad_norm": 0.6564913988113403, "learning_rate": 0.0002, "epoch": 3.3388429752066116, "step": 1010}, {"loss": 1.4595, "grad_norm": 0.6528742909431458, "learning_rate": 0.0002, "epoch": 3.371900826446281, "step": 1020}, {"loss": 1.518, "grad_norm": 0.5843546390533447, "learning_rate": 0.0002, "epoch": 3.4049586776859506, "step": 1030}, {"loss": 1.5148, "grad_norm": 0.5892922282218933, "learning_rate": 0.0002, "epoch": 3.43801652892562, "step": 1040}, {"loss": 1.5125, "grad_norm": 0.6217362284660339, "learning_rate": 0.0002, "epoch": 3.4710743801652892, "step": 1050}, {"loss": 1.526, "grad_norm": 0.5837283134460449, "learning_rate": 0.0002, "epoch": 3.5041322314049586, "step": 1060}, {"loss": 1.5776, "grad_norm": 0.6369057893753052, "learning_rate": 0.0002, "epoch": 3.537190082644628, "step": 1070}, {"loss": 1.4758, "grad_norm": 0.632115364074707, "learning_rate": 0.0002, "epoch": 3.5702479338842976, "step": 1080}, {"loss": 1.5604, "grad_norm": 0.6364002823829651, "learning_rate": 0.0002, "epoch": 3.603305785123967, "step": 1090}, {"loss": 1.508, "grad_norm": 0.550032377243042, "learning_rate": 0.0002, "epoch": 3.6363636363636362, "step": 1100}, {"loss": 1.5548, "grad_norm": 0.6106863617897034, "learning_rate": 0.0002, "epoch": 3.669421487603306, "step": 1110}, {"loss": 1.5237, "grad_norm": 0.635955274105072, "learning_rate": 0.0002, "epoch": 3.7024793388429753, "step": 1120}, {"loss": 1.5698, "grad_norm": 0.615804135799408, "learning_rate": 0.0002, "epoch": 3.7355371900826446, "step": 1130}, {"loss": 1.6068, "grad_norm": 0.5769386887550354, "learning_rate": 0.0002, "epoch": 3.768595041322314, "step": 1140}, {"loss": 1.5262, "grad_norm": 0.5938104391098022, "learning_rate": 0.0002, "epoch": 3.8016528925619832, "step": 1150}, {"loss": 1.5236, "grad_norm": 0.6149733066558838, "learning_rate": 0.0002, "epoch": 3.834710743801653, "step": 1160}, {"loss": 1.5585, "grad_norm": 0.6228950023651123, "learning_rate": 0.0002, "epoch": 3.8677685950413223, "step": 1170}, {"loss": 1.5815, "grad_norm": 0.6196513175964355, "learning_rate": 0.0002, "epoch": 3.9008264462809916, "step": 1180}, {"loss": 1.5894, "grad_norm": 0.5946677327156067, "learning_rate": 0.0002, "epoch": 3.9338842975206614, "step": 1190}, {"loss": 1.5451, "grad_norm": 0.5882220268249512, "learning_rate": 0.0002, "epoch": 3.9669421487603307, "step": 1200}, {"loss": 1.6202, "grad_norm": 0.6291728019714355, "learning_rate": 0.0002, "epoch": 4.0, "step": 1210}]} +{"epoch": 4.998347107438017, "step": 1512, "epoch_duration": 325.137104511261, "total_accumulated_duration": 1624.7551398277283, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7672.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}, {"eval_loss": 1.853971004486084, "eval_runtime": 38.8404, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.674, "epoch": 2.9983471074380166, "step": 907}, {"loss": 1.6173, "grad_norm": 0.4399570822715759, "learning_rate": 0.0002, "epoch": 3.0082644628099175, "step": 910}, {"loss": 1.5578, "grad_norm": 0.5338484644889832, "learning_rate": 0.0002, "epoch": 3.041322314049587, "step": 920}, {"loss": 1.5507, "grad_norm": 0.6154358983039856, "learning_rate": 0.0002, "epoch": 3.074380165289256, "step": 930}, {"loss": 1.6189, "grad_norm": 0.6429790258407593, "learning_rate": 0.0002, "epoch": 3.1074380165289255, "step": 940}, {"loss": 1.5866, "grad_norm": 0.5375680923461914, "learning_rate": 0.0002, "epoch": 3.1404958677685952, "step": 950}, {"loss": 1.5119, "grad_norm": 0.5594999194145203, "learning_rate": 0.0002, "epoch": 3.1735537190082646, "step": 960}, {"loss": 1.5096, "grad_norm": 0.6742738485336304, "learning_rate": 0.0002, "epoch": 3.206611570247934, "step": 970}, {"loss": 1.5477, "grad_norm": 0.563497006893158, "learning_rate": 0.0002, "epoch": 3.239669421487603, "step": 980}, {"loss": 1.5559, "grad_norm": 0.6521140933036804, "learning_rate": 0.0002, "epoch": 3.2727272727272725, "step": 990}, {"loss": 1.4905, "grad_norm": 0.6016622185707092, "learning_rate": 0.0002, "epoch": 3.3057851239669422, "step": 1000}, {"loss": 1.5307, "grad_norm": 0.6564913988113403, "learning_rate": 0.0002, "epoch": 3.3388429752066116, "step": 1010}, {"loss": 1.4595, "grad_norm": 0.6528742909431458, "learning_rate": 0.0002, "epoch": 3.371900826446281, "step": 1020}, {"loss": 1.518, "grad_norm": 0.5843546390533447, "learning_rate": 0.0002, "epoch": 3.4049586776859506, "step": 1030}, {"loss": 1.5148, "grad_norm": 0.5892922282218933, "learning_rate": 0.0002, "epoch": 3.43801652892562, "step": 1040}, {"loss": 1.5125, "grad_norm": 0.6217362284660339, "learning_rate": 0.0002, "epoch": 3.4710743801652892, "step": 1050}, {"loss": 1.526, "grad_norm": 0.5837283134460449, "learning_rate": 0.0002, "epoch": 3.5041322314049586, "step": 1060}, {"loss": 1.5776, "grad_norm": 0.6369057893753052, "learning_rate": 0.0002, "epoch": 3.537190082644628, "step": 1070}, {"loss": 1.4758, "grad_norm": 0.632115364074707, "learning_rate": 0.0002, "epoch": 3.5702479338842976, "step": 1080}, {"loss": 1.5604, "grad_norm": 0.6364002823829651, "learning_rate": 0.0002, "epoch": 3.603305785123967, "step": 1090}, {"loss": 1.508, "grad_norm": 0.550032377243042, "learning_rate": 0.0002, "epoch": 3.6363636363636362, "step": 1100}, {"loss": 1.5548, "grad_norm": 0.6106863617897034, "learning_rate": 0.0002, "epoch": 3.669421487603306, "step": 1110}, {"loss": 1.5237, "grad_norm": 0.635955274105072, "learning_rate": 0.0002, "epoch": 3.7024793388429753, "step": 1120}, {"loss": 1.5698, "grad_norm": 0.615804135799408, "learning_rate": 0.0002, "epoch": 3.7355371900826446, "step": 1130}, {"loss": 1.6068, "grad_norm": 0.5769386887550354, "learning_rate": 0.0002, "epoch": 3.768595041322314, "step": 1140}, {"loss": 1.5262, "grad_norm": 0.5938104391098022, "learning_rate": 0.0002, "epoch": 3.8016528925619832, "step": 1150}, {"loss": 1.5236, "grad_norm": 0.6149733066558838, "learning_rate": 0.0002, "epoch": 3.834710743801653, "step": 1160}, {"loss": 1.5585, "grad_norm": 0.6228950023651123, "learning_rate": 0.0002, "epoch": 3.8677685950413223, "step": 1170}, {"loss": 1.5815, "grad_norm": 0.6196513175964355, "learning_rate": 0.0002, "epoch": 3.9008264462809916, "step": 1180}, {"loss": 1.5894, "grad_norm": 0.5946677327156067, "learning_rate": 0.0002, "epoch": 3.9338842975206614, "step": 1190}, {"loss": 1.5451, "grad_norm": 0.5882220268249512, "learning_rate": 0.0002, "epoch": 3.9669421487603307, "step": 1200}, {"loss": 1.6202, "grad_norm": 0.6291728019714355, "learning_rate": 0.0002, "epoch": 4.0, "step": 1210}, {"eval_loss": 1.8943731784820557, "eval_runtime": 38.826, "eval_samples_per_second": 13.264, "eval_steps_per_second": 1.674, "epoch": 4.0, "step": 1210}, {"loss": 1.3944, "grad_norm": 1.0843605995178223, "learning_rate": 0.0002, "epoch": 4.033057851239669, "step": 1220}, {"loss": 1.3453, "grad_norm": 0.6460382342338562, "learning_rate": 0.0002, "epoch": 4.066115702479339, "step": 1230}, {"loss": 1.3714, "grad_norm": 0.7872665524482727, "learning_rate": 0.0002, "epoch": 4.099173553719008, "step": 1240}, {"loss": 1.3247, "grad_norm": 0.7585243582725525, "learning_rate": 0.0002, "epoch": 4.132231404958677, "step": 1250}, {"loss": 1.4162, "grad_norm": 0.7955290079116821, "learning_rate": 0.0002, "epoch": 4.1652892561983474, "step": 1260}, {"loss": 1.4127, "grad_norm": 0.8847756385803223, "learning_rate": 0.0002, "epoch": 4.198347107438017, "step": 1270}, {"loss": 1.3972, "grad_norm": 0.7897582650184631, "learning_rate": 0.0002, "epoch": 4.231404958677686, "step": 1280}, {"loss": 1.3631, "grad_norm": 0.755404531955719, "learning_rate": 0.0002, "epoch": 4.264462809917355, "step": 1290}, {"loss": 1.4219, "grad_norm": 0.7718978524208069, "learning_rate": 0.0002, "epoch": 4.297520661157025, "step": 1300}, {"loss": 1.3832, "grad_norm": 0.8073238134384155, "learning_rate": 0.0002, "epoch": 4.330578512396694, "step": 1310}, {"loss": 1.3968, "grad_norm": 0.8661217093467712, "learning_rate": 0.0002, "epoch": 4.363636363636363, "step": 1320}, {"loss": 1.3809, "grad_norm": 0.8859766721725464, "learning_rate": 0.0002, "epoch": 4.3966942148760335, "step": 1330}, {"loss": 1.3779, "grad_norm": 0.8635476231575012, "learning_rate": 0.0002, "epoch": 4.429752066115703, "step": 1340}, {"loss": 1.403, "grad_norm": 0.7376685738563538, "learning_rate": 0.0002, "epoch": 4.462809917355372, "step": 1350}, {"loss": 1.4346, "grad_norm": 0.7924236059188843, "learning_rate": 0.0002, "epoch": 4.4958677685950414, "step": 1360}, {"loss": 1.3205, "grad_norm": 0.6969273686408997, "learning_rate": 0.0002, "epoch": 4.528925619834711, "step": 1370}, {"loss": 1.399, "grad_norm": 0.7346147894859314, "learning_rate": 0.0002, "epoch": 4.56198347107438, "step": 1380}, {"loss": 1.4308, "grad_norm": 0.8515401482582092, "learning_rate": 0.0002, "epoch": 4.595041322314049, "step": 1390}, {"loss": 1.407, "grad_norm": 0.8154449462890625, "learning_rate": 0.0002, "epoch": 4.628099173553719, "step": 1400}, {"loss": 1.4182, "grad_norm": 0.8922461271286011, "learning_rate": 0.0002, "epoch": 4.661157024793388, "step": 1410}, {"loss": 1.3894, "grad_norm": 0.8835586309432983, "learning_rate": 0.0002, "epoch": 4.694214876033058, "step": 1420}, {"loss": 1.411, "grad_norm": 0.7689077258110046, "learning_rate": 0.0002, "epoch": 4.7272727272727275, "step": 1430}, {"loss": 1.4083, "grad_norm": 0.7515250444412231, "learning_rate": 0.0002, "epoch": 4.760330578512397, "step": 1440}, {"loss": 1.4004, "grad_norm": 0.7655003070831299, "learning_rate": 0.0002, "epoch": 4.793388429752066, "step": 1450}, {"loss": 1.3633, "grad_norm": 0.7187207341194153, "learning_rate": 0.0002, "epoch": 4.8264462809917354, "step": 1460}, {"loss": 1.3647, "grad_norm": 0.7122251987457275, "learning_rate": 0.0002, "epoch": 4.859504132231405, "step": 1470}, {"loss": 1.4481, "grad_norm": 0.7744072675704956, "learning_rate": 0.0002, "epoch": 4.892561983471074, "step": 1480}, {"loss": 1.3959, "grad_norm": 0.8202858567237854, "learning_rate": 0.0002, "epoch": 4.925619834710744, "step": 1490}, {"loss": 1.4176, "grad_norm": 0.7144979238510132, "learning_rate": 0.0002, "epoch": 4.958677685950414, "step": 1500}, {"loss": 1.4398, "grad_norm": 0.7824931144714355, "learning_rate": 0.0002, "epoch": 4.991735537190083, "step": 1510}]} +{"epoch": 6.0, "step": 1815, "epoch_duration": 324.663699388504, "total_accumulated_duration": 1949.4188392162323, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}, {"eval_loss": 1.853971004486084, "eval_runtime": 38.8404, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.674, "epoch": 2.9983471074380166, "step": 907}, {"loss": 1.6173, "grad_norm": 0.4399570822715759, "learning_rate": 0.0002, "epoch": 3.0082644628099175, "step": 910}, {"loss": 1.5578, "grad_norm": 0.5338484644889832, "learning_rate": 0.0002, "epoch": 3.041322314049587, "step": 920}, {"loss": 1.5507, "grad_norm": 0.6154358983039856, "learning_rate": 0.0002, "epoch": 3.074380165289256, "step": 930}, {"loss": 1.6189, "grad_norm": 0.6429790258407593, "learning_rate": 0.0002, "epoch": 3.1074380165289255, "step": 940}, {"loss": 1.5866, "grad_norm": 0.5375680923461914, "learning_rate": 0.0002, "epoch": 3.1404958677685952, "step": 950}, {"loss": 1.5119, "grad_norm": 0.5594999194145203, "learning_rate": 0.0002, "epoch": 3.1735537190082646, "step": 960}, {"loss": 1.5096, "grad_norm": 0.6742738485336304, "learning_rate": 0.0002, "epoch": 3.206611570247934, "step": 970}, {"loss": 1.5477, "grad_norm": 0.563497006893158, "learning_rate": 0.0002, "epoch": 3.239669421487603, "step": 980}, {"loss": 1.5559, "grad_norm": 0.6521140933036804, "learning_rate": 0.0002, "epoch": 3.2727272727272725, "step": 990}, {"loss": 1.4905, "grad_norm": 0.6016622185707092, "learning_rate": 0.0002, "epoch": 3.3057851239669422, "step": 1000}, {"loss": 1.5307, "grad_norm": 0.6564913988113403, "learning_rate": 0.0002, "epoch": 3.3388429752066116, "step": 1010}, {"loss": 1.4595, "grad_norm": 0.6528742909431458, "learning_rate": 0.0002, "epoch": 3.371900826446281, "step": 1020}, {"loss": 1.518, "grad_norm": 0.5843546390533447, "learning_rate": 0.0002, "epoch": 3.4049586776859506, "step": 1030}, {"loss": 1.5148, "grad_norm": 0.5892922282218933, "learning_rate": 0.0002, "epoch": 3.43801652892562, "step": 1040}, {"loss": 1.5125, "grad_norm": 0.6217362284660339, "learning_rate": 0.0002, "epoch": 3.4710743801652892, "step": 1050}, {"loss": 1.526, "grad_norm": 0.5837283134460449, "learning_rate": 0.0002, "epoch": 3.5041322314049586, "step": 1060}, {"loss": 1.5776, "grad_norm": 0.6369057893753052, "learning_rate": 0.0002, "epoch": 3.537190082644628, "step": 1070}, {"loss": 1.4758, "grad_norm": 0.632115364074707, "learning_rate": 0.0002, "epoch": 3.5702479338842976, "step": 1080}, {"loss": 1.5604, "grad_norm": 0.6364002823829651, "learning_rate": 0.0002, "epoch": 3.603305785123967, "step": 1090}, {"loss": 1.508, "grad_norm": 0.550032377243042, "learning_rate": 0.0002, "epoch": 3.6363636363636362, "step": 1100}, {"loss": 1.5548, "grad_norm": 0.6106863617897034, "learning_rate": 0.0002, "epoch": 3.669421487603306, "step": 1110}, {"loss": 1.5237, "grad_norm": 0.635955274105072, "learning_rate": 0.0002, "epoch": 3.7024793388429753, "step": 1120}, {"loss": 1.5698, "grad_norm": 0.615804135799408, "learning_rate": 0.0002, "epoch": 3.7355371900826446, "step": 1130}, {"loss": 1.6068, "grad_norm": 0.5769386887550354, "learning_rate": 0.0002, "epoch": 3.768595041322314, "step": 1140}, {"loss": 1.5262, "grad_norm": 0.5938104391098022, "learning_rate": 0.0002, "epoch": 3.8016528925619832, "step": 1150}, {"loss": 1.5236, "grad_norm": 0.6149733066558838, "learning_rate": 0.0002, "epoch": 3.834710743801653, "step": 1160}, {"loss": 1.5585, "grad_norm": 0.6228950023651123, "learning_rate": 0.0002, "epoch": 3.8677685950413223, "step": 1170}, {"loss": 1.5815, "grad_norm": 0.6196513175964355, "learning_rate": 0.0002, "epoch": 3.9008264462809916, "step": 1180}, {"loss": 1.5894, "grad_norm": 0.5946677327156067, "learning_rate": 0.0002, "epoch": 3.9338842975206614, "step": 1190}, {"loss": 1.5451, "grad_norm": 0.5882220268249512, "learning_rate": 0.0002, "epoch": 3.9669421487603307, "step": 1200}, {"loss": 1.6202, "grad_norm": 0.6291728019714355, "learning_rate": 0.0002, "epoch": 4.0, "step": 1210}, {"eval_loss": 1.8943731784820557, "eval_runtime": 38.826, "eval_samples_per_second": 13.264, "eval_steps_per_second": 1.674, "epoch": 4.0, "step": 1210}, {"loss": 1.3944, "grad_norm": 1.0843605995178223, "learning_rate": 0.0002, "epoch": 4.033057851239669, "step": 1220}, {"loss": 1.3453, "grad_norm": 0.6460382342338562, "learning_rate": 0.0002, "epoch": 4.066115702479339, "step": 1230}, {"loss": 1.3714, "grad_norm": 0.7872665524482727, "learning_rate": 0.0002, "epoch": 4.099173553719008, "step": 1240}, {"loss": 1.3247, "grad_norm": 0.7585243582725525, "learning_rate": 0.0002, "epoch": 4.132231404958677, "step": 1250}, {"loss": 1.4162, "grad_norm": 0.7955290079116821, "learning_rate": 0.0002, "epoch": 4.1652892561983474, "step": 1260}, {"loss": 1.4127, "grad_norm": 0.8847756385803223, "learning_rate": 0.0002, "epoch": 4.198347107438017, "step": 1270}, {"loss": 1.3972, "grad_norm": 0.7897582650184631, "learning_rate": 0.0002, "epoch": 4.231404958677686, "step": 1280}, {"loss": 1.3631, "grad_norm": 0.755404531955719, "learning_rate": 0.0002, "epoch": 4.264462809917355, "step": 1290}, {"loss": 1.4219, "grad_norm": 0.7718978524208069, "learning_rate": 0.0002, "epoch": 4.297520661157025, "step": 1300}, {"loss": 1.3832, "grad_norm": 0.8073238134384155, "learning_rate": 0.0002, "epoch": 4.330578512396694, "step": 1310}, {"loss": 1.3968, "grad_norm": 0.8661217093467712, "learning_rate": 0.0002, "epoch": 4.363636363636363, "step": 1320}, {"loss": 1.3809, "grad_norm": 0.8859766721725464, "learning_rate": 0.0002, "epoch": 4.3966942148760335, "step": 1330}, {"loss": 1.3779, "grad_norm": 0.8635476231575012, "learning_rate": 0.0002, "epoch": 4.429752066115703, "step": 1340}, {"loss": 1.403, "grad_norm": 0.7376685738563538, "learning_rate": 0.0002, "epoch": 4.462809917355372, "step": 1350}, {"loss": 1.4346, "grad_norm": 0.7924236059188843, "learning_rate": 0.0002, "epoch": 4.4958677685950414, "step": 1360}, {"loss": 1.3205, "grad_norm": 0.6969273686408997, "learning_rate": 0.0002, "epoch": 4.528925619834711, "step": 1370}, {"loss": 1.399, "grad_norm": 0.7346147894859314, "learning_rate": 0.0002, "epoch": 4.56198347107438, "step": 1380}, {"loss": 1.4308, "grad_norm": 0.8515401482582092, "learning_rate": 0.0002, "epoch": 4.595041322314049, "step": 1390}, {"loss": 1.407, "grad_norm": 0.8154449462890625, "learning_rate": 0.0002, "epoch": 4.628099173553719, "step": 1400}, {"loss": 1.4182, "grad_norm": 0.8922461271286011, "learning_rate": 0.0002, "epoch": 4.661157024793388, "step": 1410}, {"loss": 1.3894, "grad_norm": 0.8835586309432983, "learning_rate": 0.0002, "epoch": 4.694214876033058, "step": 1420}, {"loss": 1.411, "grad_norm": 0.7689077258110046, "learning_rate": 0.0002, "epoch": 4.7272727272727275, "step": 1430}, {"loss": 1.4083, "grad_norm": 0.7515250444412231, "learning_rate": 0.0002, "epoch": 4.760330578512397, "step": 1440}, {"loss": 1.4004, "grad_norm": 0.7655003070831299, "learning_rate": 0.0002, "epoch": 4.793388429752066, "step": 1450}, {"loss": 1.3633, "grad_norm": 0.7187207341194153, "learning_rate": 0.0002, "epoch": 4.8264462809917354, "step": 1460}, {"loss": 1.3647, "grad_norm": 0.7122251987457275, "learning_rate": 0.0002, "epoch": 4.859504132231405, "step": 1470}, {"loss": 1.4481, "grad_norm": 0.7744072675704956, "learning_rate": 0.0002, "epoch": 4.892561983471074, "step": 1480}, {"loss": 1.3959, "grad_norm": 0.8202858567237854, "learning_rate": 0.0002, "epoch": 4.925619834710744, "step": 1490}, {"loss": 1.4176, "grad_norm": 0.7144979238510132, "learning_rate": 0.0002, "epoch": 4.958677685950414, "step": 1500}, {"loss": 1.4398, "grad_norm": 0.7824931144714355, "learning_rate": 0.0002, "epoch": 4.991735537190083, "step": 1510}, {"eval_loss": 1.9822860956192017, "eval_runtime": 38.8296, "eval_samples_per_second": 13.263, "eval_steps_per_second": 1.674, "epoch": 4.998347107438017, "step": 1512}, {"loss": 1.3009, "grad_norm": 1.0307862758636475, "learning_rate": 0.0002, "epoch": 5.024793388429752, "step": 1520}, {"loss": 1.1822, "grad_norm": 0.9152393341064453, "learning_rate": 0.0002, "epoch": 5.0578512396694215, "step": 1530}, {"loss": 1.251, "grad_norm": 0.9560136198997498, "learning_rate": 0.0002, "epoch": 5.090909090909091, "step": 1540}, {"loss": 1.2552, "grad_norm": 0.8285775184631348, "learning_rate": 0.0002, "epoch": 5.12396694214876, "step": 1550}, {"loss": 1.1603, "grad_norm": 0.9479135870933533, "learning_rate": 0.0002, "epoch": 5.1570247933884295, "step": 1560}, {"loss": 1.229, "grad_norm": 0.9731078743934631, "learning_rate": 0.0002, "epoch": 5.190082644628099, "step": 1570}, {"loss": 1.2084, "grad_norm": 0.8167943358421326, "learning_rate": 0.0002, "epoch": 5.223140495867769, "step": 1580}, {"loss": 1.1937, "grad_norm": 1.1679469347000122, "learning_rate": 0.0002, "epoch": 5.256198347107438, "step": 1590}, {"loss": 1.1662, "grad_norm": 0.9156213402748108, "learning_rate": 0.0002, "epoch": 5.289256198347108, "step": 1600}, {"loss": 1.2014, "grad_norm": 1.082939624786377, "learning_rate": 0.0002, "epoch": 5.322314049586777, "step": 1610}, {"loss": 1.2701, "grad_norm": 1.0271905660629272, "learning_rate": 0.0002, "epoch": 5.355371900826446, "step": 1620}, {"loss": 1.2275, "grad_norm": 1.2237807512283325, "learning_rate": 0.0002, "epoch": 5.3884297520661155, "step": 1630}, {"loss": 1.267, "grad_norm": 1.1419697999954224, "learning_rate": 0.0002, "epoch": 5.421487603305785, "step": 1640}, {"loss": 1.2424, "grad_norm": 1.4427895545959473, "learning_rate": 0.0002, "epoch": 5.454545454545454, "step": 1650}, {"loss": 1.24, "grad_norm": 1.117572546005249, "learning_rate": 0.0002, "epoch": 5.487603305785124, "step": 1660}, {"loss": 1.1912, "grad_norm": 1.1476300954818726, "learning_rate": 0.0002, "epoch": 5.520661157024794, "step": 1670}, {"loss": 1.2397, "grad_norm": 1.1372056007385254, "learning_rate": 0.0002, "epoch": 5.553719008264463, "step": 1680}, {"loss": 1.2875, "grad_norm": 1.0415048599243164, "learning_rate": 0.0002, "epoch": 5.586776859504132, "step": 1690}, {"loss": 1.2062, "grad_norm": 0.9535173177719116, "learning_rate": 0.0002, "epoch": 5.619834710743802, "step": 1700}, {"loss": 1.2528, "grad_norm": 0.9918773174285889, "learning_rate": 0.0002, "epoch": 5.652892561983471, "step": 1710}, {"loss": 1.2443, "grad_norm": 0.9184247255325317, "learning_rate": 0.0002, "epoch": 5.68595041322314, "step": 1720}, {"loss": 1.2273, "grad_norm": 0.9416358470916748, "learning_rate": 0.0002, "epoch": 5.7190082644628095, "step": 1730}, {"loss": 1.2815, "grad_norm": 1.0027815103530884, "learning_rate": 0.0002, "epoch": 5.75206611570248, "step": 1740}, {"loss": 1.2261, "grad_norm": 1.0766979455947876, "learning_rate": 0.0002, "epoch": 5.785123966942149, "step": 1750}, {"loss": 1.2221, "grad_norm": 0.9244554042816162, "learning_rate": 0.0002, "epoch": 5.818181818181818, "step": 1760}, {"loss": 1.312, "grad_norm": 1.2514721155166626, "learning_rate": 0.0002, "epoch": 5.851239669421488, "step": 1770}, {"loss": 1.3042, "grad_norm": 1.0198537111282349, "learning_rate": 0.0002, "epoch": 5.884297520661157, "step": 1780}, {"loss": 1.2032, "grad_norm": 0.9569677114486694, "learning_rate": 0.0002, "epoch": 5.917355371900826, "step": 1790}, {"loss": 1.23, "grad_norm": 0.9748323559761047, "learning_rate": 0.0002, "epoch": 5.950413223140496, "step": 1800}, {"loss": 1.2167, "grad_norm": 1.0731725692749023, "learning_rate": 0.0002, "epoch": 5.983471074380166, "step": 1810}]} +{"epoch": 6.998347107438017, "step": 2117, "epoch_duration": 324.59880232810974, "total_accumulated_duration": 2274.017641544342, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7672.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}, {"eval_loss": 1.853971004486084, "eval_runtime": 38.8404, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.674, "epoch": 2.9983471074380166, "step": 907}, {"loss": 1.6173, "grad_norm": 0.4399570822715759, "learning_rate": 0.0002, "epoch": 3.0082644628099175, "step": 910}, {"loss": 1.5578, "grad_norm": 0.5338484644889832, "learning_rate": 0.0002, "epoch": 3.041322314049587, "step": 920}, {"loss": 1.5507, "grad_norm": 0.6154358983039856, "learning_rate": 0.0002, "epoch": 3.074380165289256, "step": 930}, {"loss": 1.6189, "grad_norm": 0.6429790258407593, "learning_rate": 0.0002, "epoch": 3.1074380165289255, "step": 940}, {"loss": 1.5866, "grad_norm": 0.5375680923461914, "learning_rate": 0.0002, "epoch": 3.1404958677685952, "step": 950}, {"loss": 1.5119, "grad_norm": 0.5594999194145203, "learning_rate": 0.0002, "epoch": 3.1735537190082646, "step": 960}, {"loss": 1.5096, "grad_norm": 0.6742738485336304, "learning_rate": 0.0002, "epoch": 3.206611570247934, "step": 970}, {"loss": 1.5477, "grad_norm": 0.563497006893158, "learning_rate": 0.0002, "epoch": 3.239669421487603, "step": 980}, {"loss": 1.5559, "grad_norm": 0.6521140933036804, "learning_rate": 0.0002, "epoch": 3.2727272727272725, "step": 990}, {"loss": 1.4905, "grad_norm": 0.6016622185707092, "learning_rate": 0.0002, "epoch": 3.3057851239669422, "step": 1000}, {"loss": 1.5307, "grad_norm": 0.6564913988113403, "learning_rate": 0.0002, "epoch": 3.3388429752066116, "step": 1010}, {"loss": 1.4595, "grad_norm": 0.6528742909431458, "learning_rate": 0.0002, "epoch": 3.371900826446281, "step": 1020}, {"loss": 1.518, "grad_norm": 0.5843546390533447, "learning_rate": 0.0002, "epoch": 3.4049586776859506, "step": 1030}, {"loss": 1.5148, "grad_norm": 0.5892922282218933, "learning_rate": 0.0002, "epoch": 3.43801652892562, "step": 1040}, {"loss": 1.5125, "grad_norm": 0.6217362284660339, "learning_rate": 0.0002, "epoch": 3.4710743801652892, "step": 1050}, {"loss": 1.526, "grad_norm": 0.5837283134460449, "learning_rate": 0.0002, "epoch": 3.5041322314049586, "step": 1060}, {"loss": 1.5776, "grad_norm": 0.6369057893753052, "learning_rate": 0.0002, "epoch": 3.537190082644628, "step": 1070}, {"loss": 1.4758, "grad_norm": 0.632115364074707, "learning_rate": 0.0002, "epoch": 3.5702479338842976, "step": 1080}, {"loss": 1.5604, "grad_norm": 0.6364002823829651, "learning_rate": 0.0002, "epoch": 3.603305785123967, "step": 1090}, {"loss": 1.508, "grad_norm": 0.550032377243042, "learning_rate": 0.0002, "epoch": 3.6363636363636362, "step": 1100}, {"loss": 1.5548, "grad_norm": 0.6106863617897034, "learning_rate": 0.0002, "epoch": 3.669421487603306, "step": 1110}, {"loss": 1.5237, "grad_norm": 0.635955274105072, "learning_rate": 0.0002, "epoch": 3.7024793388429753, "step": 1120}, {"loss": 1.5698, "grad_norm": 0.615804135799408, "learning_rate": 0.0002, "epoch": 3.7355371900826446, "step": 1130}, {"loss": 1.6068, "grad_norm": 0.5769386887550354, "learning_rate": 0.0002, "epoch": 3.768595041322314, "step": 1140}, {"loss": 1.5262, "grad_norm": 0.5938104391098022, "learning_rate": 0.0002, "epoch": 3.8016528925619832, "step": 1150}, {"loss": 1.5236, "grad_norm": 0.6149733066558838, "learning_rate": 0.0002, "epoch": 3.834710743801653, "step": 1160}, {"loss": 1.5585, "grad_norm": 0.6228950023651123, "learning_rate": 0.0002, "epoch": 3.8677685950413223, "step": 1170}, {"loss": 1.5815, "grad_norm": 0.6196513175964355, "learning_rate": 0.0002, "epoch": 3.9008264462809916, "step": 1180}, {"loss": 1.5894, "grad_norm": 0.5946677327156067, "learning_rate": 0.0002, "epoch": 3.9338842975206614, "step": 1190}, {"loss": 1.5451, "grad_norm": 0.5882220268249512, "learning_rate": 0.0002, "epoch": 3.9669421487603307, "step": 1200}, {"loss": 1.6202, "grad_norm": 0.6291728019714355, "learning_rate": 0.0002, "epoch": 4.0, "step": 1210}, {"eval_loss": 1.8943731784820557, "eval_runtime": 38.826, "eval_samples_per_second": 13.264, "eval_steps_per_second": 1.674, "epoch": 4.0, "step": 1210}, {"loss": 1.3944, "grad_norm": 1.0843605995178223, "learning_rate": 0.0002, "epoch": 4.033057851239669, "step": 1220}, {"loss": 1.3453, "grad_norm": 0.6460382342338562, "learning_rate": 0.0002, "epoch": 4.066115702479339, "step": 1230}, {"loss": 1.3714, "grad_norm": 0.7872665524482727, "learning_rate": 0.0002, "epoch": 4.099173553719008, "step": 1240}, {"loss": 1.3247, "grad_norm": 0.7585243582725525, "learning_rate": 0.0002, "epoch": 4.132231404958677, "step": 1250}, {"loss": 1.4162, "grad_norm": 0.7955290079116821, "learning_rate": 0.0002, "epoch": 4.1652892561983474, "step": 1260}, {"loss": 1.4127, "grad_norm": 0.8847756385803223, "learning_rate": 0.0002, "epoch": 4.198347107438017, "step": 1270}, {"loss": 1.3972, "grad_norm": 0.7897582650184631, "learning_rate": 0.0002, "epoch": 4.231404958677686, "step": 1280}, {"loss": 1.3631, "grad_norm": 0.755404531955719, "learning_rate": 0.0002, "epoch": 4.264462809917355, "step": 1290}, {"loss": 1.4219, "grad_norm": 0.7718978524208069, "learning_rate": 0.0002, "epoch": 4.297520661157025, "step": 1300}, {"loss": 1.3832, "grad_norm": 0.8073238134384155, "learning_rate": 0.0002, "epoch": 4.330578512396694, "step": 1310}, {"loss": 1.3968, "grad_norm": 0.8661217093467712, "learning_rate": 0.0002, "epoch": 4.363636363636363, "step": 1320}, {"loss": 1.3809, "grad_norm": 0.8859766721725464, "learning_rate": 0.0002, "epoch": 4.3966942148760335, "step": 1330}, {"loss": 1.3779, "grad_norm": 0.8635476231575012, "learning_rate": 0.0002, "epoch": 4.429752066115703, "step": 1340}, {"loss": 1.403, "grad_norm": 0.7376685738563538, "learning_rate": 0.0002, "epoch": 4.462809917355372, "step": 1350}, {"loss": 1.4346, "grad_norm": 0.7924236059188843, "learning_rate": 0.0002, "epoch": 4.4958677685950414, "step": 1360}, {"loss": 1.3205, "grad_norm": 0.6969273686408997, "learning_rate": 0.0002, "epoch": 4.528925619834711, "step": 1370}, {"loss": 1.399, "grad_norm": 0.7346147894859314, "learning_rate": 0.0002, "epoch": 4.56198347107438, "step": 1380}, {"loss": 1.4308, "grad_norm": 0.8515401482582092, "learning_rate": 0.0002, "epoch": 4.595041322314049, "step": 1390}, {"loss": 1.407, "grad_norm": 0.8154449462890625, "learning_rate": 0.0002, "epoch": 4.628099173553719, "step": 1400}, {"loss": 1.4182, "grad_norm": 0.8922461271286011, "learning_rate": 0.0002, "epoch": 4.661157024793388, "step": 1410}, {"loss": 1.3894, "grad_norm": 0.8835586309432983, "learning_rate": 0.0002, "epoch": 4.694214876033058, "step": 1420}, {"loss": 1.411, "grad_norm": 0.7689077258110046, "learning_rate": 0.0002, "epoch": 4.7272727272727275, "step": 1430}, {"loss": 1.4083, "grad_norm": 0.7515250444412231, "learning_rate": 0.0002, "epoch": 4.760330578512397, "step": 1440}, {"loss": 1.4004, "grad_norm": 0.7655003070831299, "learning_rate": 0.0002, "epoch": 4.793388429752066, "step": 1450}, {"loss": 1.3633, "grad_norm": 0.7187207341194153, "learning_rate": 0.0002, "epoch": 4.8264462809917354, "step": 1460}, {"loss": 1.3647, "grad_norm": 0.7122251987457275, "learning_rate": 0.0002, "epoch": 4.859504132231405, "step": 1470}, {"loss": 1.4481, "grad_norm": 0.7744072675704956, "learning_rate": 0.0002, "epoch": 4.892561983471074, "step": 1480}, {"loss": 1.3959, "grad_norm": 0.8202858567237854, "learning_rate": 0.0002, "epoch": 4.925619834710744, "step": 1490}, {"loss": 1.4176, "grad_norm": 0.7144979238510132, "learning_rate": 0.0002, "epoch": 4.958677685950414, "step": 1500}, {"loss": 1.4398, "grad_norm": 0.7824931144714355, "learning_rate": 0.0002, "epoch": 4.991735537190083, "step": 1510}, {"eval_loss": 1.9822860956192017, "eval_runtime": 38.8296, "eval_samples_per_second": 13.263, "eval_steps_per_second": 1.674, "epoch": 4.998347107438017, "step": 1512}, {"loss": 1.3009, "grad_norm": 1.0307862758636475, "learning_rate": 0.0002, "epoch": 5.024793388429752, "step": 1520}, {"loss": 1.1822, "grad_norm": 0.9152393341064453, "learning_rate": 0.0002, "epoch": 5.0578512396694215, "step": 1530}, {"loss": 1.251, "grad_norm": 0.9560136198997498, "learning_rate": 0.0002, "epoch": 5.090909090909091, "step": 1540}, {"loss": 1.2552, "grad_norm": 0.8285775184631348, "learning_rate": 0.0002, "epoch": 5.12396694214876, "step": 1550}, {"loss": 1.1603, "grad_norm": 0.9479135870933533, "learning_rate": 0.0002, "epoch": 5.1570247933884295, "step": 1560}, {"loss": 1.229, "grad_norm": 0.9731078743934631, "learning_rate": 0.0002, "epoch": 5.190082644628099, "step": 1570}, {"loss": 1.2084, "grad_norm": 0.8167943358421326, "learning_rate": 0.0002, "epoch": 5.223140495867769, "step": 1580}, {"loss": 1.1937, "grad_norm": 1.1679469347000122, "learning_rate": 0.0002, "epoch": 5.256198347107438, "step": 1590}, {"loss": 1.1662, "grad_norm": 0.9156213402748108, "learning_rate": 0.0002, "epoch": 5.289256198347108, "step": 1600}, {"loss": 1.2014, "grad_norm": 1.082939624786377, "learning_rate": 0.0002, "epoch": 5.322314049586777, "step": 1610}, {"loss": 1.2701, "grad_norm": 1.0271905660629272, "learning_rate": 0.0002, "epoch": 5.355371900826446, "step": 1620}, {"loss": 1.2275, "grad_norm": 1.2237807512283325, "learning_rate": 0.0002, "epoch": 5.3884297520661155, "step": 1630}, {"loss": 1.267, "grad_norm": 1.1419697999954224, "learning_rate": 0.0002, "epoch": 5.421487603305785, "step": 1640}, {"loss": 1.2424, "grad_norm": 1.4427895545959473, "learning_rate": 0.0002, "epoch": 5.454545454545454, "step": 1650}, {"loss": 1.24, "grad_norm": 1.117572546005249, "learning_rate": 0.0002, "epoch": 5.487603305785124, "step": 1660}, {"loss": 1.1912, "grad_norm": 1.1476300954818726, "learning_rate": 0.0002, "epoch": 5.520661157024794, "step": 1670}, {"loss": 1.2397, "grad_norm": 1.1372056007385254, "learning_rate": 0.0002, "epoch": 5.553719008264463, "step": 1680}, {"loss": 1.2875, "grad_norm": 1.0415048599243164, "learning_rate": 0.0002, "epoch": 5.586776859504132, "step": 1690}, {"loss": 1.2062, "grad_norm": 0.9535173177719116, "learning_rate": 0.0002, "epoch": 5.619834710743802, "step": 1700}, {"loss": 1.2528, "grad_norm": 0.9918773174285889, "learning_rate": 0.0002, "epoch": 5.652892561983471, "step": 1710}, {"loss": 1.2443, "grad_norm": 0.9184247255325317, "learning_rate": 0.0002, "epoch": 5.68595041322314, "step": 1720}, {"loss": 1.2273, "grad_norm": 0.9416358470916748, "learning_rate": 0.0002, "epoch": 5.7190082644628095, "step": 1730}, {"loss": 1.2815, "grad_norm": 1.0027815103530884, "learning_rate": 0.0002, "epoch": 5.75206611570248, "step": 1740}, {"loss": 1.2261, "grad_norm": 1.0766979455947876, "learning_rate": 0.0002, "epoch": 5.785123966942149, "step": 1750}, {"loss": 1.2221, "grad_norm": 0.9244554042816162, "learning_rate": 0.0002, "epoch": 5.818181818181818, "step": 1760}, {"loss": 1.312, "grad_norm": 1.2514721155166626, "learning_rate": 0.0002, "epoch": 5.851239669421488, "step": 1770}, {"loss": 1.3042, "grad_norm": 1.0198537111282349, "learning_rate": 0.0002, "epoch": 5.884297520661157, "step": 1780}, {"loss": 1.2032, "grad_norm": 0.9569677114486694, "learning_rate": 0.0002, "epoch": 5.917355371900826, "step": 1790}, {"loss": 1.23, "grad_norm": 0.9748323559761047, "learning_rate": 0.0002, "epoch": 5.950413223140496, "step": 1800}, {"loss": 1.2167, "grad_norm": 1.0731725692749023, "learning_rate": 0.0002, "epoch": 5.983471074380166, "step": 1810}, {"eval_loss": 2.0875232219696045, "eval_runtime": 38.8414, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.673, "epoch": 6.0, "step": 1815}, {"loss": 1.2001, "grad_norm": 1.1357579231262207, "learning_rate": 0.0002, "epoch": 6.016528925619835, "step": 1820}, {"loss": 1.0238, "grad_norm": 1.1450963020324707, "learning_rate": 0.0002, "epoch": 6.049586776859504, "step": 1830}, {"loss": 1.0099, "grad_norm": 1.2671376466751099, "learning_rate": 0.0002, "epoch": 6.082644628099174, "step": 1840}, {"loss": 1.07, "grad_norm": 1.1405659914016724, "learning_rate": 0.0002, "epoch": 6.115702479338843, "step": 1850}, {"loss": 1.0456, "grad_norm": 1.176552176475525, "learning_rate": 0.0002, "epoch": 6.148760330578512, "step": 1860}, {"loss": 1.0804, "grad_norm": 1.2722952365875244, "learning_rate": 0.0002, "epoch": 6.181818181818182, "step": 1870}, {"loss": 1.0305, "grad_norm": 1.2505744695663452, "learning_rate": 0.0002, "epoch": 6.214876033057851, "step": 1880}, {"loss": 1.0496, "grad_norm": 1.388776183128357, "learning_rate": 0.0002, "epoch": 6.24793388429752, "step": 1890}, {"loss": 1.0727, "grad_norm": 1.3574049472808838, "learning_rate": 0.0002, "epoch": 6.2809917355371905, "step": 1900}, {"loss": 1.0142, "grad_norm": 1.15278160572052, "learning_rate": 0.0002, "epoch": 6.31404958677686, "step": 1910}, {"loss": 1.0977, "grad_norm": 1.280260682106018, "learning_rate": 0.0002, "epoch": 6.347107438016529, "step": 1920}, {"loss": 1.0319, "grad_norm": 1.3323947191238403, "learning_rate": 0.0002, "epoch": 6.380165289256198, "step": 1930}, {"loss": 1.0816, "grad_norm": 1.2422343492507935, "learning_rate": 0.0002, "epoch": 6.413223140495868, "step": 1940}, {"loss": 1.0144, "grad_norm": 1.485025405883789, "learning_rate": 0.0002, "epoch": 6.446280991735537, "step": 1950}, {"loss": 1.0489, "grad_norm": 1.132170557975769, "learning_rate": 0.0002, "epoch": 6.479338842975206, "step": 1960}, {"loss": 1.0551, "grad_norm": 1.1854133605957031, "learning_rate": 0.0002, "epoch": 6.512396694214876, "step": 1970}, {"loss": 1.0353, "grad_norm": 1.2570922374725342, "learning_rate": 0.0002, "epoch": 6.545454545454545, "step": 1980}, {"loss": 1.0693, "grad_norm": 1.1590516567230225, "learning_rate": 0.0002, "epoch": 6.578512396694215, "step": 1990}, {"loss": 1.0899, "grad_norm": 1.3472840785980225, "learning_rate": 0.0002, "epoch": 6.6115702479338845, "step": 2000}, {"loss": 1.1226, "grad_norm": 1.4928734302520752, "learning_rate": 0.0002, "epoch": 6.644628099173554, "step": 2010}, {"loss": 1.0448, "grad_norm": 1.243243932723999, "learning_rate": 0.0002, "epoch": 6.677685950413223, "step": 2020}, {"loss": 1.0557, "grad_norm": 1.6677647829055786, "learning_rate": 0.0002, "epoch": 6.710743801652892, "step": 2030}, {"loss": 1.1326, "grad_norm": 1.1295818090438843, "learning_rate": 0.0002, "epoch": 6.743801652892562, "step": 2040}, {"loss": 1.0889, "grad_norm": 1.2596524953842163, "learning_rate": 0.0002, "epoch": 6.776859504132231, "step": 2050}, {"loss": 1.1344, "grad_norm": 1.2924189567565918, "learning_rate": 0.0002, "epoch": 6.809917355371901, "step": 2060}, {"loss": 1.1409, "grad_norm": 1.1653043031692505, "learning_rate": 0.0002, "epoch": 6.8429752066115705, "step": 2070}, {"loss": 1.0911, "grad_norm": 1.3316930532455444, "learning_rate": 0.0002, "epoch": 6.87603305785124, "step": 2080}, {"loss": 1.1134, "grad_norm": 1.2001112699508667, "learning_rate": 0.0002, "epoch": 6.909090909090909, "step": 2090}, {"loss": 1.0538, "grad_norm": 1.1454474925994873, "learning_rate": 0.0002, "epoch": 6.9421487603305785, "step": 2100}, {"loss": 1.0955, "grad_norm": 1.1415315866470337, "learning_rate": 0.0002, "epoch": 6.975206611570248, "step": 2110}]} +{"epoch": 7.986776859504133, "step": 2416, "epoch_duration": 321.383802652359, "total_accumulated_duration": 2595.401444196701, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7568.4541015625}, "peak_memory_usage": {"GPU_0": 13792.75439453125}, "avg_memory_reserved": {"GPU_0": 17416.0}, "peak_memory_reserved": {"GPU_0": 17416.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/Meta-Llama-3-8B-Instruct_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1405-sd-0/checkpoint-605", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.528, "grad_norm": 0.4384556710720062, "learning_rate": 0.0002, "epoch": 0.03305785123966942, "step": 10}, {"loss": 2.2675, "grad_norm": 0.48312580585479736, "learning_rate": 0.0002, "epoch": 0.06611570247933884, "step": 20}, {"loss": 2.0439, "grad_norm": 0.6193496584892273, "learning_rate": 0.0002, "epoch": 0.09917355371900827, "step": 30}, {"loss": 1.9649, "grad_norm": 0.471858948469162, "learning_rate": 0.0002, "epoch": 0.1322314049586777, "step": 40}, {"loss": 1.9945, "grad_norm": 0.43199431896209717, "learning_rate": 0.0002, "epoch": 0.1652892561983471, "step": 50}, {"loss": 1.9409, "grad_norm": 0.5022176504135132, "learning_rate": 0.0002, "epoch": 0.19834710743801653, "step": 60}, {"loss": 1.89, "grad_norm": 0.4934026300907135, "learning_rate": 0.0002, "epoch": 0.23140495867768596, "step": 70}, {"loss": 1.9036, "grad_norm": 0.4313369691371918, "learning_rate": 0.0002, "epoch": 0.2644628099173554, "step": 80}, {"loss": 1.8992, "grad_norm": 0.48663529753685, "learning_rate": 0.0002, "epoch": 0.2975206611570248, "step": 90}, {"loss": 1.8655, "grad_norm": 0.47740036249160767, "learning_rate": 0.0002, "epoch": 0.3305785123966942, "step": 100}, {"loss": 1.8797, "grad_norm": 0.41685664653778076, "learning_rate": 0.0002, "epoch": 0.36363636363636365, "step": 110}, {"loss": 1.8951, "grad_norm": 2.368595838546753, "learning_rate": 0.0002, "epoch": 0.39669421487603307, "step": 120}, {"loss": 1.8753, "grad_norm": 0.4861043095588684, "learning_rate": 0.0002, "epoch": 0.4297520661157025, "step": 130}, {"loss": 1.8413, "grad_norm": 0.41848257184028625, "learning_rate": 0.0002, "epoch": 0.4628099173553719, "step": 140}, {"loss": 1.9775, "grad_norm": 0.38776087760925293, "learning_rate": 0.0002, "epoch": 0.49586776859504134, "step": 150}, {"loss": 1.8172, "grad_norm": 0.4095233380794525, "learning_rate": 0.0002, "epoch": 0.5289256198347108, "step": 160}, {"loss": 1.9401, "grad_norm": 0.4492895007133484, "learning_rate": 0.0002, "epoch": 0.5619834710743802, "step": 170}, {"loss": 1.8707, "grad_norm": 0.5678786039352417, "learning_rate": 0.0002, "epoch": 0.5950413223140496, "step": 180}, {"loss": 1.8067, "grad_norm": 0.4926881492137909, "learning_rate": 0.0002, "epoch": 0.628099173553719, "step": 190}, {"loss": 1.8567, "grad_norm": 0.3865489363670349, "learning_rate": 0.0002, "epoch": 0.6611570247933884, "step": 200}, {"loss": 1.7555, "grad_norm": 0.40578970313072205, "learning_rate": 0.0002, "epoch": 0.6942148760330579, "step": 210}, {"loss": 1.8192, "grad_norm": 0.3729846775531769, "learning_rate": 0.0002, "epoch": 0.7272727272727273, "step": 220}, {"loss": 1.8787, "grad_norm": 0.36989861726760864, "learning_rate": 0.0002, "epoch": 0.7603305785123967, "step": 230}, {"loss": 1.8254, "grad_norm": 0.3764864206314087, "learning_rate": 0.0002, "epoch": 0.7933884297520661, "step": 240}, {"loss": 1.8008, "grad_norm": 1.2193230390548706, "learning_rate": 0.0002, "epoch": 0.8264462809917356, "step": 250}, {"loss": 1.8093, "grad_norm": 0.37381255626678467, "learning_rate": 0.0002, "epoch": 0.859504132231405, "step": 260}, {"loss": 1.7911, "grad_norm": 0.35480767488479614, "learning_rate": 0.0002, "epoch": 0.8925619834710744, "step": 270}, {"loss": 1.7824, "grad_norm": 0.4945891201496124, "learning_rate": 0.0002, "epoch": 0.9256198347107438, "step": 280}, {"loss": 1.7842, "grad_norm": 0.39967674016952515, "learning_rate": 0.0002, "epoch": 0.9586776859504132, "step": 290}, {"loss": 1.8321, "grad_norm": 0.4257008135318756, "learning_rate": 0.0002, "epoch": 0.9917355371900827, "step": 300}, {"eval_loss": 1.8413277864456177, "eval_runtime": 38.8241, "eval_samples_per_second": 13.265, "eval_steps_per_second": 1.674, "epoch": 0.9983471074380166, "step": 302}, {"loss": 1.7265, "grad_norm": 0.4019509255886078, "learning_rate": 0.0002, "epoch": 1.024793388429752, "step": 310}, {"loss": 1.7756, "grad_norm": 0.3439880311489105, "learning_rate": 0.0002, "epoch": 1.0578512396694215, "step": 320}, {"loss": 1.7719, "grad_norm": 0.4353587031364441, "learning_rate": 0.0002, "epoch": 1.0909090909090908, "step": 330}, {"loss": 1.7419, "grad_norm": 0.41257765889167786, "learning_rate": 0.0002, "epoch": 1.1239669421487604, "step": 340}, {"loss": 1.7774, "grad_norm": 0.4224575161933899, "learning_rate": 0.0002, "epoch": 1.1570247933884297, "step": 350}, {"loss": 1.7502, "grad_norm": 0.36395177245140076, "learning_rate": 0.0002, "epoch": 1.1900826446280992, "step": 360}, {"loss": 1.8064, "grad_norm": 0.4251839518547058, "learning_rate": 0.0002, "epoch": 1.2231404958677685, "step": 370}, {"loss": 1.7626, "grad_norm": 0.43602821230888367, "learning_rate": 0.0002, "epoch": 1.256198347107438, "step": 380}, {"loss": 1.8261, "grad_norm": 0.3940708637237549, "learning_rate": 0.0002, "epoch": 1.2892561983471074, "step": 390}, {"loss": 1.7317, "grad_norm": 0.3626866042613983, "learning_rate": 0.0002, "epoch": 1.322314049586777, "step": 400}, {"loss": 1.7493, "grad_norm": 0.40716150403022766, "learning_rate": 0.0002, "epoch": 1.3553719008264462, "step": 410}, {"loss": 1.7313, "grad_norm": 0.39323991537094116, "learning_rate": 0.0002, "epoch": 1.3884297520661157, "step": 420}, {"loss": 1.7863, "grad_norm": 0.44480809569358826, "learning_rate": 0.0002, "epoch": 1.421487603305785, "step": 430}, {"loss": 1.7477, "grad_norm": 0.4438270032405853, "learning_rate": 0.0002, "epoch": 1.4545454545454546, "step": 440}, {"loss": 1.774, "grad_norm": 0.3953928053379059, "learning_rate": 0.0002, "epoch": 1.487603305785124, "step": 450}, {"loss": 1.7162, "grad_norm": 0.4152870178222656, "learning_rate": 0.0002, "epoch": 1.5206611570247934, "step": 460}, {"loss": 1.8176, "grad_norm": 0.45231857895851135, "learning_rate": 0.0002, "epoch": 1.553719008264463, "step": 470}, {"loss": 1.7281, "grad_norm": 0.46560999751091003, "learning_rate": 0.0002, "epoch": 1.5867768595041323, "step": 480}, {"loss": 1.8047, "grad_norm": 0.3510372042655945, "learning_rate": 0.0002, "epoch": 1.6198347107438016, "step": 490}, {"loss": 1.7719, "grad_norm": 0.36788758635520935, "learning_rate": 0.0002, "epoch": 1.6528925619834711, "step": 500}, {"loss": 1.8287, "grad_norm": 0.3911917209625244, "learning_rate": 0.0002, "epoch": 1.6859504132231407, "step": 510}, {"loss": 1.7891, "grad_norm": 0.440964937210083, "learning_rate": 0.0002, "epoch": 1.71900826446281, "step": 520}, {"loss": 1.6858, "grad_norm": 0.36718201637268066, "learning_rate": 0.0002, "epoch": 1.7520661157024793, "step": 530}, {"loss": 1.7828, "grad_norm": 0.3927479088306427, "learning_rate": 0.0002, "epoch": 1.7851239669421488, "step": 540}, {"loss": 1.7406, "grad_norm": 0.4298672378063202, "learning_rate": 0.0002, "epoch": 1.8181818181818183, "step": 550}, {"loss": 1.7626, "grad_norm": 0.4257620871067047, "learning_rate": 0.0002, "epoch": 1.8512396694214877, "step": 560}, {"loss": 1.7677, "grad_norm": 0.3743717670440674, "learning_rate": 0.0002, "epoch": 1.884297520661157, "step": 570}, {"loss": 1.7263, "grad_norm": 0.4413471817970276, "learning_rate": 0.0002, "epoch": 1.9173553719008265, "step": 580}, {"loss": 1.7528, "grad_norm": 0.41639673709869385, "learning_rate": 0.0002, "epoch": 1.950413223140496, "step": 590}, {"loss": 1.7141, "grad_norm": 0.46319296956062317, "learning_rate": 0.0002, "epoch": 1.9834710743801653, "step": 600}, {"eval_loss": 1.833760380744934, "eval_runtime": 38.8469, "eval_samples_per_second": 13.257, "eval_steps_per_second": 1.673, "epoch": 2.0, "step": 605}, {"loss": 1.7399, "grad_norm": 0.38033604621887207, "learning_rate": 0.0002, "epoch": 2.0165289256198347, "step": 610}, {"loss": 1.6414, "grad_norm": 0.4522306025028229, "learning_rate": 0.0002, "epoch": 2.049586776859504, "step": 620}, {"loss": 1.5976, "grad_norm": 0.41294756531715393, "learning_rate": 0.0002, "epoch": 2.0826446280991737, "step": 630}, {"loss": 1.6664, "grad_norm": 0.5129091739654541, "learning_rate": 0.0002, "epoch": 2.115702479338843, "step": 640}, {"loss": 1.7207, "grad_norm": 0.4630700647830963, "learning_rate": 0.0002, "epoch": 2.1487603305785123, "step": 650}, {"loss": 1.5884, "grad_norm": 0.4368151128292084, "learning_rate": 0.0002, "epoch": 2.1818181818181817, "step": 660}, {"loss": 1.7271, "grad_norm": 0.5266494154930115, "learning_rate": 0.0002, "epoch": 2.2148760330578514, "step": 670}, {"loss": 1.5749, "grad_norm": 0.4744901955127716, "learning_rate": 0.0002, "epoch": 2.2479338842975207, "step": 680}, {"loss": 1.6512, "grad_norm": 0.5312414765357971, "learning_rate": 0.0002, "epoch": 2.28099173553719, "step": 690}, {"loss": 1.6957, "grad_norm": 0.49116063117980957, "learning_rate": 0.0002, "epoch": 2.3140495867768593, "step": 700}, {"loss": 1.646, "grad_norm": 0.4626988172531128, "learning_rate": 0.0002, "epoch": 2.347107438016529, "step": 710}, {"loss": 1.6474, "grad_norm": 0.4851135015487671, "learning_rate": 0.0002, "epoch": 2.3801652892561984, "step": 720}, {"loss": 1.67, "grad_norm": 0.4882378578186035, "learning_rate": 0.0002, "epoch": 2.4132231404958677, "step": 730}, {"loss": 1.6588, "grad_norm": 0.4470290243625641, "learning_rate": 0.0002, "epoch": 2.446280991735537, "step": 740}, {"loss": 1.6419, "grad_norm": 0.5901731848716736, "learning_rate": 0.0002, "epoch": 2.479338842975207, "step": 750}, {"loss": 1.6756, "grad_norm": 0.48137718439102173, "learning_rate": 0.0002, "epoch": 2.512396694214876, "step": 760}, {"loss": 1.6708, "grad_norm": 0.45636510848999023, "learning_rate": 0.0002, "epoch": 2.5454545454545454, "step": 770}, {"loss": 1.6693, "grad_norm": 0.48216402530670166, "learning_rate": 0.0002, "epoch": 2.5785123966942147, "step": 780}, {"loss": 1.664, "grad_norm": 0.47188714146614075, "learning_rate": 0.0002, "epoch": 2.6115702479338845, "step": 790}, {"loss": 1.619, "grad_norm": 0.44025519490242004, "learning_rate": 0.0002, "epoch": 2.644628099173554, "step": 800}, {"loss": 1.6532, "grad_norm": 0.4918605387210846, "learning_rate": 0.0002, "epoch": 2.677685950413223, "step": 810}, {"loss": 1.7513, "grad_norm": 0.5082308650016785, "learning_rate": 0.0002, "epoch": 2.7107438016528924, "step": 820}, {"loss": 1.7221, "grad_norm": 0.5610618591308594, "learning_rate": 0.0002, "epoch": 2.7438016528925617, "step": 830}, {"loss": 1.7115, "grad_norm": 0.540302574634552, "learning_rate": 0.0002, "epoch": 2.7768595041322315, "step": 840}, {"loss": 1.659, "grad_norm": 0.46016451716423035, "learning_rate": 0.0002, "epoch": 2.809917355371901, "step": 850}, {"loss": 1.672, "grad_norm": 0.45313313603401184, "learning_rate": 0.0002, "epoch": 2.84297520661157, "step": 860}, {"loss": 1.6676, "grad_norm": 0.49267083406448364, "learning_rate": 0.0002, "epoch": 2.87603305785124, "step": 870}, {"loss": 1.6577, "grad_norm": 0.4506530463695526, "learning_rate": 0.0002, "epoch": 2.909090909090909, "step": 880}, {"loss": 1.7059, "grad_norm": 0.4393260180950165, "learning_rate": 0.0002, "epoch": 2.9421487603305785, "step": 890}, {"loss": 1.7042, "grad_norm": 0.438073068857193, "learning_rate": 0.0002, "epoch": 2.975206611570248, "step": 900}, {"eval_loss": 1.853971004486084, "eval_runtime": 38.8404, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.674, "epoch": 2.9983471074380166, "step": 907}, {"loss": 1.6173, "grad_norm": 0.4399570822715759, "learning_rate": 0.0002, "epoch": 3.0082644628099175, "step": 910}, {"loss": 1.5578, "grad_norm": 0.5338484644889832, "learning_rate": 0.0002, "epoch": 3.041322314049587, "step": 920}, {"loss": 1.5507, "grad_norm": 0.6154358983039856, "learning_rate": 0.0002, "epoch": 3.074380165289256, "step": 930}, {"loss": 1.6189, "grad_norm": 0.6429790258407593, "learning_rate": 0.0002, "epoch": 3.1074380165289255, "step": 940}, {"loss": 1.5866, "grad_norm": 0.5375680923461914, "learning_rate": 0.0002, "epoch": 3.1404958677685952, "step": 950}, {"loss": 1.5119, "grad_norm": 0.5594999194145203, "learning_rate": 0.0002, "epoch": 3.1735537190082646, "step": 960}, {"loss": 1.5096, "grad_norm": 0.6742738485336304, "learning_rate": 0.0002, "epoch": 3.206611570247934, "step": 970}, {"loss": 1.5477, "grad_norm": 0.563497006893158, "learning_rate": 0.0002, "epoch": 3.239669421487603, "step": 980}, {"loss": 1.5559, "grad_norm": 0.6521140933036804, "learning_rate": 0.0002, "epoch": 3.2727272727272725, "step": 990}, {"loss": 1.4905, "grad_norm": 0.6016622185707092, "learning_rate": 0.0002, "epoch": 3.3057851239669422, "step": 1000}, {"loss": 1.5307, "grad_norm": 0.6564913988113403, "learning_rate": 0.0002, "epoch": 3.3388429752066116, "step": 1010}, {"loss": 1.4595, "grad_norm": 0.6528742909431458, "learning_rate": 0.0002, "epoch": 3.371900826446281, "step": 1020}, {"loss": 1.518, "grad_norm": 0.5843546390533447, "learning_rate": 0.0002, "epoch": 3.4049586776859506, "step": 1030}, {"loss": 1.5148, "grad_norm": 0.5892922282218933, "learning_rate": 0.0002, "epoch": 3.43801652892562, "step": 1040}, {"loss": 1.5125, "grad_norm": 0.6217362284660339, "learning_rate": 0.0002, "epoch": 3.4710743801652892, "step": 1050}, {"loss": 1.526, "grad_norm": 0.5837283134460449, "learning_rate": 0.0002, "epoch": 3.5041322314049586, "step": 1060}, {"loss": 1.5776, "grad_norm": 0.6369057893753052, "learning_rate": 0.0002, "epoch": 3.537190082644628, "step": 1070}, {"loss": 1.4758, "grad_norm": 0.632115364074707, "learning_rate": 0.0002, "epoch": 3.5702479338842976, "step": 1080}, {"loss": 1.5604, "grad_norm": 0.6364002823829651, "learning_rate": 0.0002, "epoch": 3.603305785123967, "step": 1090}, {"loss": 1.508, "grad_norm": 0.550032377243042, "learning_rate": 0.0002, "epoch": 3.6363636363636362, "step": 1100}, {"loss": 1.5548, "grad_norm": 0.6106863617897034, "learning_rate": 0.0002, "epoch": 3.669421487603306, "step": 1110}, {"loss": 1.5237, "grad_norm": 0.635955274105072, "learning_rate": 0.0002, "epoch": 3.7024793388429753, "step": 1120}, {"loss": 1.5698, "grad_norm": 0.615804135799408, "learning_rate": 0.0002, "epoch": 3.7355371900826446, "step": 1130}, {"loss": 1.6068, "grad_norm": 0.5769386887550354, "learning_rate": 0.0002, "epoch": 3.768595041322314, "step": 1140}, {"loss": 1.5262, "grad_norm": 0.5938104391098022, "learning_rate": 0.0002, "epoch": 3.8016528925619832, "step": 1150}, {"loss": 1.5236, "grad_norm": 0.6149733066558838, "learning_rate": 0.0002, "epoch": 3.834710743801653, "step": 1160}, {"loss": 1.5585, "grad_norm": 0.6228950023651123, "learning_rate": 0.0002, "epoch": 3.8677685950413223, "step": 1170}, {"loss": 1.5815, "grad_norm": 0.6196513175964355, "learning_rate": 0.0002, "epoch": 3.9008264462809916, "step": 1180}, {"loss": 1.5894, "grad_norm": 0.5946677327156067, "learning_rate": 0.0002, "epoch": 3.9338842975206614, "step": 1190}, {"loss": 1.5451, "grad_norm": 0.5882220268249512, "learning_rate": 0.0002, "epoch": 3.9669421487603307, "step": 1200}, {"loss": 1.6202, "grad_norm": 0.6291728019714355, "learning_rate": 0.0002, "epoch": 4.0, "step": 1210}, {"eval_loss": 1.8943731784820557, "eval_runtime": 38.826, "eval_samples_per_second": 13.264, "eval_steps_per_second": 1.674, "epoch": 4.0, "step": 1210}, {"loss": 1.3944, "grad_norm": 1.0843605995178223, "learning_rate": 0.0002, "epoch": 4.033057851239669, "step": 1220}, {"loss": 1.3453, "grad_norm": 0.6460382342338562, "learning_rate": 0.0002, "epoch": 4.066115702479339, "step": 1230}, {"loss": 1.3714, "grad_norm": 0.7872665524482727, "learning_rate": 0.0002, "epoch": 4.099173553719008, "step": 1240}, {"loss": 1.3247, "grad_norm": 0.7585243582725525, "learning_rate": 0.0002, "epoch": 4.132231404958677, "step": 1250}, {"loss": 1.4162, "grad_norm": 0.7955290079116821, "learning_rate": 0.0002, "epoch": 4.1652892561983474, "step": 1260}, {"loss": 1.4127, "grad_norm": 0.8847756385803223, "learning_rate": 0.0002, "epoch": 4.198347107438017, "step": 1270}, {"loss": 1.3972, "grad_norm": 0.7897582650184631, "learning_rate": 0.0002, "epoch": 4.231404958677686, "step": 1280}, {"loss": 1.3631, "grad_norm": 0.755404531955719, "learning_rate": 0.0002, "epoch": 4.264462809917355, "step": 1290}, {"loss": 1.4219, "grad_norm": 0.7718978524208069, "learning_rate": 0.0002, "epoch": 4.297520661157025, "step": 1300}, {"loss": 1.3832, "grad_norm": 0.8073238134384155, "learning_rate": 0.0002, "epoch": 4.330578512396694, "step": 1310}, {"loss": 1.3968, "grad_norm": 0.8661217093467712, "learning_rate": 0.0002, "epoch": 4.363636363636363, "step": 1320}, {"loss": 1.3809, "grad_norm": 0.8859766721725464, "learning_rate": 0.0002, "epoch": 4.3966942148760335, "step": 1330}, {"loss": 1.3779, "grad_norm": 0.8635476231575012, "learning_rate": 0.0002, "epoch": 4.429752066115703, "step": 1340}, {"loss": 1.403, "grad_norm": 0.7376685738563538, "learning_rate": 0.0002, "epoch": 4.462809917355372, "step": 1350}, {"loss": 1.4346, "grad_norm": 0.7924236059188843, "learning_rate": 0.0002, "epoch": 4.4958677685950414, "step": 1360}, {"loss": 1.3205, "grad_norm": 0.6969273686408997, "learning_rate": 0.0002, "epoch": 4.528925619834711, "step": 1370}, {"loss": 1.399, "grad_norm": 0.7346147894859314, "learning_rate": 0.0002, "epoch": 4.56198347107438, "step": 1380}, {"loss": 1.4308, "grad_norm": 0.8515401482582092, "learning_rate": 0.0002, "epoch": 4.595041322314049, "step": 1390}, {"loss": 1.407, "grad_norm": 0.8154449462890625, "learning_rate": 0.0002, "epoch": 4.628099173553719, "step": 1400}, {"loss": 1.4182, "grad_norm": 0.8922461271286011, "learning_rate": 0.0002, "epoch": 4.661157024793388, "step": 1410}, {"loss": 1.3894, "grad_norm": 0.8835586309432983, "learning_rate": 0.0002, "epoch": 4.694214876033058, "step": 1420}, {"loss": 1.411, "grad_norm": 0.7689077258110046, "learning_rate": 0.0002, "epoch": 4.7272727272727275, "step": 1430}, {"loss": 1.4083, "grad_norm": 0.7515250444412231, "learning_rate": 0.0002, "epoch": 4.760330578512397, "step": 1440}, {"loss": 1.4004, "grad_norm": 0.7655003070831299, "learning_rate": 0.0002, "epoch": 4.793388429752066, "step": 1450}, {"loss": 1.3633, "grad_norm": 0.7187207341194153, "learning_rate": 0.0002, "epoch": 4.8264462809917354, "step": 1460}, {"loss": 1.3647, "grad_norm": 0.7122251987457275, "learning_rate": 0.0002, "epoch": 4.859504132231405, "step": 1470}, {"loss": 1.4481, "grad_norm": 0.7744072675704956, "learning_rate": 0.0002, "epoch": 4.892561983471074, "step": 1480}, {"loss": 1.3959, "grad_norm": 0.8202858567237854, "learning_rate": 0.0002, "epoch": 4.925619834710744, "step": 1490}, {"loss": 1.4176, "grad_norm": 0.7144979238510132, "learning_rate": 0.0002, "epoch": 4.958677685950414, "step": 1500}, {"loss": 1.4398, "grad_norm": 0.7824931144714355, "learning_rate": 0.0002, "epoch": 4.991735537190083, "step": 1510}, {"eval_loss": 1.9822860956192017, "eval_runtime": 38.8296, "eval_samples_per_second": 13.263, "eval_steps_per_second": 1.674, "epoch": 4.998347107438017, "step": 1512}, {"loss": 1.3009, "grad_norm": 1.0307862758636475, "learning_rate": 0.0002, "epoch": 5.024793388429752, "step": 1520}, {"loss": 1.1822, "grad_norm": 0.9152393341064453, "learning_rate": 0.0002, "epoch": 5.0578512396694215, "step": 1530}, {"loss": 1.251, "grad_norm": 0.9560136198997498, "learning_rate": 0.0002, "epoch": 5.090909090909091, "step": 1540}, {"loss": 1.2552, "grad_norm": 0.8285775184631348, "learning_rate": 0.0002, "epoch": 5.12396694214876, "step": 1550}, {"loss": 1.1603, "grad_norm": 0.9479135870933533, "learning_rate": 0.0002, "epoch": 5.1570247933884295, "step": 1560}, {"loss": 1.229, "grad_norm": 0.9731078743934631, "learning_rate": 0.0002, "epoch": 5.190082644628099, "step": 1570}, {"loss": 1.2084, "grad_norm": 0.8167943358421326, "learning_rate": 0.0002, "epoch": 5.223140495867769, "step": 1580}, {"loss": 1.1937, "grad_norm": 1.1679469347000122, "learning_rate": 0.0002, "epoch": 5.256198347107438, "step": 1590}, {"loss": 1.1662, "grad_norm": 0.9156213402748108, "learning_rate": 0.0002, "epoch": 5.289256198347108, "step": 1600}, {"loss": 1.2014, "grad_norm": 1.082939624786377, "learning_rate": 0.0002, "epoch": 5.322314049586777, "step": 1610}, {"loss": 1.2701, "grad_norm": 1.0271905660629272, "learning_rate": 0.0002, "epoch": 5.355371900826446, "step": 1620}, {"loss": 1.2275, "grad_norm": 1.2237807512283325, "learning_rate": 0.0002, "epoch": 5.3884297520661155, "step": 1630}, {"loss": 1.267, "grad_norm": 1.1419697999954224, "learning_rate": 0.0002, "epoch": 5.421487603305785, "step": 1640}, {"loss": 1.2424, "grad_norm": 1.4427895545959473, "learning_rate": 0.0002, "epoch": 5.454545454545454, "step": 1650}, {"loss": 1.24, "grad_norm": 1.117572546005249, "learning_rate": 0.0002, "epoch": 5.487603305785124, "step": 1660}, {"loss": 1.1912, "grad_norm": 1.1476300954818726, "learning_rate": 0.0002, "epoch": 5.520661157024794, "step": 1670}, {"loss": 1.2397, "grad_norm": 1.1372056007385254, "learning_rate": 0.0002, "epoch": 5.553719008264463, "step": 1680}, {"loss": 1.2875, "grad_norm": 1.0415048599243164, "learning_rate": 0.0002, "epoch": 5.586776859504132, "step": 1690}, {"loss": 1.2062, "grad_norm": 0.9535173177719116, "learning_rate": 0.0002, "epoch": 5.619834710743802, "step": 1700}, {"loss": 1.2528, "grad_norm": 0.9918773174285889, "learning_rate": 0.0002, "epoch": 5.652892561983471, "step": 1710}, {"loss": 1.2443, "grad_norm": 0.9184247255325317, "learning_rate": 0.0002, "epoch": 5.68595041322314, "step": 1720}, {"loss": 1.2273, "grad_norm": 0.9416358470916748, "learning_rate": 0.0002, "epoch": 5.7190082644628095, "step": 1730}, {"loss": 1.2815, "grad_norm": 1.0027815103530884, "learning_rate": 0.0002, "epoch": 5.75206611570248, "step": 1740}, {"loss": 1.2261, "grad_norm": 1.0766979455947876, "learning_rate": 0.0002, "epoch": 5.785123966942149, "step": 1750}, {"loss": 1.2221, "grad_norm": 0.9244554042816162, "learning_rate": 0.0002, "epoch": 5.818181818181818, "step": 1760}, {"loss": 1.312, "grad_norm": 1.2514721155166626, "learning_rate": 0.0002, "epoch": 5.851239669421488, "step": 1770}, {"loss": 1.3042, "grad_norm": 1.0198537111282349, "learning_rate": 0.0002, "epoch": 5.884297520661157, "step": 1780}, {"loss": 1.2032, "grad_norm": 0.9569677114486694, "learning_rate": 0.0002, "epoch": 5.917355371900826, "step": 1790}, {"loss": 1.23, "grad_norm": 0.9748323559761047, "learning_rate": 0.0002, "epoch": 5.950413223140496, "step": 1800}, {"loss": 1.2167, "grad_norm": 1.0731725692749023, "learning_rate": 0.0002, "epoch": 5.983471074380166, "step": 1810}, {"eval_loss": 2.0875232219696045, "eval_runtime": 38.8414, "eval_samples_per_second": 13.259, "eval_steps_per_second": 1.673, "epoch": 6.0, "step": 1815}, {"loss": 1.2001, "grad_norm": 1.1357579231262207, "learning_rate": 0.0002, "epoch": 6.016528925619835, "step": 1820}, {"loss": 1.0238, "grad_norm": 1.1450963020324707, "learning_rate": 0.0002, "epoch": 6.049586776859504, "step": 1830}, {"loss": 1.0099, "grad_norm": 1.2671376466751099, "learning_rate": 0.0002, "epoch": 6.082644628099174, "step": 1840}, {"loss": 1.07, "grad_norm": 1.1405659914016724, "learning_rate": 0.0002, "epoch": 6.115702479338843, "step": 1850}, {"loss": 1.0456, "grad_norm": 1.176552176475525, "learning_rate": 0.0002, "epoch": 6.148760330578512, "step": 1860}, {"loss": 1.0804, "grad_norm": 1.2722952365875244, "learning_rate": 0.0002, "epoch": 6.181818181818182, "step": 1870}, {"loss": 1.0305, "grad_norm": 1.2505744695663452, "learning_rate": 0.0002, "epoch": 6.214876033057851, "step": 1880}, {"loss": 1.0496, "grad_norm": 1.388776183128357, "learning_rate": 0.0002, "epoch": 6.24793388429752, "step": 1890}, {"loss": 1.0727, "grad_norm": 1.3574049472808838, "learning_rate": 0.0002, "epoch": 6.2809917355371905, "step": 1900}, {"loss": 1.0142, "grad_norm": 1.15278160572052, "learning_rate": 0.0002, "epoch": 6.31404958677686, "step": 1910}, {"loss": 1.0977, "grad_norm": 1.280260682106018, "learning_rate": 0.0002, "epoch": 6.347107438016529, "step": 1920}, {"loss": 1.0319, "grad_norm": 1.3323947191238403, "learning_rate": 0.0002, "epoch": 6.380165289256198, "step": 1930}, {"loss": 1.0816, "grad_norm": 1.2422343492507935, "learning_rate": 0.0002, "epoch": 6.413223140495868, "step": 1940}, {"loss": 1.0144, "grad_norm": 1.485025405883789, "learning_rate": 0.0002, "epoch": 6.446280991735537, "step": 1950}, {"loss": 1.0489, "grad_norm": 1.132170557975769, "learning_rate": 0.0002, "epoch": 6.479338842975206, "step": 1960}, {"loss": 1.0551, "grad_norm": 1.1854133605957031, "learning_rate": 0.0002, "epoch": 6.512396694214876, "step": 1970}, {"loss": 1.0353, "grad_norm": 1.2570922374725342, "learning_rate": 0.0002, "epoch": 6.545454545454545, "step": 1980}, {"loss": 1.0693, "grad_norm": 1.1590516567230225, "learning_rate": 0.0002, "epoch": 6.578512396694215, "step": 1990}, {"loss": 1.0899, "grad_norm": 1.3472840785980225, "learning_rate": 0.0002, "epoch": 6.6115702479338845, "step": 2000}, {"loss": 1.1226, "grad_norm": 1.4928734302520752, "learning_rate": 0.0002, "epoch": 6.644628099173554, "step": 2010}, {"loss": 1.0448, "grad_norm": 1.243243932723999, "learning_rate": 0.0002, "epoch": 6.677685950413223, "step": 2020}, {"loss": 1.0557, "grad_norm": 1.6677647829055786, "learning_rate": 0.0002, "epoch": 6.710743801652892, "step": 2030}, {"loss": 1.1326, "grad_norm": 1.1295818090438843, "learning_rate": 0.0002, "epoch": 6.743801652892562, "step": 2040}, {"loss": 1.0889, "grad_norm": 1.2596524953842163, "learning_rate": 0.0002, "epoch": 6.776859504132231, "step": 2050}, {"loss": 1.1344, "grad_norm": 1.2924189567565918, "learning_rate": 0.0002, "epoch": 6.809917355371901, "step": 2060}, {"loss": 1.1409, "grad_norm": 1.1653043031692505, "learning_rate": 0.0002, "epoch": 6.8429752066115705, "step": 2070}, {"loss": 1.0911, "grad_norm": 1.3316930532455444, "learning_rate": 0.0002, "epoch": 6.87603305785124, "step": 2080}, {"loss": 1.1134, "grad_norm": 1.2001112699508667, "learning_rate": 0.0002, "epoch": 6.909090909090909, "step": 2090}, {"loss": 1.0538, "grad_norm": 1.1454474925994873, "learning_rate": 0.0002, "epoch": 6.9421487603305785, "step": 2100}, {"loss": 1.0955, "grad_norm": 1.1415315866470337, "learning_rate": 0.0002, "epoch": 6.975206611570248, "step": 2110}, {"eval_loss": 2.2608585357666016, "eval_runtime": 38.8512, "eval_samples_per_second": 13.256, "eval_steps_per_second": 1.673, "epoch": 6.998347107438017, "step": 2117}, {"loss": 1.0526, "grad_norm": 1.212228536605835, "learning_rate": 0.0002, "epoch": 7.008264462809917, "step": 2120}, {"loss": 0.8893, "grad_norm": 1.713937520980835, "learning_rate": 0.0002, "epoch": 7.041322314049586, "step": 2130}, {"loss": 0.8403, "grad_norm": 1.5111262798309326, "learning_rate": 0.0002, "epoch": 7.074380165289257, "step": 2140}, {"loss": 0.8349, "grad_norm": 1.3368730545043945, "learning_rate": 0.0002, "epoch": 7.107438016528926, "step": 2150}, {"loss": 0.921, "grad_norm": 2.0835201740264893, "learning_rate": 0.0002, "epoch": 7.140495867768595, "step": 2160}, {"loss": 0.8702, "grad_norm": 1.7185221910476685, "learning_rate": 0.0002, "epoch": 7.1735537190082646, "step": 2170}, {"loss": 0.8934, "grad_norm": 1.3082201480865479, "learning_rate": 0.0002, "epoch": 7.206611570247934, "step": 2180}, {"loss": 0.9314, "grad_norm": 1.6471620798110962, "learning_rate": 0.0002, "epoch": 7.239669421487603, "step": 2190}, {"loss": 0.8917, "grad_norm": 1.3716152906417847, "learning_rate": 0.0002, "epoch": 7.2727272727272725, "step": 2200}, {"loss": 0.9069, "grad_norm": 1.6379696130752563, "learning_rate": 0.0002, "epoch": 7.305785123966942, "step": 2210}, {"loss": 0.9263, "grad_norm": 1.3955202102661133, "learning_rate": 0.0002, "epoch": 7.338842975206612, "step": 2220}, {"loss": 0.8964, "grad_norm": 1.4794671535491943, "learning_rate": 0.0002, "epoch": 7.371900826446281, "step": 2230}, {"loss": 0.8703, "grad_norm": 1.5542606115341187, "learning_rate": 0.0002, "epoch": 7.404958677685951, "step": 2240}, {"loss": 0.8864, "grad_norm": 1.3853563070297241, "learning_rate": 0.0002, "epoch": 7.43801652892562, "step": 2250}, {"loss": 0.9029, "grad_norm": 1.428218126296997, "learning_rate": 0.0002, "epoch": 7.471074380165289, "step": 2260}, {"loss": 0.9793, "grad_norm": 1.78374183177948, "learning_rate": 0.0002, "epoch": 7.5041322314049586, "step": 2270}, {"loss": 0.9102, "grad_norm": 1.4092047214508057, "learning_rate": 0.0002, "epoch": 7.537190082644628, "step": 2280}, {"loss": 0.8833, "grad_norm": 1.601216197013855, "learning_rate": 0.0002, "epoch": 7.570247933884297, "step": 2290}, {"loss": 0.9367, "grad_norm": 1.82637619972229, "learning_rate": 0.0002, "epoch": 7.6033057851239665, "step": 2300}, {"loss": 0.9162, "grad_norm": 1.434897541999817, "learning_rate": 0.0002, "epoch": 7.636363636363637, "step": 2310}, {"loss": 0.8969, "grad_norm": 1.5651953220367432, "learning_rate": 0.0002, "epoch": 7.669421487603306, "step": 2320}, {"loss": 0.9189, "grad_norm": 1.793326735496521, "learning_rate": 0.0002, "epoch": 7.702479338842975, "step": 2330}, {"loss": 0.9133, "grad_norm": 1.3330620527267456, "learning_rate": 0.0002, "epoch": 7.735537190082645, "step": 2340}, {"loss": 0.9519, "grad_norm": 1.8976562023162842, "learning_rate": 0.0002, "epoch": 7.768595041322314, "step": 2350}, {"loss": 0.9381, "grad_norm": 1.7769376039505005, "learning_rate": 0.0002, "epoch": 7.801652892561983, "step": 2360}, {"loss": 0.9588, "grad_norm": 1.607336163520813, "learning_rate": 0.0002, "epoch": 7.8347107438016526, "step": 2370}, {"loss": 0.9241, "grad_norm": 1.807392954826355, "learning_rate": 0.0002, "epoch": 7.867768595041323, "step": 2380}, {"loss": 0.9395, "grad_norm": 1.501326322555542, "learning_rate": 0.0002, "epoch": 7.900826446280992, "step": 2390}, {"loss": 0.9857, "grad_norm": 1.473686695098877, "learning_rate": 0.0002, "epoch": 7.933884297520661, "step": 2400}, {"loss": 0.8913, "grad_norm": 1.8466233015060425, "learning_rate": 0.0002, "epoch": 7.966942148760331, "step": 2410}]}