diff --git a/.gitattributes b/.gitattributes index 82ed3f8dbfc0ed9a81e12460b22dfd5463cb0b28..68378763499be59d1f6249293ad8df66a738d181 100644 --- a/.gitattributes +++ b/.gitattributes @@ -798,3 +798,12 @@ gemma-2-9b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d gemma-2-9b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-2108-sd-42/checkpoint-437/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2-9b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-2108-sd-42/checkpoint-875/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2-9b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.3-num-2108-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2914583db0507b14a5bcd3d237d124baf41adf2d --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93008ec421c7a3492809b1931e60a60b7e286cb520e0ba8266c8862b2ae048ef +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5793b2d312d8c4c78a45529ee36122735b061e80 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d2b39ae001343be48bf95ef271c95a287b58182dff8d6611c7494b9cd4bf46 +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1e91aaf6b4a7ab37ab111d43635fd03117c9c7db --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b7346cda1f2aa85718a88d0c05c65265a8a3bc7e0c1a5ca2c30caa60250a82 +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..112518523616d4c7374cab599a9d335e6f4d298c --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bed4367f8e2e03865f517c55f8b9b4864edcf9af1401aeb2168812d94293cce +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..08b95ab5ffe9416570d1acf846049a319f683905 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:50868e373a85900101fa12aa1d2734356df59d3850cf8a9b48cd4a6bab1e1654 +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a75ea362a145fe8dd5ffecd7b1629c1fc358353 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/trainer_state.json @@ -0,0 +1,919 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 1224, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + }, + { + "epoch": 3.0065359477124183, + "grad_norm": 0.8342176079750061, + "learning_rate": 0.0002, + "loss": 0.8157, + "step": 920 + }, + { + "epoch": 3.0392156862745097, + "grad_norm": 0.7695813775062561, + "learning_rate": 0.0002, + "loss": 0.6855, + "step": 930 + }, + { + "epoch": 3.0718954248366015, + "grad_norm": 0.6819486618041992, + "learning_rate": 0.0002, + "loss": 0.6173, + "step": 940 + }, + { + "epoch": 3.104575163398693, + "grad_norm": 0.7568879723548889, + "learning_rate": 0.0002, + "loss": 0.6495, + "step": 950 + }, + { + "epoch": 3.1372549019607843, + "grad_norm": 0.6760695576667786, + "learning_rate": 0.0002, + "loss": 0.7905, + "step": 960 + }, + { + "epoch": 3.1699346405228757, + "grad_norm": 0.6359127759933472, + "learning_rate": 0.0002, + "loss": 0.6405, + "step": 970 + }, + { + "epoch": 3.2026143790849675, + "grad_norm": 0.8414971828460693, + "learning_rate": 0.0002, + "loss": 0.7172, + "step": 980 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.68381667137146, + "learning_rate": 0.0002, + "loss": 0.7865, + "step": 990 + }, + { + "epoch": 3.2679738562091503, + "grad_norm": 0.6852193474769592, + "learning_rate": 0.0002, + "loss": 0.6651, + "step": 1000 + }, + { + "epoch": 3.3006535947712417, + "grad_norm": 0.8184967041015625, + "learning_rate": 0.0002, + "loss": 0.6571, + "step": 1010 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 1.047290563583374, + "learning_rate": 0.0002, + "loss": 0.7036, + "step": 1020 + }, + { + "epoch": 3.366013071895425, + "grad_norm": 0.8291178345680237, + "learning_rate": 0.0002, + "loss": 0.7215, + "step": 1030 + }, + { + "epoch": 3.3986928104575163, + "grad_norm": 0.6668022871017456, + "learning_rate": 0.0002, + "loss": 0.6243, + "step": 1040 + }, + { + "epoch": 3.431372549019608, + "grad_norm": 0.6354008316993713, + "learning_rate": 0.0002, + "loss": 0.7459, + "step": 1050 + }, + { + "epoch": 3.4640522875816995, + "grad_norm": 1.2028366327285767, + "learning_rate": 0.0002, + "loss": 0.6826, + "step": 1060 + }, + { + "epoch": 3.496732026143791, + "grad_norm": 0.717367947101593, + "learning_rate": 0.0002, + "loss": 0.5913, + "step": 1070 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.542179524898529, + "learning_rate": 0.0002, + "loss": 0.6903, + "step": 1080 + }, + { + "epoch": 3.5620915032679736, + "grad_norm": 0.845981776714325, + "learning_rate": 0.0002, + "loss": 0.7673, + "step": 1090 + }, + { + "epoch": 3.5947712418300655, + "grad_norm": 0.7381046414375305, + "learning_rate": 0.0002, + "loss": 0.7089, + "step": 1100 + }, + { + "epoch": 3.627450980392157, + "grad_norm": 0.6563456058502197, + "learning_rate": 0.0002, + "loss": 0.6705, + "step": 1110 + }, + { + "epoch": 3.6601307189542482, + "grad_norm": 0.7130876779556274, + "learning_rate": 0.0002, + "loss": 0.7767, + "step": 1120 + }, + { + "epoch": 3.69281045751634, + "grad_norm": 0.800032913684845, + "learning_rate": 0.0002, + "loss": 0.7164, + "step": 1130 + }, + { + "epoch": 3.7254901960784315, + "grad_norm": 0.980328381061554, + "learning_rate": 0.0002, + "loss": 0.7272, + "step": 1140 + }, + { + "epoch": 3.758169934640523, + "grad_norm": 0.8542261123657227, + "learning_rate": 0.0002, + "loss": 0.7672, + "step": 1150 + }, + { + "epoch": 3.7908496732026142, + "grad_norm": 0.6302552819252014, + "learning_rate": 0.0002, + "loss": 0.679, + "step": 1160 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.515398383140564, + "learning_rate": 0.0002, + "loss": 0.7457, + "step": 1170 + }, + { + "epoch": 3.8562091503267975, + "grad_norm": 1.2427130937576294, + "learning_rate": 0.0002, + "loss": 0.693, + "step": 1180 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.8206831216812134, + "learning_rate": 0.0002, + "loss": 0.7182, + "step": 1190 + }, + { + "epoch": 3.9215686274509802, + "grad_norm": 0.7633249163627625, + "learning_rate": 0.0002, + "loss": 0.7519, + "step": 1200 + }, + { + "epoch": 3.954248366013072, + "grad_norm": 0.8034512400627136, + "learning_rate": 0.0002, + "loss": 0.7082, + "step": 1210 + }, + { + "epoch": 3.9869281045751634, + "grad_norm": 0.7667182087898254, + "learning_rate": 0.0002, + "loss": 0.6834, + "step": 1220 + }, + { + "epoch": 4.0, + "eval_loss": 1.3456707000732422, + "eval_runtime": 46.2562, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 1224 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6.286922464886784e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1224/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4936c43621fa767da307e8270e56e0efdaa4e40d --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de25c1bf3e53f3be00286eed018301ab7d6f4edaaa7a984599625f2dcc82e25c +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..857517e3cf85b51f2fc639aa66494ec5953c6aef --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0166704bfb9d47b832d6c61ff5149e2cc798aa8510cde99b99e079a01e60785f +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..71ad3a571084a574b59a69e4bc0e05f3741f29e5 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a3272a32abbe607ed1e0c42e1e4209e67224653ec7a445a12bf6cb39d5b6cd +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..12cf840f119983494dc57dcded908b0a8a439bd4 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70fd0e651657c68255bfbddaea06e66fc5308abd761799251251b4b129a8c90d +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8e964ad3ba74f43622e711b1d70d1fb8ed0622e3 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/trainer_state.json @@ -0,0 +1,1144 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 5.0, + "eval_steps": 10, + "global_step": 1530, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + }, + { + "epoch": 3.0065359477124183, + "grad_norm": 0.8342176079750061, + "learning_rate": 0.0002, + "loss": 0.8157, + "step": 920 + }, + { + "epoch": 3.0392156862745097, + "grad_norm": 0.7695813775062561, + "learning_rate": 0.0002, + "loss": 0.6855, + "step": 930 + }, + { + "epoch": 3.0718954248366015, + "grad_norm": 0.6819486618041992, + "learning_rate": 0.0002, + "loss": 0.6173, + "step": 940 + }, + { + "epoch": 3.104575163398693, + "grad_norm": 0.7568879723548889, + "learning_rate": 0.0002, + "loss": 0.6495, + "step": 950 + }, + { + "epoch": 3.1372549019607843, + "grad_norm": 0.6760695576667786, + "learning_rate": 0.0002, + "loss": 0.7905, + "step": 960 + }, + { + "epoch": 3.1699346405228757, + "grad_norm": 0.6359127759933472, + "learning_rate": 0.0002, + "loss": 0.6405, + "step": 970 + }, + { + "epoch": 3.2026143790849675, + "grad_norm": 0.8414971828460693, + "learning_rate": 0.0002, + "loss": 0.7172, + "step": 980 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.68381667137146, + "learning_rate": 0.0002, + "loss": 0.7865, + "step": 990 + }, + { + "epoch": 3.2679738562091503, + "grad_norm": 0.6852193474769592, + "learning_rate": 0.0002, + "loss": 0.6651, + "step": 1000 + }, + { + "epoch": 3.3006535947712417, + "grad_norm": 0.8184967041015625, + "learning_rate": 0.0002, + "loss": 0.6571, + "step": 1010 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 1.047290563583374, + "learning_rate": 0.0002, + "loss": 0.7036, + "step": 1020 + }, + { + "epoch": 3.366013071895425, + "grad_norm": 0.8291178345680237, + "learning_rate": 0.0002, + "loss": 0.7215, + "step": 1030 + }, + { + "epoch": 3.3986928104575163, + "grad_norm": 0.6668022871017456, + "learning_rate": 0.0002, + "loss": 0.6243, + "step": 1040 + }, + { + "epoch": 3.431372549019608, + "grad_norm": 0.6354008316993713, + "learning_rate": 0.0002, + "loss": 0.7459, + "step": 1050 + }, + { + "epoch": 3.4640522875816995, + "grad_norm": 1.2028366327285767, + "learning_rate": 0.0002, + "loss": 0.6826, + "step": 1060 + }, + { + "epoch": 3.496732026143791, + "grad_norm": 0.717367947101593, + "learning_rate": 0.0002, + "loss": 0.5913, + "step": 1070 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.542179524898529, + "learning_rate": 0.0002, + "loss": 0.6903, + "step": 1080 + }, + { + "epoch": 3.5620915032679736, + "grad_norm": 0.845981776714325, + "learning_rate": 0.0002, + "loss": 0.7673, + "step": 1090 + }, + { + "epoch": 3.5947712418300655, + "grad_norm": 0.7381046414375305, + "learning_rate": 0.0002, + "loss": 0.7089, + "step": 1100 + }, + { + "epoch": 3.627450980392157, + "grad_norm": 0.6563456058502197, + "learning_rate": 0.0002, + "loss": 0.6705, + "step": 1110 + }, + { + "epoch": 3.6601307189542482, + "grad_norm": 0.7130876779556274, + "learning_rate": 0.0002, + "loss": 0.7767, + "step": 1120 + }, + { + "epoch": 3.69281045751634, + "grad_norm": 0.800032913684845, + "learning_rate": 0.0002, + "loss": 0.7164, + "step": 1130 + }, + { + "epoch": 3.7254901960784315, + "grad_norm": 0.980328381061554, + "learning_rate": 0.0002, + "loss": 0.7272, + "step": 1140 + }, + { + "epoch": 3.758169934640523, + "grad_norm": 0.8542261123657227, + "learning_rate": 0.0002, + "loss": 0.7672, + "step": 1150 + }, + { + "epoch": 3.7908496732026142, + "grad_norm": 0.6302552819252014, + "learning_rate": 0.0002, + "loss": 0.679, + "step": 1160 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.515398383140564, + "learning_rate": 0.0002, + "loss": 0.7457, + "step": 1170 + }, + { + "epoch": 3.8562091503267975, + "grad_norm": 1.2427130937576294, + "learning_rate": 0.0002, + "loss": 0.693, + "step": 1180 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.8206831216812134, + "learning_rate": 0.0002, + "loss": 0.7182, + "step": 1190 + }, + { + "epoch": 3.9215686274509802, + "grad_norm": 0.7633249163627625, + "learning_rate": 0.0002, + "loss": 0.7519, + "step": 1200 + }, + { + "epoch": 3.954248366013072, + "grad_norm": 0.8034512400627136, + "learning_rate": 0.0002, + "loss": 0.7082, + "step": 1210 + }, + { + "epoch": 3.9869281045751634, + "grad_norm": 0.7667182087898254, + "learning_rate": 0.0002, + "loss": 0.6834, + "step": 1220 + }, + { + "epoch": 4.0, + "eval_loss": 1.3456707000732422, + "eval_runtime": 46.2562, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 1224 + }, + { + "epoch": 4.019607843137255, + "grad_norm": 0.7724746465682983, + "learning_rate": 0.0002, + "loss": 0.582, + "step": 1230 + }, + { + "epoch": 4.052287581699346, + "grad_norm": 1.166916847229004, + "learning_rate": 0.0002, + "loss": 0.4759, + "step": 1240 + }, + { + "epoch": 4.084967320261438, + "grad_norm": 0.7234508991241455, + "learning_rate": 0.0002, + "loss": 0.4995, + "step": 1250 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 1.1418060064315796, + "learning_rate": 0.0002, + "loss": 0.4863, + "step": 1260 + }, + { + "epoch": 4.150326797385621, + "grad_norm": 0.9603922367095947, + "learning_rate": 0.0002, + "loss": 0.5425, + "step": 1270 + }, + { + "epoch": 4.183006535947713, + "grad_norm": 0.8976530432701111, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 1280 + }, + { + "epoch": 4.215686274509804, + "grad_norm": 0.6855078339576721, + "learning_rate": 0.0002, + "loss": 0.473, + "step": 1290 + }, + { + "epoch": 4.248366013071895, + "grad_norm": 1.2676647901535034, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 1300 + }, + { + "epoch": 4.281045751633987, + "grad_norm": 1.104057788848877, + "learning_rate": 0.0002, + "loss": 0.5071, + "step": 1310 + }, + { + "epoch": 4.313725490196078, + "grad_norm": 1.7076562643051147, + "learning_rate": 0.0002, + "loss": 0.5168, + "step": 1320 + }, + { + "epoch": 4.34640522875817, + "grad_norm": 1.2308520078659058, + "learning_rate": 0.0002, + "loss": 0.4655, + "step": 1330 + }, + { + "epoch": 4.379084967320262, + "grad_norm": 1.2652729749679565, + "learning_rate": 0.0002, + "loss": 0.5322, + "step": 1340 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 1.054958701133728, + "learning_rate": 0.0002, + "loss": 0.5262, + "step": 1350 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 1.0130749940872192, + "learning_rate": 0.0002, + "loss": 0.4747, + "step": 1360 + }, + { + "epoch": 4.477124183006536, + "grad_norm": 1.0517818927764893, + "learning_rate": 0.0002, + "loss": 0.4887, + "step": 1370 + }, + { + "epoch": 4.509803921568627, + "grad_norm": 0.8593037128448486, + "learning_rate": 0.0002, + "loss": 0.4906, + "step": 1380 + }, + { + "epoch": 4.542483660130719, + "grad_norm": 1.0248081684112549, + "learning_rate": 0.0002, + "loss": 0.5049, + "step": 1390 + }, + { + "epoch": 4.57516339869281, + "grad_norm": 0.8999413847923279, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1400 + }, + { + "epoch": 4.607843137254902, + "grad_norm": 0.9106912612915039, + "learning_rate": 0.0002, + "loss": 0.5102, + "step": 1410 + }, + { + "epoch": 4.640522875816993, + "grad_norm": 1.2736181020736694, + "learning_rate": 0.0002, + "loss": 0.5203, + "step": 1420 + }, + { + "epoch": 4.673202614379085, + "grad_norm": 0.9311690926551819, + "learning_rate": 0.0002, + "loss": 0.5101, + "step": 1430 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 1.0455045700073242, + "learning_rate": 0.0002, + "loss": 0.5648, + "step": 1440 + }, + { + "epoch": 4.738562091503268, + "grad_norm": 1.0190727710723877, + "learning_rate": 0.0002, + "loss": 0.5004, + "step": 1450 + }, + { + "epoch": 4.771241830065359, + "grad_norm": 1.333198070526123, + "learning_rate": 0.0002, + "loss": 0.5506, + "step": 1460 + }, + { + "epoch": 4.803921568627451, + "grad_norm": 0.8808416724205017, + "learning_rate": 0.0002, + "loss": 0.5846, + "step": 1470 + }, + { + "epoch": 4.836601307189542, + "grad_norm": 0.8896227478981018, + "learning_rate": 0.0002, + "loss": 0.4671, + "step": 1480 + }, + { + "epoch": 4.8692810457516345, + "grad_norm": 1.212323784828186, + "learning_rate": 0.0002, + "loss": 0.4732, + "step": 1490 + }, + { + "epoch": 4.901960784313726, + "grad_norm": 1.0490120649337769, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1500 + }, + { + "epoch": 4.934640522875817, + "grad_norm": 0.8946618437767029, + "learning_rate": 0.0002, + "loss": 0.5815, + "step": 1510 + }, + { + "epoch": 4.967320261437909, + "grad_norm": 1.0609275102615356, + "learning_rate": 0.0002, + "loss": 0.5369, + "step": 1520 + }, + { + "epoch": 5.0, + "grad_norm": 0.8885099291801453, + "learning_rate": 0.0002, + "loss": 0.5348, + "step": 1530 + }, + { + "epoch": 5.0, + "eval_loss": 1.5771757364273071, + "eval_runtime": 46.2667, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.189, + "step": 1530 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.85865308110848e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1530/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..379972c93acac74527377c606066267c038ca1a0 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cba63e1e980f17fbfd7294d59c0b242ce270633ea5d80bac7f58e15a34b5394 +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..da7f65b8fb5d04428b7cdab38a68e49ef8372da5 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09cea484d0ce018bcbf9863c62afcb363be5503137233edb1ad8309572a0d2ef +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..81f61c0077516e02ac5c8d313ad77141e2d8301a --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61c8f9dce02503f3cbf8c9bb14de9d5e9c8f903b865370691267f3556220b4ea +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ca3ede5ec898c12dbce366c0565f25cf87bdda2b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bcfa0cfb603077ad7bad7d25378dad56ade42ca731e5eca62b2e241e2e4d5d9 +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..133dded1b78c147b5e6866cdcff15a038870fef3 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/trainer_state.json @@ -0,0 +1,1362 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 1836, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + }, + { + "epoch": 3.0065359477124183, + "grad_norm": 0.8342176079750061, + "learning_rate": 0.0002, + "loss": 0.8157, + "step": 920 + }, + { + "epoch": 3.0392156862745097, + "grad_norm": 0.7695813775062561, + "learning_rate": 0.0002, + "loss": 0.6855, + "step": 930 + }, + { + "epoch": 3.0718954248366015, + "grad_norm": 0.6819486618041992, + "learning_rate": 0.0002, + "loss": 0.6173, + "step": 940 + }, + { + "epoch": 3.104575163398693, + "grad_norm": 0.7568879723548889, + "learning_rate": 0.0002, + "loss": 0.6495, + "step": 950 + }, + { + "epoch": 3.1372549019607843, + "grad_norm": 0.6760695576667786, + "learning_rate": 0.0002, + "loss": 0.7905, + "step": 960 + }, + { + "epoch": 3.1699346405228757, + "grad_norm": 0.6359127759933472, + "learning_rate": 0.0002, + "loss": 0.6405, + "step": 970 + }, + { + "epoch": 3.2026143790849675, + "grad_norm": 0.8414971828460693, + "learning_rate": 0.0002, + "loss": 0.7172, + "step": 980 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.68381667137146, + "learning_rate": 0.0002, + "loss": 0.7865, + "step": 990 + }, + { + "epoch": 3.2679738562091503, + "grad_norm": 0.6852193474769592, + "learning_rate": 0.0002, + "loss": 0.6651, + "step": 1000 + }, + { + "epoch": 3.3006535947712417, + "grad_norm": 0.8184967041015625, + "learning_rate": 0.0002, + "loss": 0.6571, + "step": 1010 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 1.047290563583374, + "learning_rate": 0.0002, + "loss": 0.7036, + "step": 1020 + }, + { + "epoch": 3.366013071895425, + "grad_norm": 0.8291178345680237, + "learning_rate": 0.0002, + "loss": 0.7215, + "step": 1030 + }, + { + "epoch": 3.3986928104575163, + "grad_norm": 0.6668022871017456, + "learning_rate": 0.0002, + "loss": 0.6243, + "step": 1040 + }, + { + "epoch": 3.431372549019608, + "grad_norm": 0.6354008316993713, + "learning_rate": 0.0002, + "loss": 0.7459, + "step": 1050 + }, + { + "epoch": 3.4640522875816995, + "grad_norm": 1.2028366327285767, + "learning_rate": 0.0002, + "loss": 0.6826, + "step": 1060 + }, + { + "epoch": 3.496732026143791, + "grad_norm": 0.717367947101593, + "learning_rate": 0.0002, + "loss": 0.5913, + "step": 1070 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.542179524898529, + "learning_rate": 0.0002, + "loss": 0.6903, + "step": 1080 + }, + { + "epoch": 3.5620915032679736, + "grad_norm": 0.845981776714325, + "learning_rate": 0.0002, + "loss": 0.7673, + "step": 1090 + }, + { + "epoch": 3.5947712418300655, + "grad_norm": 0.7381046414375305, + "learning_rate": 0.0002, + "loss": 0.7089, + "step": 1100 + }, + { + "epoch": 3.627450980392157, + "grad_norm": 0.6563456058502197, + "learning_rate": 0.0002, + "loss": 0.6705, + "step": 1110 + }, + { + "epoch": 3.6601307189542482, + "grad_norm": 0.7130876779556274, + "learning_rate": 0.0002, + "loss": 0.7767, + "step": 1120 + }, + { + "epoch": 3.69281045751634, + "grad_norm": 0.800032913684845, + "learning_rate": 0.0002, + "loss": 0.7164, + "step": 1130 + }, + { + "epoch": 3.7254901960784315, + "grad_norm": 0.980328381061554, + "learning_rate": 0.0002, + "loss": 0.7272, + "step": 1140 + }, + { + "epoch": 3.758169934640523, + "grad_norm": 0.8542261123657227, + "learning_rate": 0.0002, + "loss": 0.7672, + "step": 1150 + }, + { + "epoch": 3.7908496732026142, + "grad_norm": 0.6302552819252014, + "learning_rate": 0.0002, + "loss": 0.679, + "step": 1160 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.515398383140564, + "learning_rate": 0.0002, + "loss": 0.7457, + "step": 1170 + }, + { + "epoch": 3.8562091503267975, + "grad_norm": 1.2427130937576294, + "learning_rate": 0.0002, + "loss": 0.693, + "step": 1180 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.8206831216812134, + "learning_rate": 0.0002, + "loss": 0.7182, + "step": 1190 + }, + { + "epoch": 3.9215686274509802, + "grad_norm": 0.7633249163627625, + "learning_rate": 0.0002, + "loss": 0.7519, + "step": 1200 + }, + { + "epoch": 3.954248366013072, + "grad_norm": 0.8034512400627136, + "learning_rate": 0.0002, + "loss": 0.7082, + "step": 1210 + }, + { + "epoch": 3.9869281045751634, + "grad_norm": 0.7667182087898254, + "learning_rate": 0.0002, + "loss": 0.6834, + "step": 1220 + }, + { + "epoch": 4.0, + "eval_loss": 1.3456707000732422, + "eval_runtime": 46.2562, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 1224 + }, + { + "epoch": 4.019607843137255, + "grad_norm": 0.7724746465682983, + "learning_rate": 0.0002, + "loss": 0.582, + "step": 1230 + }, + { + "epoch": 4.052287581699346, + "grad_norm": 1.166916847229004, + "learning_rate": 0.0002, + "loss": 0.4759, + "step": 1240 + }, + { + "epoch": 4.084967320261438, + "grad_norm": 0.7234508991241455, + "learning_rate": 0.0002, + "loss": 0.4995, + "step": 1250 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 1.1418060064315796, + "learning_rate": 0.0002, + "loss": 0.4863, + "step": 1260 + }, + { + "epoch": 4.150326797385621, + "grad_norm": 0.9603922367095947, + "learning_rate": 0.0002, + "loss": 0.5425, + "step": 1270 + }, + { + "epoch": 4.183006535947713, + "grad_norm": 0.8976530432701111, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 1280 + }, + { + "epoch": 4.215686274509804, + "grad_norm": 0.6855078339576721, + "learning_rate": 0.0002, + "loss": 0.473, + "step": 1290 + }, + { + "epoch": 4.248366013071895, + "grad_norm": 1.2676647901535034, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 1300 + }, + { + "epoch": 4.281045751633987, + "grad_norm": 1.104057788848877, + "learning_rate": 0.0002, + "loss": 0.5071, + "step": 1310 + }, + { + "epoch": 4.313725490196078, + "grad_norm": 1.7076562643051147, + "learning_rate": 0.0002, + "loss": 0.5168, + "step": 1320 + }, + { + "epoch": 4.34640522875817, + "grad_norm": 1.2308520078659058, + "learning_rate": 0.0002, + "loss": 0.4655, + "step": 1330 + }, + { + "epoch": 4.379084967320262, + "grad_norm": 1.2652729749679565, + "learning_rate": 0.0002, + "loss": 0.5322, + "step": 1340 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 1.054958701133728, + "learning_rate": 0.0002, + "loss": 0.5262, + "step": 1350 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 1.0130749940872192, + "learning_rate": 0.0002, + "loss": 0.4747, + "step": 1360 + }, + { + "epoch": 4.477124183006536, + "grad_norm": 1.0517818927764893, + "learning_rate": 0.0002, + "loss": 0.4887, + "step": 1370 + }, + { + "epoch": 4.509803921568627, + "grad_norm": 0.8593037128448486, + "learning_rate": 0.0002, + "loss": 0.4906, + "step": 1380 + }, + { + "epoch": 4.542483660130719, + "grad_norm": 1.0248081684112549, + "learning_rate": 0.0002, + "loss": 0.5049, + "step": 1390 + }, + { + "epoch": 4.57516339869281, + "grad_norm": 0.8999413847923279, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1400 + }, + { + "epoch": 4.607843137254902, + "grad_norm": 0.9106912612915039, + "learning_rate": 0.0002, + "loss": 0.5102, + "step": 1410 + }, + { + "epoch": 4.640522875816993, + "grad_norm": 1.2736181020736694, + "learning_rate": 0.0002, + "loss": 0.5203, + "step": 1420 + }, + { + "epoch": 4.673202614379085, + "grad_norm": 0.9311690926551819, + "learning_rate": 0.0002, + "loss": 0.5101, + "step": 1430 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 1.0455045700073242, + "learning_rate": 0.0002, + "loss": 0.5648, + "step": 1440 + }, + { + "epoch": 4.738562091503268, + "grad_norm": 1.0190727710723877, + "learning_rate": 0.0002, + "loss": 0.5004, + "step": 1450 + }, + { + "epoch": 4.771241830065359, + "grad_norm": 1.333198070526123, + "learning_rate": 0.0002, + "loss": 0.5506, + "step": 1460 + }, + { + "epoch": 4.803921568627451, + "grad_norm": 0.8808416724205017, + "learning_rate": 0.0002, + "loss": 0.5846, + "step": 1470 + }, + { + "epoch": 4.836601307189542, + "grad_norm": 0.8896227478981018, + "learning_rate": 0.0002, + "loss": 0.4671, + "step": 1480 + }, + { + "epoch": 4.8692810457516345, + "grad_norm": 1.212323784828186, + "learning_rate": 0.0002, + "loss": 0.4732, + "step": 1490 + }, + { + "epoch": 4.901960784313726, + "grad_norm": 1.0490120649337769, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1500 + }, + { + "epoch": 4.934640522875817, + "grad_norm": 0.8946618437767029, + "learning_rate": 0.0002, + "loss": 0.5815, + "step": 1510 + }, + { + "epoch": 4.967320261437909, + "grad_norm": 1.0609275102615356, + "learning_rate": 0.0002, + "loss": 0.5369, + "step": 1520 + }, + { + "epoch": 5.0, + "grad_norm": 0.8885099291801453, + "learning_rate": 0.0002, + "loss": 0.5348, + "step": 1530 + }, + { + "epoch": 5.0, + "eval_loss": 1.5771757364273071, + "eval_runtime": 46.2667, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.189, + "step": 1530 + }, + { + "epoch": 5.032679738562091, + "grad_norm": 1.401705026626587, + "learning_rate": 0.0002, + "loss": 0.3176, + "step": 1540 + }, + { + "epoch": 5.065359477124183, + "grad_norm": 0.8365539908409119, + "learning_rate": 0.0002, + "loss": 0.3346, + "step": 1550 + }, + { + "epoch": 5.098039215686274, + "grad_norm": 1.3188321590423584, + "learning_rate": 0.0002, + "loss": 0.3605, + "step": 1560 + }, + { + "epoch": 5.130718954248366, + "grad_norm": 0.9819526076316833, + "learning_rate": 0.0002, + "loss": 0.326, + "step": 1570 + }, + { + "epoch": 5.163398692810458, + "grad_norm": 1.13265061378479, + "learning_rate": 0.0002, + "loss": 0.3575, + "step": 1580 + }, + { + "epoch": 5.196078431372549, + "grad_norm": 1.478152871131897, + "learning_rate": 0.0002, + "loss": 0.3418, + "step": 1590 + }, + { + "epoch": 5.228758169934641, + "grad_norm": 1.4188750982284546, + "learning_rate": 0.0002, + "loss": 0.3388, + "step": 1600 + }, + { + "epoch": 5.261437908496732, + "grad_norm": 1.2499338388442993, + "learning_rate": 0.0002, + "loss": 0.3524, + "step": 1610 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.7885085344314575, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 1620 + }, + { + "epoch": 5.326797385620915, + "grad_norm": 1.2614946365356445, + "learning_rate": 0.0002, + "loss": 0.3237, + "step": 1630 + }, + { + "epoch": 5.359477124183006, + "grad_norm": 1.28338623046875, + "learning_rate": 0.0002, + "loss": 0.3511, + "step": 1640 + }, + { + "epoch": 5.392156862745098, + "grad_norm": 1.1973257064819336, + "learning_rate": 0.0002, + "loss": 0.3112, + "step": 1650 + }, + { + "epoch": 5.42483660130719, + "grad_norm": 1.1356301307678223, + "learning_rate": 0.0002, + "loss": 0.3645, + "step": 1660 + }, + { + "epoch": 5.457516339869281, + "grad_norm": 0.9048901200294495, + "learning_rate": 0.0002, + "loss": 0.307, + "step": 1670 + }, + { + "epoch": 5.490196078431373, + "grad_norm": 1.5352122783660889, + "learning_rate": 0.0002, + "loss": 0.3828, + "step": 1680 + }, + { + "epoch": 5.522875816993464, + "grad_norm": 0.9096335172653198, + "learning_rate": 0.0002, + "loss": 0.3826, + "step": 1690 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 1.1903661489486694, + "learning_rate": 0.0002, + "loss": 0.3686, + "step": 1700 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.9234451651573181, + "learning_rate": 0.0002, + "loss": 0.325, + "step": 1710 + }, + { + "epoch": 5.620915032679738, + "grad_norm": 1.4554102420806885, + "learning_rate": 0.0002, + "loss": 0.3451, + "step": 1720 + }, + { + "epoch": 5.65359477124183, + "grad_norm": 1.1044343709945679, + "learning_rate": 0.0002, + "loss": 0.3488, + "step": 1730 + }, + { + "epoch": 5.686274509803922, + "grad_norm": 1.2219593524932861, + "learning_rate": 0.0002, + "loss": 0.3673, + "step": 1740 + }, + { + "epoch": 5.718954248366013, + "grad_norm": 0.901652455329895, + "learning_rate": 0.0002, + "loss": 0.3517, + "step": 1750 + }, + { + "epoch": 5.751633986928105, + "grad_norm": 1.3334792852401733, + "learning_rate": 0.0002, + "loss": 0.4082, + "step": 1760 + }, + { + "epoch": 5.784313725490196, + "grad_norm": 1.5595488548278809, + "learning_rate": 0.0002, + "loss": 0.4386, + "step": 1770 + }, + { + "epoch": 5.816993464052287, + "grad_norm": 1.3892982006072998, + "learning_rate": 0.0002, + "loss": 0.3217, + "step": 1780 + }, + { + "epoch": 5.849673202614379, + "grad_norm": 1.0813168287277222, + "learning_rate": 0.0002, + "loss": 0.3919, + "step": 1790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 1.145320177078247, + "learning_rate": 0.0002, + "loss": 0.3697, + "step": 1800 + }, + { + "epoch": 5.915032679738562, + "grad_norm": 1.0249533653259277, + "learning_rate": 0.0002, + "loss": 0.3903, + "step": 1810 + }, + { + "epoch": 5.947712418300654, + "grad_norm": 1.0013737678527832, + "learning_rate": 0.0002, + "loss": 0.3481, + "step": 1820 + }, + { + "epoch": 5.980392156862745, + "grad_norm": 1.212314248085022, + "learning_rate": 0.0002, + "loss": 0.4278, + "step": 1830 + }, + { + "epoch": 6.0, + "eval_loss": 1.7506128549575806, + "eval_runtime": 46.2256, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.19, + "step": 1836 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9.430383697330176e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-1836/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1b28480ca5bc1eb127b32f93614305c2d230e0c4 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb5d75fa0f4f847a671f69184c2db23aadddf2ac7a9e25d1927d28c18184fc0f +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a2356b6b396fd50110a3fe45259a25171362b9e5 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8933331a45aeef95820a6919adf16536e1dbfb652c7fd8fc9e3a18fcf3d6b3d9 +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..4854e975285843f95823759082b059363306ea06 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4165bc80df0386ed9547456c19949971870693610e838f12639743e2ae1879cb +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..32cd31dfe25a652f2c346bdbb5c184bbf176f834 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51d4c1af6bb74978251e7ec248a24d78e754f8d197292882e924d78e8e37d749 +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..36534ef8be2aff94238a2e5f7d0ca2e3fc2ebb94 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/trainer_state.json @@ -0,0 +1,1587 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 7.0, + "eval_steps": 10, + "global_step": 2142, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + }, + { + "epoch": 3.0065359477124183, + "grad_norm": 0.8342176079750061, + "learning_rate": 0.0002, + "loss": 0.8157, + "step": 920 + }, + { + "epoch": 3.0392156862745097, + "grad_norm": 0.7695813775062561, + "learning_rate": 0.0002, + "loss": 0.6855, + "step": 930 + }, + { + "epoch": 3.0718954248366015, + "grad_norm": 0.6819486618041992, + "learning_rate": 0.0002, + "loss": 0.6173, + "step": 940 + }, + { + "epoch": 3.104575163398693, + "grad_norm": 0.7568879723548889, + "learning_rate": 0.0002, + "loss": 0.6495, + "step": 950 + }, + { + "epoch": 3.1372549019607843, + "grad_norm": 0.6760695576667786, + "learning_rate": 0.0002, + "loss": 0.7905, + "step": 960 + }, + { + "epoch": 3.1699346405228757, + "grad_norm": 0.6359127759933472, + "learning_rate": 0.0002, + "loss": 0.6405, + "step": 970 + }, + { + "epoch": 3.2026143790849675, + "grad_norm": 0.8414971828460693, + "learning_rate": 0.0002, + "loss": 0.7172, + "step": 980 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.68381667137146, + "learning_rate": 0.0002, + "loss": 0.7865, + "step": 990 + }, + { + "epoch": 3.2679738562091503, + "grad_norm": 0.6852193474769592, + "learning_rate": 0.0002, + "loss": 0.6651, + "step": 1000 + }, + { + "epoch": 3.3006535947712417, + "grad_norm": 0.8184967041015625, + "learning_rate": 0.0002, + "loss": 0.6571, + "step": 1010 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 1.047290563583374, + "learning_rate": 0.0002, + "loss": 0.7036, + "step": 1020 + }, + { + "epoch": 3.366013071895425, + "grad_norm": 0.8291178345680237, + "learning_rate": 0.0002, + "loss": 0.7215, + "step": 1030 + }, + { + "epoch": 3.3986928104575163, + "grad_norm": 0.6668022871017456, + "learning_rate": 0.0002, + "loss": 0.6243, + "step": 1040 + }, + { + "epoch": 3.431372549019608, + "grad_norm": 0.6354008316993713, + "learning_rate": 0.0002, + "loss": 0.7459, + "step": 1050 + }, + { + "epoch": 3.4640522875816995, + "grad_norm": 1.2028366327285767, + "learning_rate": 0.0002, + "loss": 0.6826, + "step": 1060 + }, + { + "epoch": 3.496732026143791, + "grad_norm": 0.717367947101593, + "learning_rate": 0.0002, + "loss": 0.5913, + "step": 1070 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.542179524898529, + "learning_rate": 0.0002, + "loss": 0.6903, + "step": 1080 + }, + { + "epoch": 3.5620915032679736, + "grad_norm": 0.845981776714325, + "learning_rate": 0.0002, + "loss": 0.7673, + "step": 1090 + }, + { + "epoch": 3.5947712418300655, + "grad_norm": 0.7381046414375305, + "learning_rate": 0.0002, + "loss": 0.7089, + "step": 1100 + }, + { + "epoch": 3.627450980392157, + "grad_norm": 0.6563456058502197, + "learning_rate": 0.0002, + "loss": 0.6705, + "step": 1110 + }, + { + "epoch": 3.6601307189542482, + "grad_norm": 0.7130876779556274, + "learning_rate": 0.0002, + "loss": 0.7767, + "step": 1120 + }, + { + "epoch": 3.69281045751634, + "grad_norm": 0.800032913684845, + "learning_rate": 0.0002, + "loss": 0.7164, + "step": 1130 + }, + { + "epoch": 3.7254901960784315, + "grad_norm": 0.980328381061554, + "learning_rate": 0.0002, + "loss": 0.7272, + "step": 1140 + }, + { + "epoch": 3.758169934640523, + "grad_norm": 0.8542261123657227, + "learning_rate": 0.0002, + "loss": 0.7672, + "step": 1150 + }, + { + "epoch": 3.7908496732026142, + "grad_norm": 0.6302552819252014, + "learning_rate": 0.0002, + "loss": 0.679, + "step": 1160 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.515398383140564, + "learning_rate": 0.0002, + "loss": 0.7457, + "step": 1170 + }, + { + "epoch": 3.8562091503267975, + "grad_norm": 1.2427130937576294, + "learning_rate": 0.0002, + "loss": 0.693, + "step": 1180 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.8206831216812134, + "learning_rate": 0.0002, + "loss": 0.7182, + "step": 1190 + }, + { + "epoch": 3.9215686274509802, + "grad_norm": 0.7633249163627625, + "learning_rate": 0.0002, + "loss": 0.7519, + "step": 1200 + }, + { + "epoch": 3.954248366013072, + "grad_norm": 0.8034512400627136, + "learning_rate": 0.0002, + "loss": 0.7082, + "step": 1210 + }, + { + "epoch": 3.9869281045751634, + "grad_norm": 0.7667182087898254, + "learning_rate": 0.0002, + "loss": 0.6834, + "step": 1220 + }, + { + "epoch": 4.0, + "eval_loss": 1.3456707000732422, + "eval_runtime": 46.2562, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 1224 + }, + { + "epoch": 4.019607843137255, + "grad_norm": 0.7724746465682983, + "learning_rate": 0.0002, + "loss": 0.582, + "step": 1230 + }, + { + "epoch": 4.052287581699346, + "grad_norm": 1.166916847229004, + "learning_rate": 0.0002, + "loss": 0.4759, + "step": 1240 + }, + { + "epoch": 4.084967320261438, + "grad_norm": 0.7234508991241455, + "learning_rate": 0.0002, + "loss": 0.4995, + "step": 1250 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 1.1418060064315796, + "learning_rate": 0.0002, + "loss": 0.4863, + "step": 1260 + }, + { + "epoch": 4.150326797385621, + "grad_norm": 0.9603922367095947, + "learning_rate": 0.0002, + "loss": 0.5425, + "step": 1270 + }, + { + "epoch": 4.183006535947713, + "grad_norm": 0.8976530432701111, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 1280 + }, + { + "epoch": 4.215686274509804, + "grad_norm": 0.6855078339576721, + "learning_rate": 0.0002, + "loss": 0.473, + "step": 1290 + }, + { + "epoch": 4.248366013071895, + "grad_norm": 1.2676647901535034, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 1300 + }, + { + "epoch": 4.281045751633987, + "grad_norm": 1.104057788848877, + "learning_rate": 0.0002, + "loss": 0.5071, + "step": 1310 + }, + { + "epoch": 4.313725490196078, + "grad_norm": 1.7076562643051147, + "learning_rate": 0.0002, + "loss": 0.5168, + "step": 1320 + }, + { + "epoch": 4.34640522875817, + "grad_norm": 1.2308520078659058, + "learning_rate": 0.0002, + "loss": 0.4655, + "step": 1330 + }, + { + "epoch": 4.379084967320262, + "grad_norm": 1.2652729749679565, + "learning_rate": 0.0002, + "loss": 0.5322, + "step": 1340 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 1.054958701133728, + "learning_rate": 0.0002, + "loss": 0.5262, + "step": 1350 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 1.0130749940872192, + "learning_rate": 0.0002, + "loss": 0.4747, + "step": 1360 + }, + { + "epoch": 4.477124183006536, + "grad_norm": 1.0517818927764893, + "learning_rate": 0.0002, + "loss": 0.4887, + "step": 1370 + }, + { + "epoch": 4.509803921568627, + "grad_norm": 0.8593037128448486, + "learning_rate": 0.0002, + "loss": 0.4906, + "step": 1380 + }, + { + "epoch": 4.542483660130719, + "grad_norm": 1.0248081684112549, + "learning_rate": 0.0002, + "loss": 0.5049, + "step": 1390 + }, + { + "epoch": 4.57516339869281, + "grad_norm": 0.8999413847923279, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1400 + }, + { + "epoch": 4.607843137254902, + "grad_norm": 0.9106912612915039, + "learning_rate": 0.0002, + "loss": 0.5102, + "step": 1410 + }, + { + "epoch": 4.640522875816993, + "grad_norm": 1.2736181020736694, + "learning_rate": 0.0002, + "loss": 0.5203, + "step": 1420 + }, + { + "epoch": 4.673202614379085, + "grad_norm": 0.9311690926551819, + "learning_rate": 0.0002, + "loss": 0.5101, + "step": 1430 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 1.0455045700073242, + "learning_rate": 0.0002, + "loss": 0.5648, + "step": 1440 + }, + { + "epoch": 4.738562091503268, + "grad_norm": 1.0190727710723877, + "learning_rate": 0.0002, + "loss": 0.5004, + "step": 1450 + }, + { + "epoch": 4.771241830065359, + "grad_norm": 1.333198070526123, + "learning_rate": 0.0002, + "loss": 0.5506, + "step": 1460 + }, + { + "epoch": 4.803921568627451, + "grad_norm": 0.8808416724205017, + "learning_rate": 0.0002, + "loss": 0.5846, + "step": 1470 + }, + { + "epoch": 4.836601307189542, + "grad_norm": 0.8896227478981018, + "learning_rate": 0.0002, + "loss": 0.4671, + "step": 1480 + }, + { + "epoch": 4.8692810457516345, + "grad_norm": 1.212323784828186, + "learning_rate": 0.0002, + "loss": 0.4732, + "step": 1490 + }, + { + "epoch": 4.901960784313726, + "grad_norm": 1.0490120649337769, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1500 + }, + { + "epoch": 4.934640522875817, + "grad_norm": 0.8946618437767029, + "learning_rate": 0.0002, + "loss": 0.5815, + "step": 1510 + }, + { + "epoch": 4.967320261437909, + "grad_norm": 1.0609275102615356, + "learning_rate": 0.0002, + "loss": 0.5369, + "step": 1520 + }, + { + "epoch": 5.0, + "grad_norm": 0.8885099291801453, + "learning_rate": 0.0002, + "loss": 0.5348, + "step": 1530 + }, + { + "epoch": 5.0, + "eval_loss": 1.5771757364273071, + "eval_runtime": 46.2667, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.189, + "step": 1530 + }, + { + "epoch": 5.032679738562091, + "grad_norm": 1.401705026626587, + "learning_rate": 0.0002, + "loss": 0.3176, + "step": 1540 + }, + { + "epoch": 5.065359477124183, + "grad_norm": 0.8365539908409119, + "learning_rate": 0.0002, + "loss": 0.3346, + "step": 1550 + }, + { + "epoch": 5.098039215686274, + "grad_norm": 1.3188321590423584, + "learning_rate": 0.0002, + "loss": 0.3605, + "step": 1560 + }, + { + "epoch": 5.130718954248366, + "grad_norm": 0.9819526076316833, + "learning_rate": 0.0002, + "loss": 0.326, + "step": 1570 + }, + { + "epoch": 5.163398692810458, + "grad_norm": 1.13265061378479, + "learning_rate": 0.0002, + "loss": 0.3575, + "step": 1580 + }, + { + "epoch": 5.196078431372549, + "grad_norm": 1.478152871131897, + "learning_rate": 0.0002, + "loss": 0.3418, + "step": 1590 + }, + { + "epoch": 5.228758169934641, + "grad_norm": 1.4188750982284546, + "learning_rate": 0.0002, + "loss": 0.3388, + "step": 1600 + }, + { + "epoch": 5.261437908496732, + "grad_norm": 1.2499338388442993, + "learning_rate": 0.0002, + "loss": 0.3524, + "step": 1610 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.7885085344314575, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 1620 + }, + { + "epoch": 5.326797385620915, + "grad_norm": 1.2614946365356445, + "learning_rate": 0.0002, + "loss": 0.3237, + "step": 1630 + }, + { + "epoch": 5.359477124183006, + "grad_norm": 1.28338623046875, + "learning_rate": 0.0002, + "loss": 0.3511, + "step": 1640 + }, + { + "epoch": 5.392156862745098, + "grad_norm": 1.1973257064819336, + "learning_rate": 0.0002, + "loss": 0.3112, + "step": 1650 + }, + { + "epoch": 5.42483660130719, + "grad_norm": 1.1356301307678223, + "learning_rate": 0.0002, + "loss": 0.3645, + "step": 1660 + }, + { + "epoch": 5.457516339869281, + "grad_norm": 0.9048901200294495, + "learning_rate": 0.0002, + "loss": 0.307, + "step": 1670 + }, + { + "epoch": 5.490196078431373, + "grad_norm": 1.5352122783660889, + "learning_rate": 0.0002, + "loss": 0.3828, + "step": 1680 + }, + { + "epoch": 5.522875816993464, + "grad_norm": 0.9096335172653198, + "learning_rate": 0.0002, + "loss": 0.3826, + "step": 1690 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 1.1903661489486694, + "learning_rate": 0.0002, + "loss": 0.3686, + "step": 1700 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.9234451651573181, + "learning_rate": 0.0002, + "loss": 0.325, + "step": 1710 + }, + { + "epoch": 5.620915032679738, + "grad_norm": 1.4554102420806885, + "learning_rate": 0.0002, + "loss": 0.3451, + "step": 1720 + }, + { + "epoch": 5.65359477124183, + "grad_norm": 1.1044343709945679, + "learning_rate": 0.0002, + "loss": 0.3488, + "step": 1730 + }, + { + "epoch": 5.686274509803922, + "grad_norm": 1.2219593524932861, + "learning_rate": 0.0002, + "loss": 0.3673, + "step": 1740 + }, + { + "epoch": 5.718954248366013, + "grad_norm": 0.901652455329895, + "learning_rate": 0.0002, + "loss": 0.3517, + "step": 1750 + }, + { + "epoch": 5.751633986928105, + "grad_norm": 1.3334792852401733, + "learning_rate": 0.0002, + "loss": 0.4082, + "step": 1760 + }, + { + "epoch": 5.784313725490196, + "grad_norm": 1.5595488548278809, + "learning_rate": 0.0002, + "loss": 0.4386, + "step": 1770 + }, + { + "epoch": 5.816993464052287, + "grad_norm": 1.3892982006072998, + "learning_rate": 0.0002, + "loss": 0.3217, + "step": 1780 + }, + { + "epoch": 5.849673202614379, + "grad_norm": 1.0813168287277222, + "learning_rate": 0.0002, + "loss": 0.3919, + "step": 1790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 1.145320177078247, + "learning_rate": 0.0002, + "loss": 0.3697, + "step": 1800 + }, + { + "epoch": 5.915032679738562, + "grad_norm": 1.0249533653259277, + "learning_rate": 0.0002, + "loss": 0.3903, + "step": 1810 + }, + { + "epoch": 5.947712418300654, + "grad_norm": 1.0013737678527832, + "learning_rate": 0.0002, + "loss": 0.3481, + "step": 1820 + }, + { + "epoch": 5.980392156862745, + "grad_norm": 1.212314248085022, + "learning_rate": 0.0002, + "loss": 0.4278, + "step": 1830 + }, + { + "epoch": 6.0, + "eval_loss": 1.7506128549575806, + "eval_runtime": 46.2256, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.19, + "step": 1836 + }, + { + "epoch": 6.0130718954248366, + "grad_norm": 0.7339767813682556, + "learning_rate": 0.0002, + "loss": 0.3283, + "step": 1840 + }, + { + "epoch": 6.045751633986928, + "grad_norm": 1.1071710586547852, + "learning_rate": 0.0002, + "loss": 0.2304, + "step": 1850 + }, + { + "epoch": 6.078431372549019, + "grad_norm": 1.2613991498947144, + "learning_rate": 0.0002, + "loss": 0.2436, + "step": 1860 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 1.053133249282837, + "learning_rate": 0.0002, + "loss": 0.2403, + "step": 1870 + }, + { + "epoch": 6.143790849673203, + "grad_norm": 1.069568395614624, + "learning_rate": 0.0002, + "loss": 0.2509, + "step": 1880 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 1.020458698272705, + "learning_rate": 0.0002, + "loss": 0.2272, + "step": 1890 + }, + { + "epoch": 6.209150326797386, + "grad_norm": 1.2430394887924194, + "learning_rate": 0.0002, + "loss": 0.2408, + "step": 1900 + }, + { + "epoch": 6.241830065359477, + "grad_norm": 1.3475574254989624, + "learning_rate": 0.0002, + "loss": 0.229, + "step": 1910 + }, + { + "epoch": 6.2745098039215685, + "grad_norm": 0.9094598889350891, + "learning_rate": 0.0002, + "loss": 0.2542, + "step": 1920 + }, + { + "epoch": 6.30718954248366, + "grad_norm": 1.255650520324707, + "learning_rate": 0.0002, + "loss": 0.2009, + "step": 1930 + }, + { + "epoch": 6.339869281045751, + "grad_norm": 1.4193930625915527, + "learning_rate": 0.0002, + "loss": 0.25, + "step": 1940 + }, + { + "epoch": 6.372549019607844, + "grad_norm": 1.4378032684326172, + "learning_rate": 0.0002, + "loss": 0.293, + "step": 1950 + }, + { + "epoch": 6.405228758169935, + "grad_norm": 1.2236989736557007, + "learning_rate": 0.0002, + "loss": 0.2685, + "step": 1960 + }, + { + "epoch": 6.437908496732026, + "grad_norm": 1.0902987718582153, + "learning_rate": 0.0002, + "loss": 0.2608, + "step": 1970 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 1.1165062189102173, + "learning_rate": 0.0002, + "loss": 0.2267, + "step": 1980 + }, + { + "epoch": 6.503267973856209, + "grad_norm": 1.3953566551208496, + "learning_rate": 0.0002, + "loss": 0.2246, + "step": 1990 + }, + { + "epoch": 6.5359477124183005, + "grad_norm": 1.5215585231781006, + "learning_rate": 0.0002, + "loss": 0.2834, + "step": 2000 + }, + { + "epoch": 6.568627450980392, + "grad_norm": 1.3496609926223755, + "learning_rate": 0.0002, + "loss": 0.2457, + "step": 2010 + }, + { + "epoch": 6.601307189542483, + "grad_norm": 1.2511820793151855, + "learning_rate": 0.0002, + "loss": 0.2637, + "step": 2020 + }, + { + "epoch": 6.633986928104575, + "grad_norm": 1.9875848293304443, + "learning_rate": 0.0002, + "loss": 0.2956, + "step": 2030 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 1.0766608715057373, + "learning_rate": 0.0002, + "loss": 0.2664, + "step": 2040 + }, + { + "epoch": 6.699346405228758, + "grad_norm": 1.3594712018966675, + "learning_rate": 0.0002, + "loss": 0.2792, + "step": 2050 + }, + { + "epoch": 6.73202614379085, + "grad_norm": 1.2357292175292969, + "learning_rate": 0.0002, + "loss": 0.3194, + "step": 2060 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 1.2428375482559204, + "learning_rate": 0.0002, + "loss": 0.2526, + "step": 2070 + }, + { + "epoch": 6.7973856209150325, + "grad_norm": 1.2724156379699707, + "learning_rate": 0.0002, + "loss": 0.2355, + "step": 2080 + }, + { + "epoch": 6.830065359477124, + "grad_norm": 1.4981396198272705, + "learning_rate": 0.0002, + "loss": 0.289, + "step": 2090 + }, + { + "epoch": 6.862745098039216, + "grad_norm": 0.9346088171005249, + "learning_rate": 0.0002, + "loss": 0.2443, + "step": 2100 + }, + { + "epoch": 6.895424836601308, + "grad_norm": 1.3955477476119995, + "learning_rate": 0.0002, + "loss": 0.262, + "step": 2110 + }, + { + "epoch": 6.928104575163399, + "grad_norm": 1.492382287979126, + "learning_rate": 0.0002, + "loss": 0.2981, + "step": 2120 + }, + { + "epoch": 6.96078431372549, + "grad_norm": 1.2755712270736694, + "learning_rate": 0.0002, + "loss": 0.3093, + "step": 2130 + }, + { + "epoch": 6.993464052287582, + "grad_norm": 1.4600884914398193, + "learning_rate": 0.0002, + "loss": 0.2943, + "step": 2140 + }, + { + "epoch": 7.0, + "eval_loss": 1.9770371913909912, + "eval_runtime": 46.2588, + "eval_samples_per_second": 9.425, + "eval_steps_per_second": 1.189, + "step": 2142 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1002114313551872e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2142/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95f8c4bfbad6ddb011b5ace888e988eed3a03201 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f01ad95eba619b35632ef34ad74d9e86bd4fad67b3357523e689855bea266fa3 +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..52f2e7df11628a41be9de96b6cd87ee4640fbbc7 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e596ba51a382f1ddea78022bd0d98570419e7cb21f1424f169aff90b8d5f6d44 +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a785c80deaec82ef48b6c2684b07bb516dfa5c23 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:426a64bb3cacb707e0f39ada228c901d99e3164560513a6206a9021ff65e4f62 +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..998a4ffe7d8a9e39dcdd15d61ac920a5b1972ab4 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:794241e4292faf1c486dcb09e837545466e327529800d1ae841cb58adeb555e9 +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..89e51e4711f61411f21a5ef4dc467130db63a71f --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/trainer_state.json @@ -0,0 +1,1805 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 8.0, + "eval_steps": 10, + "global_step": 2448, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + }, + { + "epoch": 3.0065359477124183, + "grad_norm": 0.8342176079750061, + "learning_rate": 0.0002, + "loss": 0.8157, + "step": 920 + }, + { + "epoch": 3.0392156862745097, + "grad_norm": 0.7695813775062561, + "learning_rate": 0.0002, + "loss": 0.6855, + "step": 930 + }, + { + "epoch": 3.0718954248366015, + "grad_norm": 0.6819486618041992, + "learning_rate": 0.0002, + "loss": 0.6173, + "step": 940 + }, + { + "epoch": 3.104575163398693, + "grad_norm": 0.7568879723548889, + "learning_rate": 0.0002, + "loss": 0.6495, + "step": 950 + }, + { + "epoch": 3.1372549019607843, + "grad_norm": 0.6760695576667786, + "learning_rate": 0.0002, + "loss": 0.7905, + "step": 960 + }, + { + "epoch": 3.1699346405228757, + "grad_norm": 0.6359127759933472, + "learning_rate": 0.0002, + "loss": 0.6405, + "step": 970 + }, + { + "epoch": 3.2026143790849675, + "grad_norm": 0.8414971828460693, + "learning_rate": 0.0002, + "loss": 0.7172, + "step": 980 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.68381667137146, + "learning_rate": 0.0002, + "loss": 0.7865, + "step": 990 + }, + { + "epoch": 3.2679738562091503, + "grad_norm": 0.6852193474769592, + "learning_rate": 0.0002, + "loss": 0.6651, + "step": 1000 + }, + { + "epoch": 3.3006535947712417, + "grad_norm": 0.8184967041015625, + "learning_rate": 0.0002, + "loss": 0.6571, + "step": 1010 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 1.047290563583374, + "learning_rate": 0.0002, + "loss": 0.7036, + "step": 1020 + }, + { + "epoch": 3.366013071895425, + "grad_norm": 0.8291178345680237, + "learning_rate": 0.0002, + "loss": 0.7215, + "step": 1030 + }, + { + "epoch": 3.3986928104575163, + "grad_norm": 0.6668022871017456, + "learning_rate": 0.0002, + "loss": 0.6243, + "step": 1040 + }, + { + "epoch": 3.431372549019608, + "grad_norm": 0.6354008316993713, + "learning_rate": 0.0002, + "loss": 0.7459, + "step": 1050 + }, + { + "epoch": 3.4640522875816995, + "grad_norm": 1.2028366327285767, + "learning_rate": 0.0002, + "loss": 0.6826, + "step": 1060 + }, + { + "epoch": 3.496732026143791, + "grad_norm": 0.717367947101593, + "learning_rate": 0.0002, + "loss": 0.5913, + "step": 1070 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.542179524898529, + "learning_rate": 0.0002, + "loss": 0.6903, + "step": 1080 + }, + { + "epoch": 3.5620915032679736, + "grad_norm": 0.845981776714325, + "learning_rate": 0.0002, + "loss": 0.7673, + "step": 1090 + }, + { + "epoch": 3.5947712418300655, + "grad_norm": 0.7381046414375305, + "learning_rate": 0.0002, + "loss": 0.7089, + "step": 1100 + }, + { + "epoch": 3.627450980392157, + "grad_norm": 0.6563456058502197, + "learning_rate": 0.0002, + "loss": 0.6705, + "step": 1110 + }, + { + "epoch": 3.6601307189542482, + "grad_norm": 0.7130876779556274, + "learning_rate": 0.0002, + "loss": 0.7767, + "step": 1120 + }, + { + "epoch": 3.69281045751634, + "grad_norm": 0.800032913684845, + "learning_rate": 0.0002, + "loss": 0.7164, + "step": 1130 + }, + { + "epoch": 3.7254901960784315, + "grad_norm": 0.980328381061554, + "learning_rate": 0.0002, + "loss": 0.7272, + "step": 1140 + }, + { + "epoch": 3.758169934640523, + "grad_norm": 0.8542261123657227, + "learning_rate": 0.0002, + "loss": 0.7672, + "step": 1150 + }, + { + "epoch": 3.7908496732026142, + "grad_norm": 0.6302552819252014, + "learning_rate": 0.0002, + "loss": 0.679, + "step": 1160 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.515398383140564, + "learning_rate": 0.0002, + "loss": 0.7457, + "step": 1170 + }, + { + "epoch": 3.8562091503267975, + "grad_norm": 1.2427130937576294, + "learning_rate": 0.0002, + "loss": 0.693, + "step": 1180 + }, + { + "epoch": 3.888888888888889, + "grad_norm": 0.8206831216812134, + "learning_rate": 0.0002, + "loss": 0.7182, + "step": 1190 + }, + { + "epoch": 3.9215686274509802, + "grad_norm": 0.7633249163627625, + "learning_rate": 0.0002, + "loss": 0.7519, + "step": 1200 + }, + { + "epoch": 3.954248366013072, + "grad_norm": 0.8034512400627136, + "learning_rate": 0.0002, + "loss": 0.7082, + "step": 1210 + }, + { + "epoch": 3.9869281045751634, + "grad_norm": 0.7667182087898254, + "learning_rate": 0.0002, + "loss": 0.6834, + "step": 1220 + }, + { + "epoch": 4.0, + "eval_loss": 1.3456707000732422, + "eval_runtime": 46.2562, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 1224 + }, + { + "epoch": 4.019607843137255, + "grad_norm": 0.7724746465682983, + "learning_rate": 0.0002, + "loss": 0.582, + "step": 1230 + }, + { + "epoch": 4.052287581699346, + "grad_norm": 1.166916847229004, + "learning_rate": 0.0002, + "loss": 0.4759, + "step": 1240 + }, + { + "epoch": 4.084967320261438, + "grad_norm": 0.7234508991241455, + "learning_rate": 0.0002, + "loss": 0.4995, + "step": 1250 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 1.1418060064315796, + "learning_rate": 0.0002, + "loss": 0.4863, + "step": 1260 + }, + { + "epoch": 4.150326797385621, + "grad_norm": 0.9603922367095947, + "learning_rate": 0.0002, + "loss": 0.5425, + "step": 1270 + }, + { + "epoch": 4.183006535947713, + "grad_norm": 0.8976530432701111, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 1280 + }, + { + "epoch": 4.215686274509804, + "grad_norm": 0.6855078339576721, + "learning_rate": 0.0002, + "loss": 0.473, + "step": 1290 + }, + { + "epoch": 4.248366013071895, + "grad_norm": 1.2676647901535034, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 1300 + }, + { + "epoch": 4.281045751633987, + "grad_norm": 1.104057788848877, + "learning_rate": 0.0002, + "loss": 0.5071, + "step": 1310 + }, + { + "epoch": 4.313725490196078, + "grad_norm": 1.7076562643051147, + "learning_rate": 0.0002, + "loss": 0.5168, + "step": 1320 + }, + { + "epoch": 4.34640522875817, + "grad_norm": 1.2308520078659058, + "learning_rate": 0.0002, + "loss": 0.4655, + "step": 1330 + }, + { + "epoch": 4.379084967320262, + "grad_norm": 1.2652729749679565, + "learning_rate": 0.0002, + "loss": 0.5322, + "step": 1340 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 1.054958701133728, + "learning_rate": 0.0002, + "loss": 0.5262, + "step": 1350 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 1.0130749940872192, + "learning_rate": 0.0002, + "loss": 0.4747, + "step": 1360 + }, + { + "epoch": 4.477124183006536, + "grad_norm": 1.0517818927764893, + "learning_rate": 0.0002, + "loss": 0.4887, + "step": 1370 + }, + { + "epoch": 4.509803921568627, + "grad_norm": 0.8593037128448486, + "learning_rate": 0.0002, + "loss": 0.4906, + "step": 1380 + }, + { + "epoch": 4.542483660130719, + "grad_norm": 1.0248081684112549, + "learning_rate": 0.0002, + "loss": 0.5049, + "step": 1390 + }, + { + "epoch": 4.57516339869281, + "grad_norm": 0.8999413847923279, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1400 + }, + { + "epoch": 4.607843137254902, + "grad_norm": 0.9106912612915039, + "learning_rate": 0.0002, + "loss": 0.5102, + "step": 1410 + }, + { + "epoch": 4.640522875816993, + "grad_norm": 1.2736181020736694, + "learning_rate": 0.0002, + "loss": 0.5203, + "step": 1420 + }, + { + "epoch": 4.673202614379085, + "grad_norm": 0.9311690926551819, + "learning_rate": 0.0002, + "loss": 0.5101, + "step": 1430 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 1.0455045700073242, + "learning_rate": 0.0002, + "loss": 0.5648, + "step": 1440 + }, + { + "epoch": 4.738562091503268, + "grad_norm": 1.0190727710723877, + "learning_rate": 0.0002, + "loss": 0.5004, + "step": 1450 + }, + { + "epoch": 4.771241830065359, + "grad_norm": 1.333198070526123, + "learning_rate": 0.0002, + "loss": 0.5506, + "step": 1460 + }, + { + "epoch": 4.803921568627451, + "grad_norm": 0.8808416724205017, + "learning_rate": 0.0002, + "loss": 0.5846, + "step": 1470 + }, + { + "epoch": 4.836601307189542, + "grad_norm": 0.8896227478981018, + "learning_rate": 0.0002, + "loss": 0.4671, + "step": 1480 + }, + { + "epoch": 4.8692810457516345, + "grad_norm": 1.212323784828186, + "learning_rate": 0.0002, + "loss": 0.4732, + "step": 1490 + }, + { + "epoch": 4.901960784313726, + "grad_norm": 1.0490120649337769, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1500 + }, + { + "epoch": 4.934640522875817, + "grad_norm": 0.8946618437767029, + "learning_rate": 0.0002, + "loss": 0.5815, + "step": 1510 + }, + { + "epoch": 4.967320261437909, + "grad_norm": 1.0609275102615356, + "learning_rate": 0.0002, + "loss": 0.5369, + "step": 1520 + }, + { + "epoch": 5.0, + "grad_norm": 0.8885099291801453, + "learning_rate": 0.0002, + "loss": 0.5348, + "step": 1530 + }, + { + "epoch": 5.0, + "eval_loss": 1.5771757364273071, + "eval_runtime": 46.2667, + "eval_samples_per_second": 9.424, + "eval_steps_per_second": 1.189, + "step": 1530 + }, + { + "epoch": 5.032679738562091, + "grad_norm": 1.401705026626587, + "learning_rate": 0.0002, + "loss": 0.3176, + "step": 1540 + }, + { + "epoch": 5.065359477124183, + "grad_norm": 0.8365539908409119, + "learning_rate": 0.0002, + "loss": 0.3346, + "step": 1550 + }, + { + "epoch": 5.098039215686274, + "grad_norm": 1.3188321590423584, + "learning_rate": 0.0002, + "loss": 0.3605, + "step": 1560 + }, + { + "epoch": 5.130718954248366, + "grad_norm": 0.9819526076316833, + "learning_rate": 0.0002, + "loss": 0.326, + "step": 1570 + }, + { + "epoch": 5.163398692810458, + "grad_norm": 1.13265061378479, + "learning_rate": 0.0002, + "loss": 0.3575, + "step": 1580 + }, + { + "epoch": 5.196078431372549, + "grad_norm": 1.478152871131897, + "learning_rate": 0.0002, + "loss": 0.3418, + "step": 1590 + }, + { + "epoch": 5.228758169934641, + "grad_norm": 1.4188750982284546, + "learning_rate": 0.0002, + "loss": 0.3388, + "step": 1600 + }, + { + "epoch": 5.261437908496732, + "grad_norm": 1.2499338388442993, + "learning_rate": 0.0002, + "loss": 0.3524, + "step": 1610 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.7885085344314575, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 1620 + }, + { + "epoch": 5.326797385620915, + "grad_norm": 1.2614946365356445, + "learning_rate": 0.0002, + "loss": 0.3237, + "step": 1630 + }, + { + "epoch": 5.359477124183006, + "grad_norm": 1.28338623046875, + "learning_rate": 0.0002, + "loss": 0.3511, + "step": 1640 + }, + { + "epoch": 5.392156862745098, + "grad_norm": 1.1973257064819336, + "learning_rate": 0.0002, + "loss": 0.3112, + "step": 1650 + }, + { + "epoch": 5.42483660130719, + "grad_norm": 1.1356301307678223, + "learning_rate": 0.0002, + "loss": 0.3645, + "step": 1660 + }, + { + "epoch": 5.457516339869281, + "grad_norm": 0.9048901200294495, + "learning_rate": 0.0002, + "loss": 0.307, + "step": 1670 + }, + { + "epoch": 5.490196078431373, + "grad_norm": 1.5352122783660889, + "learning_rate": 0.0002, + "loss": 0.3828, + "step": 1680 + }, + { + "epoch": 5.522875816993464, + "grad_norm": 0.9096335172653198, + "learning_rate": 0.0002, + "loss": 0.3826, + "step": 1690 + }, + { + "epoch": 5.555555555555555, + "grad_norm": 1.1903661489486694, + "learning_rate": 0.0002, + "loss": 0.3686, + "step": 1700 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.9234451651573181, + "learning_rate": 0.0002, + "loss": 0.325, + "step": 1710 + }, + { + "epoch": 5.620915032679738, + "grad_norm": 1.4554102420806885, + "learning_rate": 0.0002, + "loss": 0.3451, + "step": 1720 + }, + { + "epoch": 5.65359477124183, + "grad_norm": 1.1044343709945679, + "learning_rate": 0.0002, + "loss": 0.3488, + "step": 1730 + }, + { + "epoch": 5.686274509803922, + "grad_norm": 1.2219593524932861, + "learning_rate": 0.0002, + "loss": 0.3673, + "step": 1740 + }, + { + "epoch": 5.718954248366013, + "grad_norm": 0.901652455329895, + "learning_rate": 0.0002, + "loss": 0.3517, + "step": 1750 + }, + { + "epoch": 5.751633986928105, + "grad_norm": 1.3334792852401733, + "learning_rate": 0.0002, + "loss": 0.4082, + "step": 1760 + }, + { + "epoch": 5.784313725490196, + "grad_norm": 1.5595488548278809, + "learning_rate": 0.0002, + "loss": 0.4386, + "step": 1770 + }, + { + "epoch": 5.816993464052287, + "grad_norm": 1.3892982006072998, + "learning_rate": 0.0002, + "loss": 0.3217, + "step": 1780 + }, + { + "epoch": 5.849673202614379, + "grad_norm": 1.0813168287277222, + "learning_rate": 0.0002, + "loss": 0.3919, + "step": 1790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 1.145320177078247, + "learning_rate": 0.0002, + "loss": 0.3697, + "step": 1800 + }, + { + "epoch": 5.915032679738562, + "grad_norm": 1.0249533653259277, + "learning_rate": 0.0002, + "loss": 0.3903, + "step": 1810 + }, + { + "epoch": 5.947712418300654, + "grad_norm": 1.0013737678527832, + "learning_rate": 0.0002, + "loss": 0.3481, + "step": 1820 + }, + { + "epoch": 5.980392156862745, + "grad_norm": 1.212314248085022, + "learning_rate": 0.0002, + "loss": 0.4278, + "step": 1830 + }, + { + "epoch": 6.0, + "eval_loss": 1.7506128549575806, + "eval_runtime": 46.2256, + "eval_samples_per_second": 9.432, + "eval_steps_per_second": 1.19, + "step": 1836 + }, + { + "epoch": 6.0130718954248366, + "grad_norm": 0.7339767813682556, + "learning_rate": 0.0002, + "loss": 0.3283, + "step": 1840 + }, + { + "epoch": 6.045751633986928, + "grad_norm": 1.1071710586547852, + "learning_rate": 0.0002, + "loss": 0.2304, + "step": 1850 + }, + { + "epoch": 6.078431372549019, + "grad_norm": 1.2613991498947144, + "learning_rate": 0.0002, + "loss": 0.2436, + "step": 1860 + }, + { + "epoch": 6.111111111111111, + "grad_norm": 1.053133249282837, + "learning_rate": 0.0002, + "loss": 0.2403, + "step": 1870 + }, + { + "epoch": 6.143790849673203, + "grad_norm": 1.069568395614624, + "learning_rate": 0.0002, + "loss": 0.2509, + "step": 1880 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 1.020458698272705, + "learning_rate": 0.0002, + "loss": 0.2272, + "step": 1890 + }, + { + "epoch": 6.209150326797386, + "grad_norm": 1.2430394887924194, + "learning_rate": 0.0002, + "loss": 0.2408, + "step": 1900 + }, + { + "epoch": 6.241830065359477, + "grad_norm": 1.3475574254989624, + "learning_rate": 0.0002, + "loss": 0.229, + "step": 1910 + }, + { + "epoch": 6.2745098039215685, + "grad_norm": 0.9094598889350891, + "learning_rate": 0.0002, + "loss": 0.2542, + "step": 1920 + }, + { + "epoch": 6.30718954248366, + "grad_norm": 1.255650520324707, + "learning_rate": 0.0002, + "loss": 0.2009, + "step": 1930 + }, + { + "epoch": 6.339869281045751, + "grad_norm": 1.4193930625915527, + "learning_rate": 0.0002, + "loss": 0.25, + "step": 1940 + }, + { + "epoch": 6.372549019607844, + "grad_norm": 1.4378032684326172, + "learning_rate": 0.0002, + "loss": 0.293, + "step": 1950 + }, + { + "epoch": 6.405228758169935, + "grad_norm": 1.2236989736557007, + "learning_rate": 0.0002, + "loss": 0.2685, + "step": 1960 + }, + { + "epoch": 6.437908496732026, + "grad_norm": 1.0902987718582153, + "learning_rate": 0.0002, + "loss": 0.2608, + "step": 1970 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 1.1165062189102173, + "learning_rate": 0.0002, + "loss": 0.2267, + "step": 1980 + }, + { + "epoch": 6.503267973856209, + "grad_norm": 1.3953566551208496, + "learning_rate": 0.0002, + "loss": 0.2246, + "step": 1990 + }, + { + "epoch": 6.5359477124183005, + "grad_norm": 1.5215585231781006, + "learning_rate": 0.0002, + "loss": 0.2834, + "step": 2000 + }, + { + "epoch": 6.568627450980392, + "grad_norm": 1.3496609926223755, + "learning_rate": 0.0002, + "loss": 0.2457, + "step": 2010 + }, + { + "epoch": 6.601307189542483, + "grad_norm": 1.2511820793151855, + "learning_rate": 0.0002, + "loss": 0.2637, + "step": 2020 + }, + { + "epoch": 6.633986928104575, + "grad_norm": 1.9875848293304443, + "learning_rate": 0.0002, + "loss": 0.2956, + "step": 2030 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 1.0766608715057373, + "learning_rate": 0.0002, + "loss": 0.2664, + "step": 2040 + }, + { + "epoch": 6.699346405228758, + "grad_norm": 1.3594712018966675, + "learning_rate": 0.0002, + "loss": 0.2792, + "step": 2050 + }, + { + "epoch": 6.73202614379085, + "grad_norm": 1.2357292175292969, + "learning_rate": 0.0002, + "loss": 0.3194, + "step": 2060 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 1.2428375482559204, + "learning_rate": 0.0002, + "loss": 0.2526, + "step": 2070 + }, + { + "epoch": 6.7973856209150325, + "grad_norm": 1.2724156379699707, + "learning_rate": 0.0002, + "loss": 0.2355, + "step": 2080 + }, + { + "epoch": 6.830065359477124, + "grad_norm": 1.4981396198272705, + "learning_rate": 0.0002, + "loss": 0.289, + "step": 2090 + }, + { + "epoch": 6.862745098039216, + "grad_norm": 0.9346088171005249, + "learning_rate": 0.0002, + "loss": 0.2443, + "step": 2100 + }, + { + "epoch": 6.895424836601308, + "grad_norm": 1.3955477476119995, + "learning_rate": 0.0002, + "loss": 0.262, + "step": 2110 + }, + { + "epoch": 6.928104575163399, + "grad_norm": 1.492382287979126, + "learning_rate": 0.0002, + "loss": 0.2981, + "step": 2120 + }, + { + "epoch": 6.96078431372549, + "grad_norm": 1.2755712270736694, + "learning_rate": 0.0002, + "loss": 0.3093, + "step": 2130 + }, + { + "epoch": 6.993464052287582, + "grad_norm": 1.4600884914398193, + "learning_rate": 0.0002, + "loss": 0.2943, + "step": 2140 + }, + { + "epoch": 7.0, + "eval_loss": 1.9770371913909912, + "eval_runtime": 46.2588, + "eval_samples_per_second": 9.425, + "eval_steps_per_second": 1.189, + "step": 2142 + }, + { + "epoch": 7.026143790849673, + "grad_norm": 1.205262541770935, + "learning_rate": 0.0002, + "loss": 0.2032, + "step": 2150 + }, + { + "epoch": 7.0588235294117645, + "grad_norm": 1.1699777841567993, + "learning_rate": 0.0002, + "loss": 0.1642, + "step": 2160 + }, + { + "epoch": 7.091503267973856, + "grad_norm": 1.2428505420684814, + "learning_rate": 0.0002, + "loss": 0.1886, + "step": 2170 + }, + { + "epoch": 7.124183006535947, + "grad_norm": 0.9843717813491821, + "learning_rate": 0.0002, + "loss": 0.1762, + "step": 2180 + }, + { + "epoch": 7.1568627450980395, + "grad_norm": 1.089490532875061, + "learning_rate": 0.0002, + "loss": 0.1723, + "step": 2190 + }, + { + "epoch": 7.189542483660131, + "grad_norm": 1.2811459302902222, + "learning_rate": 0.0002, + "loss": 0.1721, + "step": 2200 + }, + { + "epoch": 7.222222222222222, + "grad_norm": 1.4558671712875366, + "learning_rate": 0.0002, + "loss": 0.1983, + "step": 2210 + }, + { + "epoch": 7.254901960784314, + "grad_norm": 1.2111164331436157, + "learning_rate": 0.0002, + "loss": 0.178, + "step": 2220 + }, + { + "epoch": 7.287581699346405, + "grad_norm": 1.46672785282135, + "learning_rate": 0.0002, + "loss": 0.1463, + "step": 2230 + }, + { + "epoch": 7.3202614379084965, + "grad_norm": 1.464061975479126, + "learning_rate": 0.0002, + "loss": 0.1883, + "step": 2240 + }, + { + "epoch": 7.352941176470588, + "grad_norm": 1.6276377439498901, + "learning_rate": 0.0002, + "loss": 0.2276, + "step": 2250 + }, + { + "epoch": 7.38562091503268, + "grad_norm": 1.4011811017990112, + "learning_rate": 0.0002, + "loss": 0.1771, + "step": 2260 + }, + { + "epoch": 7.4183006535947715, + "grad_norm": 0.9894806146621704, + "learning_rate": 0.0002, + "loss": 0.1875, + "step": 2270 + }, + { + "epoch": 7.450980392156863, + "grad_norm": 0.9357670545578003, + "learning_rate": 0.0002, + "loss": 0.1725, + "step": 2280 + }, + { + "epoch": 7.483660130718954, + "grad_norm": 1.7790061235427856, + "learning_rate": 0.0002, + "loss": 0.1891, + "step": 2290 + }, + { + "epoch": 7.516339869281046, + "grad_norm": 1.468843698501587, + "learning_rate": 0.0002, + "loss": 0.1967, + "step": 2300 + }, + { + "epoch": 7.549019607843137, + "grad_norm": 1.063189148902893, + "learning_rate": 0.0002, + "loss": 0.195, + "step": 2310 + }, + { + "epoch": 7.5816993464052285, + "grad_norm": 0.7940694689750671, + "learning_rate": 0.0002, + "loss": 0.1752, + "step": 2320 + }, + { + "epoch": 7.61437908496732, + "grad_norm": 1.555564045906067, + "learning_rate": 0.0002, + "loss": 0.2169, + "step": 2330 + }, + { + "epoch": 7.647058823529412, + "grad_norm": 0.7388061881065369, + "learning_rate": 0.0002, + "loss": 0.1755, + "step": 2340 + }, + { + "epoch": 7.6797385620915035, + "grad_norm": 1.2422513961791992, + "learning_rate": 0.0002, + "loss": 0.2206, + "step": 2350 + }, + { + "epoch": 7.712418300653595, + "grad_norm": 1.3868855237960815, + "learning_rate": 0.0002, + "loss": 0.2015, + "step": 2360 + }, + { + "epoch": 7.745098039215686, + "grad_norm": 1.2965079545974731, + "learning_rate": 0.0002, + "loss": 0.1888, + "step": 2370 + }, + { + "epoch": 7.777777777777778, + "grad_norm": 1.4052339792251587, + "learning_rate": 0.0002, + "loss": 0.2257, + "step": 2380 + }, + { + "epoch": 7.810457516339869, + "grad_norm": 1.9005945920944214, + "learning_rate": 0.0002, + "loss": 0.2145, + "step": 2390 + }, + { + "epoch": 7.8431372549019605, + "grad_norm": 0.9947215914726257, + "learning_rate": 0.0002, + "loss": 0.2154, + "step": 2400 + }, + { + "epoch": 7.875816993464053, + "grad_norm": 0.796757698059082, + "learning_rate": 0.0002, + "loss": 0.1841, + "step": 2410 + }, + { + "epoch": 7.908496732026144, + "grad_norm": 2.4196858406066895, + "learning_rate": 0.0002, + "loss": 0.2476, + "step": 2420 + }, + { + "epoch": 7.9411764705882355, + "grad_norm": 1.7430493831634521, + "learning_rate": 0.0002, + "loss": 0.2136, + "step": 2430 + }, + { + "epoch": 7.973856209150327, + "grad_norm": 1.0432168245315552, + "learning_rate": 0.0002, + "loss": 0.2053, + "step": 2440 + }, + { + "epoch": 8.0, + "eval_loss": 2.267362356185913, + "eval_runtime": 46.2388, + "eval_samples_per_second": 9.429, + "eval_steps_per_second": 1.189, + "step": 2448 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.2573844929773568e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-2448/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1edf24ee4cadcc04acc1504ec06cbb61422ed17b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:230681bf5243219bb35baf9f833a124d007877d8340320327edd113ecb7b143d +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9da28b3935caf42e2a919220e7900268864181a --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91cbee641f98a3cde44341c640d36578bd48cad6ef3fb6647875183555308dc1 +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..8176904e301bb5c02f9b815bdbca4b9b39921f71 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a652197f45495263a8d6ed01735d5c581d6b93ff6ee3f05b0166b309d15f9a24 +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..d6a5305e91aa357e406ba671b5468558cd08ddba --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2737a31bade0c3c61d75256b61229477411c368f4ee01fd483bd841307ca525a +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..852ff0a273d12a4e2f8e7d9c8214c3c8b2ea0c91 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/trainer_state.json @@ -0,0 +1,251 @@ +{ + "best_metric": 1.1937541961669922, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306", + "epoch": 1.0, + "eval_steps": 10, + "global_step": 306, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.571730616221696e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2914583db0507b14a5bcd3d237d124baf41adf2d --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93008ec421c7a3492809b1931e60a60b7e286cb520e0ba8266c8862b2ae048ef +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0f420b92573242ad17878f0879e17cfb7d9f6fb3 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea03e3f874fa525cf92b58d1cef19cd1f3b877f72d27e4032797302e8a17262 +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d01faf1aeb8b9794df153b175963d4b0ed6ad22b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3ce3a7fcd5cd53f7179a619892f1f719be4fc9bb92dc0937e3c2762a64412c5 +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..158a77b829de4b5d6071e89d482ad5a7ee259eec --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fd201f99b49f06e01aa3df0f7df59fbc2c43546fb5710460cca23f33754c7ff +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b652e6c26830d5e3b390a072760290ef3f020bf5 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/trainer_state.json @@ -0,0 +1,476 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 612, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.143461232443392e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/README.md b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_model.safetensors b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59d65db48836bfe0ef605bfa7cd536d39f19682e --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:333a1f7cd97a1519fa2bf79b681920b9d489b8c24815bff2dbbb8d9d6a94a2c3 +size 143153376 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/optimizer.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac3ca5d7e4a823dfc7056dfe2cd58b00eb219505 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1548a10f56596ad71632e59372b11e84d3d16ced48bc523a6b2cfbef7f6357aa +size 72886650 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/rng_state.pth b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..950d6ef77c3bf6ab948deec1d6281b128a25a551 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c74af3530660bab801a8ab3745f732675c92e87cf6a7368f8d53a1c2a0c3453 +size 14244 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/scheduler.pt b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1fb300a3f7d7cc9ed714944aa5dd73eca56c827f --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b19d92e4fc21e9c90f025ac58543229cd4f340397ea61c83f015e60019e3d85 +size 1064 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/trainer_state.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cf990d703919caa83cd874168246c11c39f3ae8e --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/trainer_state.json @@ -0,0 +1,694 @@ +{ + "best_metric": 1.1845070123672485, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 918, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.032679738562091505, + "grad_norm": 0.631856381893158, + "learning_rate": 0.0002, + "loss": 2.3561, + "step": 10 + }, + { + "epoch": 0.06535947712418301, + "grad_norm": 0.5065668821334839, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 20 + }, + { + "epoch": 0.09803921568627451, + "grad_norm": 0.6978895664215088, + "learning_rate": 0.0002, + "loss": 1.4952, + "step": 30 + }, + { + "epoch": 0.13071895424836602, + "grad_norm": 0.6619144082069397, + "learning_rate": 0.0002, + "loss": 1.4829, + "step": 40 + }, + { + "epoch": 0.16339869281045752, + "grad_norm": 0.6153793931007385, + "learning_rate": 0.0002, + "loss": 1.3038, + "step": 50 + }, + { + "epoch": 0.19607843137254902, + "grad_norm": 0.4703301787376404, + "learning_rate": 0.0002, + "loss": 1.1429, + "step": 60 + }, + { + "epoch": 0.22875816993464052, + "grad_norm": 1.1672580242156982, + "learning_rate": 0.0002, + "loss": 1.2828, + "step": 70 + }, + { + "epoch": 0.26143790849673204, + "grad_norm": 0.3455565273761749, + "learning_rate": 0.0002, + "loss": 1.0985, + "step": 80 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.36216291785240173, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 90 + }, + { + "epoch": 0.32679738562091504, + "grad_norm": 0.4545166492462158, + "learning_rate": 0.0002, + "loss": 1.2544, + "step": 100 + }, + { + "epoch": 0.35947712418300654, + "grad_norm": 0.3612092435359955, + "learning_rate": 0.0002, + "loss": 1.2287, + "step": 110 + }, + { + "epoch": 0.39215686274509803, + "grad_norm": 0.5080830454826355, + "learning_rate": 0.0002, + "loss": 1.1868, + "step": 120 + }, + { + "epoch": 0.42483660130718953, + "grad_norm": 0.3268195390701294, + "learning_rate": 0.0002, + "loss": 1.1902, + "step": 130 + }, + { + "epoch": 0.45751633986928103, + "grad_norm": 0.33971714973449707, + "learning_rate": 0.0002, + "loss": 1.247, + "step": 140 + }, + { + "epoch": 0.49019607843137253, + "grad_norm": 0.4036043882369995, + "learning_rate": 0.0002, + "loss": 1.1844, + "step": 150 + }, + { + "epoch": 0.5228758169934641, + "grad_norm": 0.35938864946365356, + "learning_rate": 0.0002, + "loss": 1.1624, + "step": 160 + }, + { + "epoch": 0.5555555555555556, + "grad_norm": 0.28880223631858826, + "learning_rate": 0.0002, + "loss": 1.164, + "step": 170 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 0.3436269462108612, + "learning_rate": 0.0002, + "loss": 1.3521, + "step": 180 + }, + { + "epoch": 0.6209150326797386, + "grad_norm": 0.41923725605010986, + "learning_rate": 0.0002, + "loss": 1.2456, + "step": 190 + }, + { + "epoch": 0.6535947712418301, + "grad_norm": 0.25119203329086304, + "learning_rate": 0.0002, + "loss": 1.2226, + "step": 200 + }, + { + "epoch": 0.6862745098039216, + "grad_norm": 0.5870180726051331, + "learning_rate": 0.0002, + "loss": 1.1568, + "step": 210 + }, + { + "epoch": 0.7189542483660131, + "grad_norm": 0.2831224203109741, + "learning_rate": 0.0002, + "loss": 1.064, + "step": 220 + }, + { + "epoch": 0.7516339869281046, + "grad_norm": 0.3192005753517151, + "learning_rate": 0.0002, + "loss": 1.2469, + "step": 230 + }, + { + "epoch": 0.7843137254901961, + "grad_norm": 0.2998219430446625, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 240 + }, + { + "epoch": 0.8169934640522876, + "grad_norm": 0.32855790853500366, + "learning_rate": 0.0002, + "loss": 1.205, + "step": 250 + }, + { + "epoch": 0.8496732026143791, + "grad_norm": 0.3403124213218689, + "learning_rate": 0.0002, + "loss": 1.1631, + "step": 260 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 0.277401328086853, + "learning_rate": 0.0002, + "loss": 1.1646, + "step": 270 + }, + { + "epoch": 0.9150326797385621, + "grad_norm": 0.2975269556045532, + "learning_rate": 0.0002, + "loss": 1.084, + "step": 280 + }, + { + "epoch": 0.9477124183006536, + "grad_norm": 1.7909578084945679, + "learning_rate": 0.0002, + "loss": 1.2039, + "step": 290 + }, + { + "epoch": 0.9803921568627451, + "grad_norm": 0.2917245328426361, + "learning_rate": 0.0002, + "loss": 1.1226, + "step": 300 + }, + { + "epoch": 1.0, + "eval_loss": 1.1937541961669922, + "eval_runtime": 46.2571, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 306 + }, + { + "epoch": 1.0130718954248366, + "grad_norm": 0.26494982838630676, + "learning_rate": 0.0002, + "loss": 1.2011, + "step": 310 + }, + { + "epoch": 1.0457516339869282, + "grad_norm": 0.6289355754852295, + "learning_rate": 0.0002, + "loss": 1.0565, + "step": 320 + }, + { + "epoch": 1.0784313725490196, + "grad_norm": 0.26784011721611023, + "learning_rate": 0.0002, + "loss": 1.1619, + "step": 330 + }, + { + "epoch": 1.1111111111111112, + "grad_norm": 0.3392215967178345, + "learning_rate": 0.0002, + "loss": 1.1151, + "step": 340 + }, + { + "epoch": 1.1437908496732025, + "grad_norm": 0.40005937218666077, + "learning_rate": 0.0002, + "loss": 1.0752, + "step": 350 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.3590582013130188, + "learning_rate": 0.0002, + "loss": 1.0408, + "step": 360 + }, + { + "epoch": 1.2091503267973855, + "grad_norm": 0.3995305895805359, + "learning_rate": 0.0002, + "loss": 1.0836, + "step": 370 + }, + { + "epoch": 1.2418300653594772, + "grad_norm": 0.2950291633605957, + "learning_rate": 0.0002, + "loss": 1.0992, + "step": 380 + }, + { + "epoch": 1.2745098039215685, + "grad_norm": 0.32035166025161743, + "learning_rate": 0.0002, + "loss": 1.1152, + "step": 390 + }, + { + "epoch": 1.3071895424836601, + "grad_norm": 0.410366415977478, + "learning_rate": 0.0002, + "loss": 1.1467, + "step": 400 + }, + { + "epoch": 1.3398692810457518, + "grad_norm": 0.3106379508972168, + "learning_rate": 0.0002, + "loss": 0.9985, + "step": 410 + }, + { + "epoch": 1.3725490196078431, + "grad_norm": 0.38580670952796936, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 420 + }, + { + "epoch": 1.4052287581699345, + "grad_norm": 0.34411361813545227, + "learning_rate": 0.0002, + "loss": 1.0931, + "step": 430 + }, + { + "epoch": 1.4379084967320261, + "grad_norm": 0.44206851720809937, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 440 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.3492952585220337, + "learning_rate": 0.0002, + "loss": 1.0347, + "step": 450 + }, + { + "epoch": 1.5032679738562091, + "grad_norm": 0.376423716545105, + "learning_rate": 0.0002, + "loss": 1.0534, + "step": 460 + }, + { + "epoch": 1.5359477124183005, + "grad_norm": 0.359757661819458, + "learning_rate": 0.0002, + "loss": 1.1162, + "step": 470 + }, + { + "epoch": 1.5686274509803921, + "grad_norm": 0.3385067880153656, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 480 + }, + { + "epoch": 1.6013071895424837, + "grad_norm": 0.4943889379501343, + "learning_rate": 0.0002, + "loss": 1.0807, + "step": 490 + }, + { + "epoch": 1.6339869281045751, + "grad_norm": 0.4203241169452667, + "learning_rate": 0.0002, + "loss": 1.0796, + "step": 500 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 0.3093789219856262, + "learning_rate": 0.0002, + "loss": 1.1059, + "step": 510 + }, + { + "epoch": 1.6993464052287581, + "grad_norm": 0.3653067350387573, + "learning_rate": 0.0002, + "loss": 1.0323, + "step": 520 + }, + { + "epoch": 1.7320261437908497, + "grad_norm": 0.36761337518692017, + "learning_rate": 0.0002, + "loss": 1.0885, + "step": 530 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.5040399432182312, + "learning_rate": 0.0002, + "loss": 1.1698, + "step": 540 + }, + { + "epoch": 1.7973856209150327, + "grad_norm": 0.3818035125732422, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 550 + }, + { + "epoch": 1.8300653594771243, + "grad_norm": 0.4021618664264679, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 560 + }, + { + "epoch": 1.8627450980392157, + "grad_norm": 0.3986459970474243, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 570 + }, + { + "epoch": 1.8954248366013071, + "grad_norm": 0.48416733741760254, + "learning_rate": 0.0002, + "loss": 1.003, + "step": 580 + }, + { + "epoch": 1.9281045751633987, + "grad_norm": 0.36853986978530884, + "learning_rate": 0.0002, + "loss": 1.1146, + "step": 590 + }, + { + "epoch": 1.9607843137254903, + "grad_norm": 0.383022665977478, + "learning_rate": 0.0002, + "loss": 1.0689, + "step": 600 + }, + { + "epoch": 1.9934640522875817, + "grad_norm": 0.3169507086277008, + "learning_rate": 0.0002, + "loss": 1.098, + "step": 610 + }, + { + "epoch": 2.0, + "eval_loss": 1.1845070123672485, + "eval_runtime": 46.2811, + "eval_samples_per_second": 9.421, + "eval_steps_per_second": 1.188, + "step": 612 + }, + { + "epoch": 2.026143790849673, + "grad_norm": 0.8920142650604248, + "learning_rate": 0.0002, + "loss": 0.9618, + "step": 620 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.4814859628677368, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 630 + }, + { + "epoch": 2.0915032679738563, + "grad_norm": 0.4251559376716614, + "learning_rate": 0.0002, + "loss": 0.8464, + "step": 640 + }, + { + "epoch": 2.1241830065359477, + "grad_norm": 0.5295765399932861, + "learning_rate": 0.0002, + "loss": 0.932, + "step": 650 + }, + { + "epoch": 2.156862745098039, + "grad_norm": 0.45016610622406006, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 660 + }, + { + "epoch": 2.189542483660131, + "grad_norm": 0.5870586633682251, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 670 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.5174715518951416, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 680 + }, + { + "epoch": 2.2549019607843137, + "grad_norm": 0.5252485275268555, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 690 + }, + { + "epoch": 2.287581699346405, + "grad_norm": 0.5158312320709229, + "learning_rate": 0.0002, + "loss": 0.9253, + "step": 700 + }, + { + "epoch": 2.3202614379084965, + "grad_norm": 0.4824209213256836, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 710 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6335175037384033, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 720 + }, + { + "epoch": 2.3856209150326797, + "grad_norm": 0.5240563154220581, + "learning_rate": 0.0002, + "loss": 0.9685, + "step": 730 + }, + { + "epoch": 2.418300653594771, + "grad_norm": 0.5172886252403259, + "learning_rate": 0.0002, + "loss": 0.8794, + "step": 740 + }, + { + "epoch": 2.450980392156863, + "grad_norm": 0.48972561955451965, + "learning_rate": 0.0002, + "loss": 0.8158, + "step": 750 + }, + { + "epoch": 2.4836601307189543, + "grad_norm": 0.5295189023017883, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 760 + }, + { + "epoch": 2.5163398692810457, + "grad_norm": 0.5487208962440491, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 770 + }, + { + "epoch": 2.549019607843137, + "grad_norm": 0.5375093221664429, + "learning_rate": 0.0002, + "loss": 1.0109, + "step": 780 + }, + { + "epoch": 2.581699346405229, + "grad_norm": 0.5424453020095825, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 790 + }, + { + "epoch": 2.6143790849673203, + "grad_norm": 0.6029134392738342, + "learning_rate": 0.0002, + "loss": 1.0424, + "step": 800 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.6584921479225159, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 810 + }, + { + "epoch": 2.6797385620915035, + "grad_norm": 0.5735557675361633, + "learning_rate": 0.0002, + "loss": 0.7796, + "step": 820 + }, + { + "epoch": 2.712418300653595, + "grad_norm": 0.5216763019561768, + "learning_rate": 0.0002, + "loss": 0.8834, + "step": 830 + }, + { + "epoch": 2.7450980392156863, + "grad_norm": 0.5455219149589539, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 840 + }, + { + "epoch": 2.7777777777777777, + "grad_norm": 0.5139284729957581, + "learning_rate": 0.0002, + "loss": 0.8037, + "step": 850 + }, + { + "epoch": 2.810457516339869, + "grad_norm": 0.5096403360366821, + "learning_rate": 0.0002, + "loss": 0.988, + "step": 860 + }, + { + "epoch": 2.843137254901961, + "grad_norm": 0.6337038278579712, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 870 + }, + { + "epoch": 2.8758169934640523, + "grad_norm": 0.47218772768974304, + "learning_rate": 0.0002, + "loss": 0.8938, + "step": 880 + }, + { + "epoch": 2.9084967320261437, + "grad_norm": 0.4640636742115021, + "learning_rate": 0.0002, + "loss": 0.8554, + "step": 890 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.4199628531932831, + "learning_rate": 0.0002, + "loss": 0.8625, + "step": 900 + }, + { + "epoch": 2.973856209150327, + "grad_norm": 0.5067117214202881, + "learning_rate": 0.0002, + "loss": 0.8104, + "step": 910 + }, + { + "epoch": 3.0, + "eval_loss": 1.2291251420974731, + "eval_runtime": 46.2557, + "eval_samples_per_second": 9.426, + "eval_steps_per_second": 1.189, + "step": 918 + } + ], + "logging_steps": 10, + "max_steps": 2448, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.715191848665088e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/special_tokens_map.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.model b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer_config.json b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_args.bin b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fb8100bbfe18f1342e5fc87e32e8e798268e3b --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e66621362aa4eab196083c9a4bd0bf843b2425d7109d63be3ab5f4006691dd1 +size 5560 diff --git a/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_log.jsonl b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..2502e837e2dd8c2b50ba274147b4ea8faad54b05 --- /dev/null +++ b/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 1.0, "step": 306, "epoch_duration": 456.216677904129, "total_accumulated_duration": 456.216677904129, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 11696.9921875}, "avg_memory_reserved": {"GPU_0": 12758.0}, "peak_memory_reserved": {"GPU_0": 12758.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}]} +{"epoch": 2.0, "step": 612, "epoch_duration": 456.3443353176117, "total_accumulated_duration": 912.5610132217407, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-306", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}]} +{"epoch": 3.0, "step": 918, "epoch_duration": 455.7831633090973, "total_accumulated_duration": 1368.344176530838, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}]} +{"epoch": 4.0, "step": 1224, "epoch_duration": 455.8060460090637, "total_accumulated_duration": 1824.1502225399017, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}, {"eval_loss": 1.2291251420974731, "eval_runtime": 46.2557, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 3.0, "step": 918}, {"loss": 0.8157, "grad_norm": 0.8342176079750061, "learning_rate": 0.0002, "epoch": 3.0065359477124183, "step": 920}, {"loss": 0.6855, "grad_norm": 0.7695813775062561, "learning_rate": 0.0002, "epoch": 3.0392156862745097, "step": 930}, {"loss": 0.6173, "grad_norm": 0.6819486618041992, "learning_rate": 0.0002, "epoch": 3.0718954248366015, "step": 940}, {"loss": 0.6495, "grad_norm": 0.7568879723548889, "learning_rate": 0.0002, "epoch": 3.104575163398693, "step": 950}, {"loss": 0.7905, "grad_norm": 0.6760695576667786, "learning_rate": 0.0002, "epoch": 3.1372549019607843, "step": 960}, {"loss": 0.6405, "grad_norm": 0.6359127759933472, "learning_rate": 0.0002, "epoch": 3.1699346405228757, "step": 970}, {"loss": 0.7172, "grad_norm": 0.8414971828460693, "learning_rate": 0.0002, "epoch": 3.2026143790849675, "step": 980}, {"loss": 0.7865, "grad_norm": 0.68381667137146, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 990}, {"loss": 0.6651, "grad_norm": 0.6852193474769592, "learning_rate": 0.0002, "epoch": 3.2679738562091503, "step": 1000}, {"loss": 0.6571, "grad_norm": 0.8184967041015625, "learning_rate": 0.0002, "epoch": 3.3006535947712417, "step": 1010}, {"loss": 0.7036, "grad_norm": 1.047290563583374, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1020}, {"loss": 0.7215, "grad_norm": 0.8291178345680237, "learning_rate": 0.0002, "epoch": 3.366013071895425, "step": 1030}, {"loss": 0.6243, "grad_norm": 0.6668022871017456, "learning_rate": 0.0002, "epoch": 3.3986928104575163, "step": 1040}, {"loss": 0.7459, "grad_norm": 0.6354008316993713, "learning_rate": 0.0002, "epoch": 3.431372549019608, "step": 1050}, {"loss": 0.6826, "grad_norm": 1.2028366327285767, "learning_rate": 0.0002, "epoch": 3.4640522875816995, "step": 1060}, {"loss": 0.5913, "grad_norm": 0.717367947101593, "learning_rate": 0.0002, "epoch": 3.496732026143791, "step": 1070}, {"loss": 0.6903, "grad_norm": 0.542179524898529, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 1080}, {"loss": 0.7673, "grad_norm": 0.845981776714325, "learning_rate": 0.0002, "epoch": 3.5620915032679736, "step": 1090}, {"loss": 0.7089, "grad_norm": 0.7381046414375305, "learning_rate": 0.0002, "epoch": 3.5947712418300655, "step": 1100}, {"loss": 0.6705, "grad_norm": 0.6563456058502197, "learning_rate": 0.0002, "epoch": 3.627450980392157, "step": 1110}, {"loss": 0.7767, "grad_norm": 0.7130876779556274, "learning_rate": 0.0002, "epoch": 3.6601307189542482, "step": 1120}, {"loss": 0.7164, "grad_norm": 0.800032913684845, "learning_rate": 0.0002, "epoch": 3.69281045751634, "step": 1130}, {"loss": 0.7272, "grad_norm": 0.980328381061554, "learning_rate": 0.0002, "epoch": 3.7254901960784315, "step": 1140}, {"loss": 0.7672, "grad_norm": 0.8542261123657227, "learning_rate": 0.0002, "epoch": 3.758169934640523, "step": 1150}, {"loss": 0.679, "grad_norm": 0.6302552819252014, "learning_rate": 0.0002, "epoch": 3.7908496732026142, "step": 1160}, {"loss": 0.7457, "grad_norm": 0.515398383140564, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 1170}, {"loss": 0.693, "grad_norm": 1.2427130937576294, "learning_rate": 0.0002, "epoch": 3.8562091503267975, "step": 1180}, {"loss": 0.7182, "grad_norm": 0.8206831216812134, "learning_rate": 0.0002, "epoch": 3.888888888888889, "step": 1190}, {"loss": 0.7519, "grad_norm": 0.7633249163627625, "learning_rate": 0.0002, "epoch": 3.9215686274509802, "step": 1200}, {"loss": 0.7082, "grad_norm": 0.8034512400627136, "learning_rate": 0.0002, "epoch": 3.954248366013072, "step": 1210}, {"loss": 0.6834, "grad_norm": 0.7667182087898254, "learning_rate": 0.0002, "epoch": 3.9869281045751634, "step": 1220}]} +{"epoch": 5.0, "step": 1530, "epoch_duration": 455.743275642395, "total_accumulated_duration": 2279.8934981822968, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}, {"eval_loss": 1.2291251420974731, "eval_runtime": 46.2557, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 3.0, "step": 918}, {"loss": 0.8157, "grad_norm": 0.8342176079750061, "learning_rate": 0.0002, "epoch": 3.0065359477124183, "step": 920}, {"loss": 0.6855, "grad_norm": 0.7695813775062561, "learning_rate": 0.0002, "epoch": 3.0392156862745097, "step": 930}, {"loss": 0.6173, "grad_norm": 0.6819486618041992, "learning_rate": 0.0002, "epoch": 3.0718954248366015, "step": 940}, {"loss": 0.6495, "grad_norm": 0.7568879723548889, "learning_rate": 0.0002, "epoch": 3.104575163398693, "step": 950}, {"loss": 0.7905, "grad_norm": 0.6760695576667786, "learning_rate": 0.0002, "epoch": 3.1372549019607843, "step": 960}, {"loss": 0.6405, "grad_norm": 0.6359127759933472, "learning_rate": 0.0002, "epoch": 3.1699346405228757, "step": 970}, {"loss": 0.7172, "grad_norm": 0.8414971828460693, "learning_rate": 0.0002, "epoch": 3.2026143790849675, "step": 980}, {"loss": 0.7865, "grad_norm": 0.68381667137146, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 990}, {"loss": 0.6651, "grad_norm": 0.6852193474769592, "learning_rate": 0.0002, "epoch": 3.2679738562091503, "step": 1000}, {"loss": 0.6571, "grad_norm": 0.8184967041015625, "learning_rate": 0.0002, "epoch": 3.3006535947712417, "step": 1010}, {"loss": 0.7036, "grad_norm": 1.047290563583374, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1020}, {"loss": 0.7215, "grad_norm": 0.8291178345680237, "learning_rate": 0.0002, "epoch": 3.366013071895425, "step": 1030}, {"loss": 0.6243, "grad_norm": 0.6668022871017456, "learning_rate": 0.0002, "epoch": 3.3986928104575163, "step": 1040}, {"loss": 0.7459, "grad_norm": 0.6354008316993713, "learning_rate": 0.0002, "epoch": 3.431372549019608, "step": 1050}, {"loss": 0.6826, "grad_norm": 1.2028366327285767, "learning_rate": 0.0002, "epoch": 3.4640522875816995, "step": 1060}, {"loss": 0.5913, "grad_norm": 0.717367947101593, "learning_rate": 0.0002, "epoch": 3.496732026143791, "step": 1070}, {"loss": 0.6903, "grad_norm": 0.542179524898529, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 1080}, {"loss": 0.7673, "grad_norm": 0.845981776714325, "learning_rate": 0.0002, "epoch": 3.5620915032679736, "step": 1090}, {"loss": 0.7089, "grad_norm": 0.7381046414375305, "learning_rate": 0.0002, "epoch": 3.5947712418300655, "step": 1100}, {"loss": 0.6705, "grad_norm": 0.6563456058502197, "learning_rate": 0.0002, "epoch": 3.627450980392157, "step": 1110}, {"loss": 0.7767, "grad_norm": 0.7130876779556274, "learning_rate": 0.0002, "epoch": 3.6601307189542482, "step": 1120}, {"loss": 0.7164, "grad_norm": 0.800032913684845, "learning_rate": 0.0002, "epoch": 3.69281045751634, "step": 1130}, {"loss": 0.7272, "grad_norm": 0.980328381061554, "learning_rate": 0.0002, "epoch": 3.7254901960784315, "step": 1140}, {"loss": 0.7672, "grad_norm": 0.8542261123657227, "learning_rate": 0.0002, "epoch": 3.758169934640523, "step": 1150}, {"loss": 0.679, "grad_norm": 0.6302552819252014, "learning_rate": 0.0002, "epoch": 3.7908496732026142, "step": 1160}, {"loss": 0.7457, "grad_norm": 0.515398383140564, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 1170}, {"loss": 0.693, "grad_norm": 1.2427130937576294, "learning_rate": 0.0002, "epoch": 3.8562091503267975, "step": 1180}, {"loss": 0.7182, "grad_norm": 0.8206831216812134, "learning_rate": 0.0002, "epoch": 3.888888888888889, "step": 1190}, {"loss": 0.7519, "grad_norm": 0.7633249163627625, "learning_rate": 0.0002, "epoch": 3.9215686274509802, "step": 1200}, {"loss": 0.7082, "grad_norm": 0.8034512400627136, "learning_rate": 0.0002, "epoch": 3.954248366013072, "step": 1210}, {"loss": 0.6834, "grad_norm": 0.7667182087898254, "learning_rate": 0.0002, "epoch": 3.9869281045751634, "step": 1220}, {"eval_loss": 1.3456707000732422, "eval_runtime": 46.2562, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 4.0, "step": 1224}, {"loss": 0.582, "grad_norm": 0.7724746465682983, "learning_rate": 0.0002, "epoch": 4.019607843137255, "step": 1230}, {"loss": 0.4759, "grad_norm": 1.166916847229004, "learning_rate": 0.0002, "epoch": 4.052287581699346, "step": 1240}, {"loss": 0.4995, "grad_norm": 0.7234508991241455, "learning_rate": 0.0002, "epoch": 4.084967320261438, "step": 1250}, {"loss": 0.4863, "grad_norm": 1.1418060064315796, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 1260}, {"loss": 0.5425, "grad_norm": 0.9603922367095947, "learning_rate": 0.0002, "epoch": 4.150326797385621, "step": 1270}, {"loss": 0.4892, "grad_norm": 0.8976530432701111, "learning_rate": 0.0002, "epoch": 4.183006535947713, "step": 1280}, {"loss": 0.473, "grad_norm": 0.6855078339576721, "learning_rate": 0.0002, "epoch": 4.215686274509804, "step": 1290}, {"loss": 0.4416, "grad_norm": 1.2676647901535034, "learning_rate": 0.0002, "epoch": 4.248366013071895, "step": 1300}, {"loss": 0.5071, "grad_norm": 1.104057788848877, "learning_rate": 0.0002, "epoch": 4.281045751633987, "step": 1310}, {"loss": 0.5168, "grad_norm": 1.7076562643051147, "learning_rate": 0.0002, "epoch": 4.313725490196078, "step": 1320}, {"loss": 0.4655, "grad_norm": 1.2308520078659058, "learning_rate": 0.0002, "epoch": 4.34640522875817, "step": 1330}, {"loss": 0.5322, "grad_norm": 1.2652729749679565, "learning_rate": 0.0002, "epoch": 4.379084967320262, "step": 1340}, {"loss": 0.5262, "grad_norm": 1.054958701133728, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 1350}, {"loss": 0.4747, "grad_norm": 1.0130749940872192, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 1360}, {"loss": 0.4887, "grad_norm": 1.0517818927764893, "learning_rate": 0.0002, "epoch": 4.477124183006536, "step": 1370}, {"loss": 0.4906, "grad_norm": 0.8593037128448486, "learning_rate": 0.0002, "epoch": 4.509803921568627, "step": 1380}, {"loss": 0.5049, "grad_norm": 1.0248081684112549, "learning_rate": 0.0002, "epoch": 4.542483660130719, "step": 1390}, {"loss": 0.472, "grad_norm": 0.8999413847923279, "learning_rate": 0.0002, "epoch": 4.57516339869281, "step": 1400}, {"loss": 0.5102, "grad_norm": 0.9106912612915039, "learning_rate": 0.0002, "epoch": 4.607843137254902, "step": 1410}, {"loss": 0.5203, "grad_norm": 1.2736181020736694, "learning_rate": 0.0002, "epoch": 4.640522875816993, "step": 1420}, {"loss": 0.5101, "grad_norm": 0.9311690926551819, "learning_rate": 0.0002, "epoch": 4.673202614379085, "step": 1430}, {"loss": 0.5648, "grad_norm": 1.0455045700073242, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 1440}, {"loss": 0.5004, "grad_norm": 1.0190727710723877, "learning_rate": 0.0002, "epoch": 4.738562091503268, "step": 1450}, {"loss": 0.5506, "grad_norm": 1.333198070526123, "learning_rate": 0.0002, "epoch": 4.771241830065359, "step": 1460}, {"loss": 0.5846, "grad_norm": 0.8808416724205017, "learning_rate": 0.0002, "epoch": 4.803921568627451, "step": 1470}, {"loss": 0.4671, "grad_norm": 0.8896227478981018, "learning_rate": 0.0002, "epoch": 4.836601307189542, "step": 1480}, {"loss": 0.4732, "grad_norm": 1.212323784828186, "learning_rate": 0.0002, "epoch": 4.8692810457516345, "step": 1490}, {"loss": 0.5263, "grad_norm": 1.0490120649337769, "learning_rate": 0.0002, "epoch": 4.901960784313726, "step": 1500}, {"loss": 0.5815, "grad_norm": 0.8946618437767029, "learning_rate": 0.0002, "epoch": 4.934640522875817, "step": 1510}, {"loss": 0.5369, "grad_norm": 1.0609275102615356, "learning_rate": 0.0002, "epoch": 4.967320261437909, "step": 1520}, {"loss": 0.5348, "grad_norm": 0.8885099291801453, "learning_rate": 0.0002, "epoch": 5.0, "step": 1530}]} +{"epoch": 6.0, "step": 1836, "epoch_duration": 457.0913918018341, "total_accumulated_duration": 2736.984889984131, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}, {"eval_loss": 1.2291251420974731, "eval_runtime": 46.2557, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 3.0, "step": 918}, {"loss": 0.8157, "grad_norm": 0.8342176079750061, "learning_rate": 0.0002, "epoch": 3.0065359477124183, "step": 920}, {"loss": 0.6855, "grad_norm": 0.7695813775062561, "learning_rate": 0.0002, "epoch": 3.0392156862745097, "step": 930}, {"loss": 0.6173, "grad_norm": 0.6819486618041992, "learning_rate": 0.0002, "epoch": 3.0718954248366015, "step": 940}, {"loss": 0.6495, "grad_norm": 0.7568879723548889, "learning_rate": 0.0002, "epoch": 3.104575163398693, "step": 950}, {"loss": 0.7905, "grad_norm": 0.6760695576667786, "learning_rate": 0.0002, "epoch": 3.1372549019607843, "step": 960}, {"loss": 0.6405, "grad_norm": 0.6359127759933472, "learning_rate": 0.0002, "epoch": 3.1699346405228757, "step": 970}, {"loss": 0.7172, "grad_norm": 0.8414971828460693, "learning_rate": 0.0002, "epoch": 3.2026143790849675, "step": 980}, {"loss": 0.7865, "grad_norm": 0.68381667137146, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 990}, {"loss": 0.6651, "grad_norm": 0.6852193474769592, "learning_rate": 0.0002, "epoch": 3.2679738562091503, "step": 1000}, {"loss": 0.6571, "grad_norm": 0.8184967041015625, "learning_rate": 0.0002, "epoch": 3.3006535947712417, "step": 1010}, {"loss": 0.7036, "grad_norm": 1.047290563583374, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1020}, {"loss": 0.7215, "grad_norm": 0.8291178345680237, "learning_rate": 0.0002, "epoch": 3.366013071895425, "step": 1030}, {"loss": 0.6243, "grad_norm": 0.6668022871017456, "learning_rate": 0.0002, "epoch": 3.3986928104575163, "step": 1040}, {"loss": 0.7459, "grad_norm": 0.6354008316993713, "learning_rate": 0.0002, "epoch": 3.431372549019608, "step": 1050}, {"loss": 0.6826, "grad_norm": 1.2028366327285767, "learning_rate": 0.0002, "epoch": 3.4640522875816995, "step": 1060}, {"loss": 0.5913, "grad_norm": 0.717367947101593, "learning_rate": 0.0002, "epoch": 3.496732026143791, "step": 1070}, {"loss": 0.6903, "grad_norm": 0.542179524898529, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 1080}, {"loss": 0.7673, "grad_norm": 0.845981776714325, "learning_rate": 0.0002, "epoch": 3.5620915032679736, "step": 1090}, {"loss": 0.7089, "grad_norm": 0.7381046414375305, "learning_rate": 0.0002, "epoch": 3.5947712418300655, "step": 1100}, {"loss": 0.6705, "grad_norm": 0.6563456058502197, "learning_rate": 0.0002, "epoch": 3.627450980392157, "step": 1110}, {"loss": 0.7767, "grad_norm": 0.7130876779556274, "learning_rate": 0.0002, "epoch": 3.6601307189542482, "step": 1120}, {"loss": 0.7164, "grad_norm": 0.800032913684845, "learning_rate": 0.0002, "epoch": 3.69281045751634, "step": 1130}, {"loss": 0.7272, "grad_norm": 0.980328381061554, "learning_rate": 0.0002, "epoch": 3.7254901960784315, "step": 1140}, {"loss": 0.7672, "grad_norm": 0.8542261123657227, "learning_rate": 0.0002, "epoch": 3.758169934640523, "step": 1150}, {"loss": 0.679, "grad_norm": 0.6302552819252014, "learning_rate": 0.0002, "epoch": 3.7908496732026142, "step": 1160}, {"loss": 0.7457, "grad_norm": 0.515398383140564, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 1170}, {"loss": 0.693, "grad_norm": 1.2427130937576294, "learning_rate": 0.0002, "epoch": 3.8562091503267975, "step": 1180}, {"loss": 0.7182, "grad_norm": 0.8206831216812134, "learning_rate": 0.0002, "epoch": 3.888888888888889, "step": 1190}, {"loss": 0.7519, "grad_norm": 0.7633249163627625, "learning_rate": 0.0002, "epoch": 3.9215686274509802, "step": 1200}, {"loss": 0.7082, "grad_norm": 0.8034512400627136, "learning_rate": 0.0002, "epoch": 3.954248366013072, "step": 1210}, {"loss": 0.6834, "grad_norm": 0.7667182087898254, "learning_rate": 0.0002, "epoch": 3.9869281045751634, "step": 1220}, {"eval_loss": 1.3456707000732422, "eval_runtime": 46.2562, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 4.0, "step": 1224}, {"loss": 0.582, "grad_norm": 0.7724746465682983, "learning_rate": 0.0002, "epoch": 4.019607843137255, "step": 1230}, {"loss": 0.4759, "grad_norm": 1.166916847229004, "learning_rate": 0.0002, "epoch": 4.052287581699346, "step": 1240}, {"loss": 0.4995, "grad_norm": 0.7234508991241455, "learning_rate": 0.0002, "epoch": 4.084967320261438, "step": 1250}, {"loss": 0.4863, "grad_norm": 1.1418060064315796, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 1260}, {"loss": 0.5425, "grad_norm": 0.9603922367095947, "learning_rate": 0.0002, "epoch": 4.150326797385621, "step": 1270}, {"loss": 0.4892, "grad_norm": 0.8976530432701111, "learning_rate": 0.0002, "epoch": 4.183006535947713, "step": 1280}, {"loss": 0.473, "grad_norm": 0.6855078339576721, "learning_rate": 0.0002, "epoch": 4.215686274509804, "step": 1290}, {"loss": 0.4416, "grad_norm": 1.2676647901535034, "learning_rate": 0.0002, "epoch": 4.248366013071895, "step": 1300}, {"loss": 0.5071, "grad_norm": 1.104057788848877, "learning_rate": 0.0002, "epoch": 4.281045751633987, "step": 1310}, {"loss": 0.5168, "grad_norm": 1.7076562643051147, "learning_rate": 0.0002, "epoch": 4.313725490196078, "step": 1320}, {"loss": 0.4655, "grad_norm": 1.2308520078659058, "learning_rate": 0.0002, "epoch": 4.34640522875817, "step": 1330}, {"loss": 0.5322, "grad_norm": 1.2652729749679565, "learning_rate": 0.0002, "epoch": 4.379084967320262, "step": 1340}, {"loss": 0.5262, "grad_norm": 1.054958701133728, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 1350}, {"loss": 0.4747, "grad_norm": 1.0130749940872192, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 1360}, {"loss": 0.4887, "grad_norm": 1.0517818927764893, "learning_rate": 0.0002, "epoch": 4.477124183006536, "step": 1370}, {"loss": 0.4906, "grad_norm": 0.8593037128448486, "learning_rate": 0.0002, "epoch": 4.509803921568627, "step": 1380}, {"loss": 0.5049, "grad_norm": 1.0248081684112549, "learning_rate": 0.0002, "epoch": 4.542483660130719, "step": 1390}, {"loss": 0.472, "grad_norm": 0.8999413847923279, "learning_rate": 0.0002, "epoch": 4.57516339869281, "step": 1400}, {"loss": 0.5102, "grad_norm": 0.9106912612915039, "learning_rate": 0.0002, "epoch": 4.607843137254902, "step": 1410}, {"loss": 0.5203, "grad_norm": 1.2736181020736694, "learning_rate": 0.0002, "epoch": 4.640522875816993, "step": 1420}, {"loss": 0.5101, "grad_norm": 0.9311690926551819, "learning_rate": 0.0002, "epoch": 4.673202614379085, "step": 1430}, {"loss": 0.5648, "grad_norm": 1.0455045700073242, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 1440}, {"loss": 0.5004, "grad_norm": 1.0190727710723877, "learning_rate": 0.0002, "epoch": 4.738562091503268, "step": 1450}, {"loss": 0.5506, "grad_norm": 1.333198070526123, "learning_rate": 0.0002, "epoch": 4.771241830065359, "step": 1460}, {"loss": 0.5846, "grad_norm": 0.8808416724205017, "learning_rate": 0.0002, "epoch": 4.803921568627451, "step": 1470}, {"loss": 0.4671, "grad_norm": 0.8896227478981018, "learning_rate": 0.0002, "epoch": 4.836601307189542, "step": 1480}, {"loss": 0.4732, "grad_norm": 1.212323784828186, "learning_rate": 0.0002, "epoch": 4.8692810457516345, "step": 1490}, {"loss": 0.5263, "grad_norm": 1.0490120649337769, "learning_rate": 0.0002, "epoch": 4.901960784313726, "step": 1500}, {"loss": 0.5815, "grad_norm": 0.8946618437767029, "learning_rate": 0.0002, "epoch": 4.934640522875817, "step": 1510}, {"loss": 0.5369, "grad_norm": 1.0609275102615356, "learning_rate": 0.0002, "epoch": 4.967320261437909, "step": 1520}, {"loss": 0.5348, "grad_norm": 0.8885099291801453, "learning_rate": 0.0002, "epoch": 5.0, "step": 1530}, {"eval_loss": 1.5771757364273071, "eval_runtime": 46.2667, "eval_samples_per_second": 9.424, "eval_steps_per_second": 1.189, "epoch": 5.0, "step": 1530}, {"loss": 0.3176, "grad_norm": 1.401705026626587, "learning_rate": 0.0002, "epoch": 5.032679738562091, "step": 1540}, {"loss": 0.3346, "grad_norm": 0.8365539908409119, "learning_rate": 0.0002, "epoch": 5.065359477124183, "step": 1550}, {"loss": 0.3605, "grad_norm": 1.3188321590423584, "learning_rate": 0.0002, "epoch": 5.098039215686274, "step": 1560}, {"loss": 0.326, "grad_norm": 0.9819526076316833, "learning_rate": 0.0002, "epoch": 5.130718954248366, "step": 1570}, {"loss": 0.3575, "grad_norm": 1.13265061378479, "learning_rate": 0.0002, "epoch": 5.163398692810458, "step": 1580}, {"loss": 0.3418, "grad_norm": 1.478152871131897, "learning_rate": 0.0002, "epoch": 5.196078431372549, "step": 1590}, {"loss": 0.3388, "grad_norm": 1.4188750982284546, "learning_rate": 0.0002, "epoch": 5.228758169934641, "step": 1600}, {"loss": 0.3524, "grad_norm": 1.2499338388442993, "learning_rate": 0.0002, "epoch": 5.261437908496732, "step": 1610}, {"loss": 0.423, "grad_norm": 1.7885085344314575, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 1620}, {"loss": 0.3237, "grad_norm": 1.2614946365356445, "learning_rate": 0.0002, "epoch": 5.326797385620915, "step": 1630}, {"loss": 0.3511, "grad_norm": 1.28338623046875, "learning_rate": 0.0002, "epoch": 5.359477124183006, "step": 1640}, {"loss": 0.3112, "grad_norm": 1.1973257064819336, "learning_rate": 0.0002, "epoch": 5.392156862745098, "step": 1650}, {"loss": 0.3645, "grad_norm": 1.1356301307678223, "learning_rate": 0.0002, "epoch": 5.42483660130719, "step": 1660}, {"loss": 0.307, "grad_norm": 0.9048901200294495, "learning_rate": 0.0002, "epoch": 5.457516339869281, "step": 1670}, {"loss": 0.3828, "grad_norm": 1.5352122783660889, "learning_rate": 0.0002, "epoch": 5.490196078431373, "step": 1680}, {"loss": 0.3826, "grad_norm": 0.9096335172653198, "learning_rate": 0.0002, "epoch": 5.522875816993464, "step": 1690}, {"loss": 0.3686, "grad_norm": 1.1903661489486694, "learning_rate": 0.0002, "epoch": 5.555555555555555, "step": 1700}, {"loss": 0.325, "grad_norm": 0.9234451651573181, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 1710}, {"loss": 0.3451, "grad_norm": 1.4554102420806885, "learning_rate": 0.0002, "epoch": 5.620915032679738, "step": 1720}, {"loss": 0.3488, "grad_norm": 1.1044343709945679, "learning_rate": 0.0002, "epoch": 5.65359477124183, "step": 1730}, {"loss": 0.3673, "grad_norm": 1.2219593524932861, "learning_rate": 0.0002, "epoch": 5.686274509803922, "step": 1740}, {"loss": 0.3517, "grad_norm": 0.901652455329895, "learning_rate": 0.0002, "epoch": 5.718954248366013, "step": 1750}, {"loss": 0.4082, "grad_norm": 1.3334792852401733, "learning_rate": 0.0002, "epoch": 5.751633986928105, "step": 1760}, {"loss": 0.4386, "grad_norm": 1.5595488548278809, "learning_rate": 0.0002, "epoch": 5.784313725490196, "step": 1770}, {"loss": 0.3217, "grad_norm": 1.3892982006072998, "learning_rate": 0.0002, "epoch": 5.816993464052287, "step": 1780}, {"loss": 0.3919, "grad_norm": 1.0813168287277222, "learning_rate": 0.0002, "epoch": 5.849673202614379, "step": 1790}, {"loss": 0.3697, "grad_norm": 1.145320177078247, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 1800}, {"loss": 0.3903, "grad_norm": 1.0249533653259277, "learning_rate": 0.0002, "epoch": 5.915032679738562, "step": 1810}, {"loss": 0.3481, "grad_norm": 1.0013737678527832, "learning_rate": 0.0002, "epoch": 5.947712418300654, "step": 1820}, {"loss": 0.4278, "grad_norm": 1.212314248085022, "learning_rate": 0.0002, "epoch": 5.980392156862745, "step": 1830}]} +{"epoch": 7.0, "step": 2142, "epoch_duration": 455.7560234069824, "total_accumulated_duration": 3192.7409133911133, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}, {"eval_loss": 1.2291251420974731, "eval_runtime": 46.2557, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 3.0, "step": 918}, {"loss": 0.8157, "grad_norm": 0.8342176079750061, "learning_rate": 0.0002, "epoch": 3.0065359477124183, "step": 920}, {"loss": 0.6855, "grad_norm": 0.7695813775062561, "learning_rate": 0.0002, "epoch": 3.0392156862745097, "step": 930}, {"loss": 0.6173, "grad_norm": 0.6819486618041992, "learning_rate": 0.0002, "epoch": 3.0718954248366015, "step": 940}, {"loss": 0.6495, "grad_norm": 0.7568879723548889, "learning_rate": 0.0002, "epoch": 3.104575163398693, "step": 950}, {"loss": 0.7905, "grad_norm": 0.6760695576667786, "learning_rate": 0.0002, "epoch": 3.1372549019607843, "step": 960}, {"loss": 0.6405, "grad_norm": 0.6359127759933472, "learning_rate": 0.0002, "epoch": 3.1699346405228757, "step": 970}, {"loss": 0.7172, "grad_norm": 0.8414971828460693, "learning_rate": 0.0002, "epoch": 3.2026143790849675, "step": 980}, {"loss": 0.7865, "grad_norm": 0.68381667137146, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 990}, {"loss": 0.6651, "grad_norm": 0.6852193474769592, "learning_rate": 0.0002, "epoch": 3.2679738562091503, "step": 1000}, {"loss": 0.6571, "grad_norm": 0.8184967041015625, "learning_rate": 0.0002, "epoch": 3.3006535947712417, "step": 1010}, {"loss": 0.7036, "grad_norm": 1.047290563583374, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1020}, {"loss": 0.7215, "grad_norm": 0.8291178345680237, "learning_rate": 0.0002, "epoch": 3.366013071895425, "step": 1030}, {"loss": 0.6243, "grad_norm": 0.6668022871017456, "learning_rate": 0.0002, "epoch": 3.3986928104575163, "step": 1040}, {"loss": 0.7459, "grad_norm": 0.6354008316993713, "learning_rate": 0.0002, "epoch": 3.431372549019608, "step": 1050}, {"loss": 0.6826, "grad_norm": 1.2028366327285767, "learning_rate": 0.0002, "epoch": 3.4640522875816995, "step": 1060}, {"loss": 0.5913, "grad_norm": 0.717367947101593, "learning_rate": 0.0002, "epoch": 3.496732026143791, "step": 1070}, {"loss": 0.6903, "grad_norm": 0.542179524898529, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 1080}, {"loss": 0.7673, "grad_norm": 0.845981776714325, "learning_rate": 0.0002, "epoch": 3.5620915032679736, "step": 1090}, {"loss": 0.7089, "grad_norm": 0.7381046414375305, "learning_rate": 0.0002, "epoch": 3.5947712418300655, "step": 1100}, {"loss": 0.6705, "grad_norm": 0.6563456058502197, "learning_rate": 0.0002, "epoch": 3.627450980392157, "step": 1110}, {"loss": 0.7767, "grad_norm": 0.7130876779556274, "learning_rate": 0.0002, "epoch": 3.6601307189542482, "step": 1120}, {"loss": 0.7164, "grad_norm": 0.800032913684845, "learning_rate": 0.0002, "epoch": 3.69281045751634, "step": 1130}, {"loss": 0.7272, "grad_norm": 0.980328381061554, "learning_rate": 0.0002, "epoch": 3.7254901960784315, "step": 1140}, {"loss": 0.7672, "grad_norm": 0.8542261123657227, "learning_rate": 0.0002, "epoch": 3.758169934640523, "step": 1150}, {"loss": 0.679, "grad_norm": 0.6302552819252014, "learning_rate": 0.0002, "epoch": 3.7908496732026142, "step": 1160}, {"loss": 0.7457, "grad_norm": 0.515398383140564, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 1170}, {"loss": 0.693, "grad_norm": 1.2427130937576294, "learning_rate": 0.0002, "epoch": 3.8562091503267975, "step": 1180}, {"loss": 0.7182, "grad_norm": 0.8206831216812134, "learning_rate": 0.0002, "epoch": 3.888888888888889, "step": 1190}, {"loss": 0.7519, "grad_norm": 0.7633249163627625, "learning_rate": 0.0002, "epoch": 3.9215686274509802, "step": 1200}, {"loss": 0.7082, "grad_norm": 0.8034512400627136, "learning_rate": 0.0002, "epoch": 3.954248366013072, "step": 1210}, {"loss": 0.6834, "grad_norm": 0.7667182087898254, "learning_rate": 0.0002, "epoch": 3.9869281045751634, "step": 1220}, {"eval_loss": 1.3456707000732422, "eval_runtime": 46.2562, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 4.0, "step": 1224}, {"loss": 0.582, "grad_norm": 0.7724746465682983, "learning_rate": 0.0002, "epoch": 4.019607843137255, "step": 1230}, {"loss": 0.4759, "grad_norm": 1.166916847229004, "learning_rate": 0.0002, "epoch": 4.052287581699346, "step": 1240}, {"loss": 0.4995, "grad_norm": 0.7234508991241455, "learning_rate": 0.0002, "epoch": 4.084967320261438, "step": 1250}, {"loss": 0.4863, "grad_norm": 1.1418060064315796, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 1260}, {"loss": 0.5425, "grad_norm": 0.9603922367095947, "learning_rate": 0.0002, "epoch": 4.150326797385621, "step": 1270}, {"loss": 0.4892, "grad_norm": 0.8976530432701111, "learning_rate": 0.0002, "epoch": 4.183006535947713, "step": 1280}, {"loss": 0.473, "grad_norm": 0.6855078339576721, "learning_rate": 0.0002, "epoch": 4.215686274509804, "step": 1290}, {"loss": 0.4416, "grad_norm": 1.2676647901535034, "learning_rate": 0.0002, "epoch": 4.248366013071895, "step": 1300}, {"loss": 0.5071, "grad_norm": 1.104057788848877, "learning_rate": 0.0002, "epoch": 4.281045751633987, "step": 1310}, {"loss": 0.5168, "grad_norm": 1.7076562643051147, "learning_rate": 0.0002, "epoch": 4.313725490196078, "step": 1320}, {"loss": 0.4655, "grad_norm": 1.2308520078659058, "learning_rate": 0.0002, "epoch": 4.34640522875817, "step": 1330}, {"loss": 0.5322, "grad_norm": 1.2652729749679565, "learning_rate": 0.0002, "epoch": 4.379084967320262, "step": 1340}, {"loss": 0.5262, "grad_norm": 1.054958701133728, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 1350}, {"loss": 0.4747, "grad_norm": 1.0130749940872192, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 1360}, {"loss": 0.4887, "grad_norm": 1.0517818927764893, "learning_rate": 0.0002, "epoch": 4.477124183006536, "step": 1370}, {"loss": 0.4906, "grad_norm": 0.8593037128448486, "learning_rate": 0.0002, "epoch": 4.509803921568627, "step": 1380}, {"loss": 0.5049, "grad_norm": 1.0248081684112549, "learning_rate": 0.0002, "epoch": 4.542483660130719, "step": 1390}, {"loss": 0.472, "grad_norm": 0.8999413847923279, "learning_rate": 0.0002, "epoch": 4.57516339869281, "step": 1400}, {"loss": 0.5102, "grad_norm": 0.9106912612915039, "learning_rate": 0.0002, "epoch": 4.607843137254902, "step": 1410}, {"loss": 0.5203, "grad_norm": 1.2736181020736694, "learning_rate": 0.0002, "epoch": 4.640522875816993, "step": 1420}, {"loss": 0.5101, "grad_norm": 0.9311690926551819, "learning_rate": 0.0002, "epoch": 4.673202614379085, "step": 1430}, {"loss": 0.5648, "grad_norm": 1.0455045700073242, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 1440}, {"loss": 0.5004, "grad_norm": 1.0190727710723877, "learning_rate": 0.0002, "epoch": 4.738562091503268, "step": 1450}, {"loss": 0.5506, "grad_norm": 1.333198070526123, "learning_rate": 0.0002, "epoch": 4.771241830065359, "step": 1460}, {"loss": 0.5846, "grad_norm": 0.8808416724205017, "learning_rate": 0.0002, "epoch": 4.803921568627451, "step": 1470}, {"loss": 0.4671, "grad_norm": 0.8896227478981018, "learning_rate": 0.0002, "epoch": 4.836601307189542, "step": 1480}, {"loss": 0.4732, "grad_norm": 1.212323784828186, "learning_rate": 0.0002, "epoch": 4.8692810457516345, "step": 1490}, {"loss": 0.5263, "grad_norm": 1.0490120649337769, "learning_rate": 0.0002, "epoch": 4.901960784313726, "step": 1500}, {"loss": 0.5815, "grad_norm": 0.8946618437767029, "learning_rate": 0.0002, "epoch": 4.934640522875817, "step": 1510}, {"loss": 0.5369, "grad_norm": 1.0609275102615356, "learning_rate": 0.0002, "epoch": 4.967320261437909, "step": 1520}, {"loss": 0.5348, "grad_norm": 0.8885099291801453, "learning_rate": 0.0002, "epoch": 5.0, "step": 1530}, {"eval_loss": 1.5771757364273071, "eval_runtime": 46.2667, "eval_samples_per_second": 9.424, "eval_steps_per_second": 1.189, "epoch": 5.0, "step": 1530}, {"loss": 0.3176, "grad_norm": 1.401705026626587, "learning_rate": 0.0002, "epoch": 5.032679738562091, "step": 1540}, {"loss": 0.3346, "grad_norm": 0.8365539908409119, "learning_rate": 0.0002, "epoch": 5.065359477124183, "step": 1550}, {"loss": 0.3605, "grad_norm": 1.3188321590423584, "learning_rate": 0.0002, "epoch": 5.098039215686274, "step": 1560}, {"loss": 0.326, "grad_norm": 0.9819526076316833, "learning_rate": 0.0002, "epoch": 5.130718954248366, "step": 1570}, {"loss": 0.3575, "grad_norm": 1.13265061378479, "learning_rate": 0.0002, "epoch": 5.163398692810458, "step": 1580}, {"loss": 0.3418, "grad_norm": 1.478152871131897, "learning_rate": 0.0002, "epoch": 5.196078431372549, "step": 1590}, {"loss": 0.3388, "grad_norm": 1.4188750982284546, "learning_rate": 0.0002, "epoch": 5.228758169934641, "step": 1600}, {"loss": 0.3524, "grad_norm": 1.2499338388442993, "learning_rate": 0.0002, "epoch": 5.261437908496732, "step": 1610}, {"loss": 0.423, "grad_norm": 1.7885085344314575, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 1620}, {"loss": 0.3237, "grad_norm": 1.2614946365356445, "learning_rate": 0.0002, "epoch": 5.326797385620915, "step": 1630}, {"loss": 0.3511, "grad_norm": 1.28338623046875, "learning_rate": 0.0002, "epoch": 5.359477124183006, "step": 1640}, {"loss": 0.3112, "grad_norm": 1.1973257064819336, "learning_rate": 0.0002, "epoch": 5.392156862745098, "step": 1650}, {"loss": 0.3645, "grad_norm": 1.1356301307678223, "learning_rate": 0.0002, "epoch": 5.42483660130719, "step": 1660}, {"loss": 0.307, "grad_norm": 0.9048901200294495, "learning_rate": 0.0002, "epoch": 5.457516339869281, "step": 1670}, {"loss": 0.3828, "grad_norm": 1.5352122783660889, "learning_rate": 0.0002, "epoch": 5.490196078431373, "step": 1680}, {"loss": 0.3826, "grad_norm": 0.9096335172653198, "learning_rate": 0.0002, "epoch": 5.522875816993464, "step": 1690}, {"loss": 0.3686, "grad_norm": 1.1903661489486694, "learning_rate": 0.0002, "epoch": 5.555555555555555, "step": 1700}, {"loss": 0.325, "grad_norm": 0.9234451651573181, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 1710}, {"loss": 0.3451, "grad_norm": 1.4554102420806885, "learning_rate": 0.0002, "epoch": 5.620915032679738, "step": 1720}, {"loss": 0.3488, "grad_norm": 1.1044343709945679, "learning_rate": 0.0002, "epoch": 5.65359477124183, "step": 1730}, {"loss": 0.3673, "grad_norm": 1.2219593524932861, "learning_rate": 0.0002, "epoch": 5.686274509803922, "step": 1740}, {"loss": 0.3517, "grad_norm": 0.901652455329895, "learning_rate": 0.0002, "epoch": 5.718954248366013, "step": 1750}, {"loss": 0.4082, "grad_norm": 1.3334792852401733, "learning_rate": 0.0002, "epoch": 5.751633986928105, "step": 1760}, {"loss": 0.4386, "grad_norm": 1.5595488548278809, "learning_rate": 0.0002, "epoch": 5.784313725490196, "step": 1770}, {"loss": 0.3217, "grad_norm": 1.3892982006072998, "learning_rate": 0.0002, "epoch": 5.816993464052287, "step": 1780}, {"loss": 0.3919, "grad_norm": 1.0813168287277222, "learning_rate": 0.0002, "epoch": 5.849673202614379, "step": 1790}, {"loss": 0.3697, "grad_norm": 1.145320177078247, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 1800}, {"loss": 0.3903, "grad_norm": 1.0249533653259277, "learning_rate": 0.0002, "epoch": 5.915032679738562, "step": 1810}, {"loss": 0.3481, "grad_norm": 1.0013737678527832, "learning_rate": 0.0002, "epoch": 5.947712418300654, "step": 1820}, {"loss": 0.4278, "grad_norm": 1.212314248085022, "learning_rate": 0.0002, "epoch": 5.980392156862745, "step": 1830}, {"eval_loss": 1.7506128549575806, "eval_runtime": 46.2256, "eval_samples_per_second": 9.432, "eval_steps_per_second": 1.19, "epoch": 6.0, "step": 1836}, {"loss": 0.3283, "grad_norm": 0.7339767813682556, "learning_rate": 0.0002, "epoch": 6.0130718954248366, "step": 1840}, {"loss": 0.2304, "grad_norm": 1.1071710586547852, "learning_rate": 0.0002, "epoch": 6.045751633986928, "step": 1850}, {"loss": 0.2436, "grad_norm": 1.2613991498947144, "learning_rate": 0.0002, "epoch": 6.078431372549019, "step": 1860}, {"loss": 0.2403, "grad_norm": 1.053133249282837, "learning_rate": 0.0002, "epoch": 6.111111111111111, "step": 1870}, {"loss": 0.2509, "grad_norm": 1.069568395614624, "learning_rate": 0.0002, "epoch": 6.143790849673203, "step": 1880}, {"loss": 0.2272, "grad_norm": 1.020458698272705, "learning_rate": 0.0002, "epoch": 6.176470588235294, "step": 1890}, {"loss": 0.2408, "grad_norm": 1.2430394887924194, "learning_rate": 0.0002, "epoch": 6.209150326797386, "step": 1900}, {"loss": 0.229, "grad_norm": 1.3475574254989624, "learning_rate": 0.0002, "epoch": 6.241830065359477, "step": 1910}, {"loss": 0.2542, "grad_norm": 0.9094598889350891, "learning_rate": 0.0002, "epoch": 6.2745098039215685, "step": 1920}, {"loss": 0.2009, "grad_norm": 1.255650520324707, "learning_rate": 0.0002, "epoch": 6.30718954248366, "step": 1930}, {"loss": 0.25, "grad_norm": 1.4193930625915527, "learning_rate": 0.0002, "epoch": 6.339869281045751, "step": 1940}, {"loss": 0.293, "grad_norm": 1.4378032684326172, "learning_rate": 0.0002, "epoch": 6.372549019607844, "step": 1950}, {"loss": 0.2685, "grad_norm": 1.2236989736557007, "learning_rate": 0.0002, "epoch": 6.405228758169935, "step": 1960}, {"loss": 0.2608, "grad_norm": 1.0902987718582153, "learning_rate": 0.0002, "epoch": 6.437908496732026, "step": 1970}, {"loss": 0.2267, "grad_norm": 1.1165062189102173, "learning_rate": 0.0002, "epoch": 6.470588235294118, "step": 1980}, {"loss": 0.2246, "grad_norm": 1.3953566551208496, "learning_rate": 0.0002, "epoch": 6.503267973856209, "step": 1990}, {"loss": 0.2834, "grad_norm": 1.5215585231781006, "learning_rate": 0.0002, "epoch": 6.5359477124183005, "step": 2000}, {"loss": 0.2457, "grad_norm": 1.3496609926223755, "learning_rate": 0.0002, "epoch": 6.568627450980392, "step": 2010}, {"loss": 0.2637, "grad_norm": 1.2511820793151855, "learning_rate": 0.0002, "epoch": 6.601307189542483, "step": 2020}, {"loss": 0.2956, "grad_norm": 1.9875848293304443, "learning_rate": 0.0002, "epoch": 6.633986928104575, "step": 2030}, {"loss": 0.2664, "grad_norm": 1.0766608715057373, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 2040}, {"loss": 0.2792, "grad_norm": 1.3594712018966675, "learning_rate": 0.0002, "epoch": 6.699346405228758, "step": 2050}, {"loss": 0.3194, "grad_norm": 1.2357292175292969, "learning_rate": 0.0002, "epoch": 6.73202614379085, "step": 2060}, {"loss": 0.2526, "grad_norm": 1.2428375482559204, "learning_rate": 0.0002, "epoch": 6.764705882352941, "step": 2070}, {"loss": 0.2355, "grad_norm": 1.2724156379699707, "learning_rate": 0.0002, "epoch": 6.7973856209150325, "step": 2080}, {"loss": 0.289, "grad_norm": 1.4981396198272705, "learning_rate": 0.0002, "epoch": 6.830065359477124, "step": 2090}, {"loss": 0.2443, "grad_norm": 0.9346088171005249, "learning_rate": 0.0002, "epoch": 6.862745098039216, "step": 2100}, {"loss": 0.262, "grad_norm": 1.3955477476119995, "learning_rate": 0.0002, "epoch": 6.895424836601308, "step": 2110}, {"loss": 0.2981, "grad_norm": 1.492382287979126, "learning_rate": 0.0002, "epoch": 6.928104575163399, "step": 2120}, {"loss": 0.3093, "grad_norm": 1.2755712270736694, "learning_rate": 0.0002, "epoch": 6.96078431372549, "step": 2130}, {"loss": 0.2943, "grad_norm": 1.4600884914398193, "learning_rate": 0.0002, "epoch": 6.993464052287582, "step": 2140}]} +{"epoch": 8.0, "step": 2448, "epoch_duration": 457.8266222476959, "total_accumulated_duration": 3650.567535638809, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19860.224609375}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.3561, "grad_norm": 0.631856381893158, "learning_rate": 0.0002, "epoch": 0.032679738562091505, "step": 10}, {"loss": 1.8141, "grad_norm": 0.5065668821334839, "learning_rate": 0.0002, "epoch": 0.06535947712418301, "step": 20}, {"loss": 1.4952, "grad_norm": 0.6978895664215088, "learning_rate": 0.0002, "epoch": 0.09803921568627451, "step": 30}, {"loss": 1.4829, "grad_norm": 0.6619144082069397, "learning_rate": 0.0002, "epoch": 0.13071895424836602, "step": 40}, {"loss": 1.3038, "grad_norm": 0.6153793931007385, "learning_rate": 0.0002, "epoch": 0.16339869281045752, "step": 50}, {"loss": 1.1429, "grad_norm": 0.4703301787376404, "learning_rate": 0.0002, "epoch": 0.19607843137254902, "step": 60}, {"loss": 1.2828, "grad_norm": 1.1672580242156982, "learning_rate": 0.0002, "epoch": 0.22875816993464052, "step": 70}, {"loss": 1.0985, "grad_norm": 0.3455565273761749, "learning_rate": 0.0002, "epoch": 0.26143790849673204, "step": 80}, {"loss": 1.1359, "grad_norm": 0.36216291785240173, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 90}, {"loss": 1.2544, "grad_norm": 0.4545166492462158, "learning_rate": 0.0002, "epoch": 0.32679738562091504, "step": 100}, {"loss": 1.2287, "grad_norm": 0.3612092435359955, "learning_rate": 0.0002, "epoch": 0.35947712418300654, "step": 110}, {"loss": 1.1868, "grad_norm": 0.5080830454826355, "learning_rate": 0.0002, "epoch": 0.39215686274509803, "step": 120}, {"loss": 1.1902, "grad_norm": 0.3268195390701294, "learning_rate": 0.0002, "epoch": 0.42483660130718953, "step": 130}, {"loss": 1.247, "grad_norm": 0.33971714973449707, "learning_rate": 0.0002, "epoch": 0.45751633986928103, "step": 140}, {"loss": 1.1844, "grad_norm": 0.4036043882369995, "learning_rate": 0.0002, "epoch": 0.49019607843137253, "step": 150}, {"loss": 1.1624, "grad_norm": 0.35938864946365356, "learning_rate": 0.0002, "epoch": 0.5228758169934641, "step": 160}, {"loss": 1.164, "grad_norm": 0.28880223631858826, "learning_rate": 0.0002, "epoch": 0.5555555555555556, "step": 170}, {"loss": 1.3521, "grad_norm": 0.3436269462108612, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 180}, {"loss": 1.2456, "grad_norm": 0.41923725605010986, "learning_rate": 0.0002, "epoch": 0.6209150326797386, "step": 190}, {"loss": 1.2226, "grad_norm": 0.25119203329086304, "learning_rate": 0.0002, "epoch": 0.6535947712418301, "step": 200}, {"loss": 1.1568, "grad_norm": 0.5870180726051331, "learning_rate": 0.0002, "epoch": 0.6862745098039216, "step": 210}, {"loss": 1.064, "grad_norm": 0.2831224203109741, "learning_rate": 0.0002, "epoch": 0.7189542483660131, "step": 220}, {"loss": 1.2469, "grad_norm": 0.3192005753517151, "learning_rate": 0.0002, "epoch": 0.7516339869281046, "step": 230}, {"loss": 1.1613, "grad_norm": 0.2998219430446625, "learning_rate": 0.0002, "epoch": 0.7843137254901961, "step": 240}, {"loss": 1.205, "grad_norm": 0.32855790853500366, "learning_rate": 0.0002, "epoch": 0.8169934640522876, "step": 250}, {"loss": 1.1631, "grad_norm": 0.3403124213218689, "learning_rate": 0.0002, "epoch": 0.8496732026143791, "step": 260}, {"loss": 1.1646, "grad_norm": 0.277401328086853, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 270}, {"loss": 1.084, "grad_norm": 0.2975269556045532, "learning_rate": 0.0002, "epoch": 0.9150326797385621, "step": 280}, {"loss": 1.2039, "grad_norm": 1.7909578084945679, "learning_rate": 0.0002, "epoch": 0.9477124183006536, "step": 290}, {"loss": 1.1226, "grad_norm": 0.2917245328426361, "learning_rate": 0.0002, "epoch": 0.9803921568627451, "step": 300}, {"eval_loss": 1.1937541961669922, "eval_runtime": 46.2571, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 1.0, "step": 306}, {"loss": 1.2011, "grad_norm": 0.26494982838630676, "learning_rate": 0.0002, "epoch": 1.0130718954248366, "step": 310}, {"loss": 1.0565, "grad_norm": 0.6289355754852295, "learning_rate": 0.0002, "epoch": 1.0457516339869282, "step": 320}, {"loss": 1.1619, "grad_norm": 0.26784011721611023, "learning_rate": 0.0002, "epoch": 1.0784313725490196, "step": 330}, {"loss": 1.1151, "grad_norm": 0.3392215967178345, "learning_rate": 0.0002, "epoch": 1.1111111111111112, "step": 340}, {"loss": 1.0752, "grad_norm": 0.40005937218666077, "learning_rate": 0.0002, "epoch": 1.1437908496732025, "step": 350}, {"loss": 1.0408, "grad_norm": 0.3590582013130188, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 360}, {"loss": 1.0836, "grad_norm": 0.3995305895805359, "learning_rate": 0.0002, "epoch": 1.2091503267973855, "step": 370}, {"loss": 1.0992, "grad_norm": 0.2950291633605957, "learning_rate": 0.0002, "epoch": 1.2418300653594772, "step": 380}, {"loss": 1.1152, "grad_norm": 0.32035166025161743, "learning_rate": 0.0002, "epoch": 1.2745098039215685, "step": 390}, {"loss": 1.1467, "grad_norm": 0.410366415977478, "learning_rate": 0.0002, "epoch": 1.3071895424836601, "step": 400}, {"loss": 0.9985, "grad_norm": 0.3106379508972168, "learning_rate": 0.0002, "epoch": 1.3398692810457518, "step": 410}, {"loss": 0.9789, "grad_norm": 0.38580670952796936, "learning_rate": 0.0002, "epoch": 1.3725490196078431, "step": 420}, {"loss": 1.0931, "grad_norm": 0.34411361813545227, "learning_rate": 0.0002, "epoch": 1.4052287581699345, "step": 430}, {"loss": 1.1685, "grad_norm": 0.44206851720809937, "learning_rate": 0.0002, "epoch": 1.4379084967320261, "step": 440}, {"loss": 1.0347, "grad_norm": 0.3492952585220337, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 450}, {"loss": 1.0534, "grad_norm": 0.376423716545105, "learning_rate": 0.0002, "epoch": 1.5032679738562091, "step": 460}, {"loss": 1.1162, "grad_norm": 0.359757661819458, "learning_rate": 0.0002, "epoch": 1.5359477124183005, "step": 470}, {"loss": 0.9586, "grad_norm": 0.3385067880153656, "learning_rate": 0.0002, "epoch": 1.5686274509803921, "step": 480}, {"loss": 1.0807, "grad_norm": 0.4943889379501343, "learning_rate": 0.0002, "epoch": 1.6013071895424837, "step": 490}, {"loss": 1.0796, "grad_norm": 0.4203241169452667, "learning_rate": 0.0002, "epoch": 1.6339869281045751, "step": 500}, {"loss": 1.1059, "grad_norm": 0.3093789219856262, "learning_rate": 0.0002, "epoch": 1.6666666666666665, "step": 510}, {"loss": 1.0323, "grad_norm": 0.3653067350387573, "learning_rate": 0.0002, "epoch": 1.6993464052287581, "step": 520}, {"loss": 1.0885, "grad_norm": 0.36761337518692017, "learning_rate": 0.0002, "epoch": 1.7320261437908497, "step": 530}, {"loss": 1.1698, "grad_norm": 0.5040399432182312, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 540}, {"loss": 1.0105, "grad_norm": 0.3818035125732422, "learning_rate": 0.0002, "epoch": 1.7973856209150327, "step": 550}, {"loss": 0.94, "grad_norm": 0.4021618664264679, "learning_rate": 0.0002, "epoch": 1.8300653594771243, "step": 560}, {"loss": 1.0358, "grad_norm": 0.3986459970474243, "learning_rate": 0.0002, "epoch": 1.8627450980392157, "step": 570}, {"loss": 1.003, "grad_norm": 0.48416733741760254, "learning_rate": 0.0002, "epoch": 1.8954248366013071, "step": 580}, {"loss": 1.1146, "grad_norm": 0.36853986978530884, "learning_rate": 0.0002, "epoch": 1.9281045751633987, "step": 590}, {"loss": 1.0689, "grad_norm": 0.383022665977478, "learning_rate": 0.0002, "epoch": 1.9607843137254903, "step": 600}, {"loss": 1.098, "grad_norm": 0.3169507086277008, "learning_rate": 0.0002, "epoch": 1.9934640522875817, "step": 610}, {"eval_loss": 1.1845070123672485, "eval_runtime": 46.2811, "eval_samples_per_second": 9.421, "eval_steps_per_second": 1.188, "epoch": 2.0, "step": 612}, {"loss": 0.9618, "grad_norm": 0.8920142650604248, "learning_rate": 0.0002, "epoch": 2.026143790849673, "step": 620}, {"loss": 0.9784, "grad_norm": 0.4814859628677368, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 630}, {"loss": 0.8464, "grad_norm": 0.4251559376716614, "learning_rate": 0.0002, "epoch": 2.0915032679738563, "step": 640}, {"loss": 0.932, "grad_norm": 0.5295765399932861, "learning_rate": 0.0002, "epoch": 2.1241830065359477, "step": 650}, {"loss": 0.9603, "grad_norm": 0.45016610622406006, "learning_rate": 0.0002, "epoch": 2.156862745098039, "step": 660}, {"loss": 0.8738, "grad_norm": 0.5870586633682251, "learning_rate": 0.0002, "epoch": 2.189542483660131, "step": 670}, {"loss": 0.9483, "grad_norm": 0.5174715518951416, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 680}, {"loss": 0.9551, "grad_norm": 0.5252485275268555, "learning_rate": 0.0002, "epoch": 2.2549019607843137, "step": 690}, {"loss": 0.9253, "grad_norm": 0.5158312320709229, "learning_rate": 0.0002, "epoch": 2.287581699346405, "step": 700}, {"loss": 0.9278, "grad_norm": 0.4824209213256836, "learning_rate": 0.0002, "epoch": 2.3202614379084965, "step": 710}, {"loss": 0.8804, "grad_norm": 0.6335175037384033, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 720}, {"loss": 0.9685, "grad_norm": 0.5240563154220581, "learning_rate": 0.0002, "epoch": 2.3856209150326797, "step": 730}, {"loss": 0.8794, "grad_norm": 0.5172886252403259, "learning_rate": 0.0002, "epoch": 2.418300653594771, "step": 740}, {"loss": 0.8158, "grad_norm": 0.48972561955451965, "learning_rate": 0.0002, "epoch": 2.450980392156863, "step": 750}, {"loss": 0.8766, "grad_norm": 0.5295189023017883, "learning_rate": 0.0002, "epoch": 2.4836601307189543, "step": 760}, {"loss": 0.8695, "grad_norm": 0.5487208962440491, "learning_rate": 0.0002, "epoch": 2.5163398692810457, "step": 770}, {"loss": 1.0109, "grad_norm": 0.5375093221664429, "learning_rate": 0.0002, "epoch": 2.549019607843137, "step": 780}, {"loss": 0.9244, "grad_norm": 0.5424453020095825, "learning_rate": 0.0002, "epoch": 2.581699346405229, "step": 790}, {"loss": 1.0424, "grad_norm": 0.6029134392738342, "learning_rate": 0.0002, "epoch": 2.6143790849673203, "step": 800}, {"loss": 0.8688, "grad_norm": 0.6584921479225159, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 810}, {"loss": 0.7796, "grad_norm": 0.5735557675361633, "learning_rate": 0.0002, "epoch": 2.6797385620915035, "step": 820}, {"loss": 0.8834, "grad_norm": 0.5216763019561768, "learning_rate": 0.0002, "epoch": 2.712418300653595, "step": 830}, {"loss": 0.8946, "grad_norm": 0.5455219149589539, "learning_rate": 0.0002, "epoch": 2.7450980392156863, "step": 840}, {"loss": 0.8037, "grad_norm": 0.5139284729957581, "learning_rate": 0.0002, "epoch": 2.7777777777777777, "step": 850}, {"loss": 0.988, "grad_norm": 0.5096403360366821, "learning_rate": 0.0002, "epoch": 2.810457516339869, "step": 860}, {"loss": 0.9169, "grad_norm": 0.6337038278579712, "learning_rate": 0.0002, "epoch": 2.843137254901961, "step": 870}, {"loss": 0.8938, "grad_norm": 0.47218772768974304, "learning_rate": 0.0002, "epoch": 2.8758169934640523, "step": 880}, {"loss": 0.8554, "grad_norm": 0.4640636742115021, "learning_rate": 0.0002, "epoch": 2.9084967320261437, "step": 890}, {"loss": 0.8625, "grad_norm": 0.4199628531932831, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 900}, {"loss": 0.8104, "grad_norm": 0.5067117214202881, "learning_rate": 0.0002, "epoch": 2.973856209150327, "step": 910}, {"eval_loss": 1.2291251420974731, "eval_runtime": 46.2557, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 3.0, "step": 918}, {"loss": 0.8157, "grad_norm": 0.8342176079750061, "learning_rate": 0.0002, "epoch": 3.0065359477124183, "step": 920}, {"loss": 0.6855, "grad_norm": 0.7695813775062561, "learning_rate": 0.0002, "epoch": 3.0392156862745097, "step": 930}, {"loss": 0.6173, "grad_norm": 0.6819486618041992, "learning_rate": 0.0002, "epoch": 3.0718954248366015, "step": 940}, {"loss": 0.6495, "grad_norm": 0.7568879723548889, "learning_rate": 0.0002, "epoch": 3.104575163398693, "step": 950}, {"loss": 0.7905, "grad_norm": 0.6760695576667786, "learning_rate": 0.0002, "epoch": 3.1372549019607843, "step": 960}, {"loss": 0.6405, "grad_norm": 0.6359127759933472, "learning_rate": 0.0002, "epoch": 3.1699346405228757, "step": 970}, {"loss": 0.7172, "grad_norm": 0.8414971828460693, "learning_rate": 0.0002, "epoch": 3.2026143790849675, "step": 980}, {"loss": 0.7865, "grad_norm": 0.68381667137146, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 990}, {"loss": 0.6651, "grad_norm": 0.6852193474769592, "learning_rate": 0.0002, "epoch": 3.2679738562091503, "step": 1000}, {"loss": 0.6571, "grad_norm": 0.8184967041015625, "learning_rate": 0.0002, "epoch": 3.3006535947712417, "step": 1010}, {"loss": 0.7036, "grad_norm": 1.047290563583374, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1020}, {"loss": 0.7215, "grad_norm": 0.8291178345680237, "learning_rate": 0.0002, "epoch": 3.366013071895425, "step": 1030}, {"loss": 0.6243, "grad_norm": 0.6668022871017456, "learning_rate": 0.0002, "epoch": 3.3986928104575163, "step": 1040}, {"loss": 0.7459, "grad_norm": 0.6354008316993713, "learning_rate": 0.0002, "epoch": 3.431372549019608, "step": 1050}, {"loss": 0.6826, "grad_norm": 1.2028366327285767, "learning_rate": 0.0002, "epoch": 3.4640522875816995, "step": 1060}, {"loss": 0.5913, "grad_norm": 0.717367947101593, "learning_rate": 0.0002, "epoch": 3.496732026143791, "step": 1070}, {"loss": 0.6903, "grad_norm": 0.542179524898529, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 1080}, {"loss": 0.7673, "grad_norm": 0.845981776714325, "learning_rate": 0.0002, "epoch": 3.5620915032679736, "step": 1090}, {"loss": 0.7089, "grad_norm": 0.7381046414375305, "learning_rate": 0.0002, "epoch": 3.5947712418300655, "step": 1100}, {"loss": 0.6705, "grad_norm": 0.6563456058502197, "learning_rate": 0.0002, "epoch": 3.627450980392157, "step": 1110}, {"loss": 0.7767, "grad_norm": 0.7130876779556274, "learning_rate": 0.0002, "epoch": 3.6601307189542482, "step": 1120}, {"loss": 0.7164, "grad_norm": 0.800032913684845, "learning_rate": 0.0002, "epoch": 3.69281045751634, "step": 1130}, {"loss": 0.7272, "grad_norm": 0.980328381061554, "learning_rate": 0.0002, "epoch": 3.7254901960784315, "step": 1140}, {"loss": 0.7672, "grad_norm": 0.8542261123657227, "learning_rate": 0.0002, "epoch": 3.758169934640523, "step": 1150}, {"loss": 0.679, "grad_norm": 0.6302552819252014, "learning_rate": 0.0002, "epoch": 3.7908496732026142, "step": 1160}, {"loss": 0.7457, "grad_norm": 0.515398383140564, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 1170}, {"loss": 0.693, "grad_norm": 1.2427130937576294, "learning_rate": 0.0002, "epoch": 3.8562091503267975, "step": 1180}, {"loss": 0.7182, "grad_norm": 0.8206831216812134, "learning_rate": 0.0002, "epoch": 3.888888888888889, "step": 1190}, {"loss": 0.7519, "grad_norm": 0.7633249163627625, "learning_rate": 0.0002, "epoch": 3.9215686274509802, "step": 1200}, {"loss": 0.7082, "grad_norm": 0.8034512400627136, "learning_rate": 0.0002, "epoch": 3.954248366013072, "step": 1210}, {"loss": 0.6834, "grad_norm": 0.7667182087898254, "learning_rate": 0.0002, "epoch": 3.9869281045751634, "step": 1220}, {"eval_loss": 1.3456707000732422, "eval_runtime": 46.2562, "eval_samples_per_second": 9.426, "eval_steps_per_second": 1.189, "epoch": 4.0, "step": 1224}, {"loss": 0.582, "grad_norm": 0.7724746465682983, "learning_rate": 0.0002, "epoch": 4.019607843137255, "step": 1230}, {"loss": 0.4759, "grad_norm": 1.166916847229004, "learning_rate": 0.0002, "epoch": 4.052287581699346, "step": 1240}, {"loss": 0.4995, "grad_norm": 0.7234508991241455, "learning_rate": 0.0002, "epoch": 4.084967320261438, "step": 1250}, {"loss": 0.4863, "grad_norm": 1.1418060064315796, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 1260}, {"loss": 0.5425, "grad_norm": 0.9603922367095947, "learning_rate": 0.0002, "epoch": 4.150326797385621, "step": 1270}, {"loss": 0.4892, "grad_norm": 0.8976530432701111, "learning_rate": 0.0002, "epoch": 4.183006535947713, "step": 1280}, {"loss": 0.473, "grad_norm": 0.6855078339576721, "learning_rate": 0.0002, "epoch": 4.215686274509804, "step": 1290}, {"loss": 0.4416, "grad_norm": 1.2676647901535034, "learning_rate": 0.0002, "epoch": 4.248366013071895, "step": 1300}, {"loss": 0.5071, "grad_norm": 1.104057788848877, "learning_rate": 0.0002, "epoch": 4.281045751633987, "step": 1310}, {"loss": 0.5168, "grad_norm": 1.7076562643051147, "learning_rate": 0.0002, "epoch": 4.313725490196078, "step": 1320}, {"loss": 0.4655, "grad_norm": 1.2308520078659058, "learning_rate": 0.0002, "epoch": 4.34640522875817, "step": 1330}, {"loss": 0.5322, "grad_norm": 1.2652729749679565, "learning_rate": 0.0002, "epoch": 4.379084967320262, "step": 1340}, {"loss": 0.5262, "grad_norm": 1.054958701133728, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 1350}, {"loss": 0.4747, "grad_norm": 1.0130749940872192, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 1360}, {"loss": 0.4887, "grad_norm": 1.0517818927764893, "learning_rate": 0.0002, "epoch": 4.477124183006536, "step": 1370}, {"loss": 0.4906, "grad_norm": 0.8593037128448486, "learning_rate": 0.0002, "epoch": 4.509803921568627, "step": 1380}, {"loss": 0.5049, "grad_norm": 1.0248081684112549, "learning_rate": 0.0002, "epoch": 4.542483660130719, "step": 1390}, {"loss": 0.472, "grad_norm": 0.8999413847923279, "learning_rate": 0.0002, "epoch": 4.57516339869281, "step": 1400}, {"loss": 0.5102, "grad_norm": 0.9106912612915039, "learning_rate": 0.0002, "epoch": 4.607843137254902, "step": 1410}, {"loss": 0.5203, "grad_norm": 1.2736181020736694, "learning_rate": 0.0002, "epoch": 4.640522875816993, "step": 1420}, {"loss": 0.5101, "grad_norm": 0.9311690926551819, "learning_rate": 0.0002, "epoch": 4.673202614379085, "step": 1430}, {"loss": 0.5648, "grad_norm": 1.0455045700073242, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 1440}, {"loss": 0.5004, "grad_norm": 1.0190727710723877, "learning_rate": 0.0002, "epoch": 4.738562091503268, "step": 1450}, {"loss": 0.5506, "grad_norm": 1.333198070526123, "learning_rate": 0.0002, "epoch": 4.771241830065359, "step": 1460}, {"loss": 0.5846, "grad_norm": 0.8808416724205017, "learning_rate": 0.0002, "epoch": 4.803921568627451, "step": 1470}, {"loss": 0.4671, "grad_norm": 0.8896227478981018, "learning_rate": 0.0002, "epoch": 4.836601307189542, "step": 1480}, {"loss": 0.4732, "grad_norm": 1.212323784828186, "learning_rate": 0.0002, "epoch": 4.8692810457516345, "step": 1490}, {"loss": 0.5263, "grad_norm": 1.0490120649337769, "learning_rate": 0.0002, "epoch": 4.901960784313726, "step": 1500}, {"loss": 0.5815, "grad_norm": 0.8946618437767029, "learning_rate": 0.0002, "epoch": 4.934640522875817, "step": 1510}, {"loss": 0.5369, "grad_norm": 1.0609275102615356, "learning_rate": 0.0002, "epoch": 4.967320261437909, "step": 1520}, {"loss": 0.5348, "grad_norm": 0.8885099291801453, "learning_rate": 0.0002, "epoch": 5.0, "step": 1530}, {"eval_loss": 1.5771757364273071, "eval_runtime": 46.2667, "eval_samples_per_second": 9.424, "eval_steps_per_second": 1.189, "epoch": 5.0, "step": 1530}, {"loss": 0.3176, "grad_norm": 1.401705026626587, "learning_rate": 0.0002, "epoch": 5.032679738562091, "step": 1540}, {"loss": 0.3346, "grad_norm": 0.8365539908409119, "learning_rate": 0.0002, "epoch": 5.065359477124183, "step": 1550}, {"loss": 0.3605, "grad_norm": 1.3188321590423584, "learning_rate": 0.0002, "epoch": 5.098039215686274, "step": 1560}, {"loss": 0.326, "grad_norm": 0.9819526076316833, "learning_rate": 0.0002, "epoch": 5.130718954248366, "step": 1570}, {"loss": 0.3575, "grad_norm": 1.13265061378479, "learning_rate": 0.0002, "epoch": 5.163398692810458, "step": 1580}, {"loss": 0.3418, "grad_norm": 1.478152871131897, "learning_rate": 0.0002, "epoch": 5.196078431372549, "step": 1590}, {"loss": 0.3388, "grad_norm": 1.4188750982284546, "learning_rate": 0.0002, "epoch": 5.228758169934641, "step": 1600}, {"loss": 0.3524, "grad_norm": 1.2499338388442993, "learning_rate": 0.0002, "epoch": 5.261437908496732, "step": 1610}, {"loss": 0.423, "grad_norm": 1.7885085344314575, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 1620}, {"loss": 0.3237, "grad_norm": 1.2614946365356445, "learning_rate": 0.0002, "epoch": 5.326797385620915, "step": 1630}, {"loss": 0.3511, "grad_norm": 1.28338623046875, "learning_rate": 0.0002, "epoch": 5.359477124183006, "step": 1640}, {"loss": 0.3112, "grad_norm": 1.1973257064819336, "learning_rate": 0.0002, "epoch": 5.392156862745098, "step": 1650}, {"loss": 0.3645, "grad_norm": 1.1356301307678223, "learning_rate": 0.0002, "epoch": 5.42483660130719, "step": 1660}, {"loss": 0.307, "grad_norm": 0.9048901200294495, "learning_rate": 0.0002, "epoch": 5.457516339869281, "step": 1670}, {"loss": 0.3828, "grad_norm": 1.5352122783660889, "learning_rate": 0.0002, "epoch": 5.490196078431373, "step": 1680}, {"loss": 0.3826, "grad_norm": 0.9096335172653198, "learning_rate": 0.0002, "epoch": 5.522875816993464, "step": 1690}, {"loss": 0.3686, "grad_norm": 1.1903661489486694, "learning_rate": 0.0002, "epoch": 5.555555555555555, "step": 1700}, {"loss": 0.325, "grad_norm": 0.9234451651573181, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 1710}, {"loss": 0.3451, "grad_norm": 1.4554102420806885, "learning_rate": 0.0002, "epoch": 5.620915032679738, "step": 1720}, {"loss": 0.3488, "grad_norm": 1.1044343709945679, "learning_rate": 0.0002, "epoch": 5.65359477124183, "step": 1730}, {"loss": 0.3673, "grad_norm": 1.2219593524932861, "learning_rate": 0.0002, "epoch": 5.686274509803922, "step": 1740}, {"loss": 0.3517, "grad_norm": 0.901652455329895, "learning_rate": 0.0002, "epoch": 5.718954248366013, "step": 1750}, {"loss": 0.4082, "grad_norm": 1.3334792852401733, "learning_rate": 0.0002, "epoch": 5.751633986928105, "step": 1760}, {"loss": 0.4386, "grad_norm": 1.5595488548278809, "learning_rate": 0.0002, "epoch": 5.784313725490196, "step": 1770}, {"loss": 0.3217, "grad_norm": 1.3892982006072998, "learning_rate": 0.0002, "epoch": 5.816993464052287, "step": 1780}, {"loss": 0.3919, "grad_norm": 1.0813168287277222, "learning_rate": 0.0002, "epoch": 5.849673202614379, "step": 1790}, {"loss": 0.3697, "grad_norm": 1.145320177078247, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 1800}, {"loss": 0.3903, "grad_norm": 1.0249533653259277, "learning_rate": 0.0002, "epoch": 5.915032679738562, "step": 1810}, {"loss": 0.3481, "grad_norm": 1.0013737678527832, "learning_rate": 0.0002, "epoch": 5.947712418300654, "step": 1820}, {"loss": 0.4278, "grad_norm": 1.212314248085022, "learning_rate": 0.0002, "epoch": 5.980392156862745, "step": 1830}, {"eval_loss": 1.7506128549575806, "eval_runtime": 46.2256, "eval_samples_per_second": 9.432, "eval_steps_per_second": 1.19, "epoch": 6.0, "step": 1836}, {"loss": 0.3283, "grad_norm": 0.7339767813682556, "learning_rate": 0.0002, "epoch": 6.0130718954248366, "step": 1840}, {"loss": 0.2304, "grad_norm": 1.1071710586547852, "learning_rate": 0.0002, "epoch": 6.045751633986928, "step": 1850}, {"loss": 0.2436, "grad_norm": 1.2613991498947144, "learning_rate": 0.0002, "epoch": 6.078431372549019, "step": 1860}, {"loss": 0.2403, "grad_norm": 1.053133249282837, "learning_rate": 0.0002, "epoch": 6.111111111111111, "step": 1870}, {"loss": 0.2509, "grad_norm": 1.069568395614624, "learning_rate": 0.0002, "epoch": 6.143790849673203, "step": 1880}, {"loss": 0.2272, "grad_norm": 1.020458698272705, "learning_rate": 0.0002, "epoch": 6.176470588235294, "step": 1890}, {"loss": 0.2408, "grad_norm": 1.2430394887924194, "learning_rate": 0.0002, "epoch": 6.209150326797386, "step": 1900}, {"loss": 0.229, "grad_norm": 1.3475574254989624, "learning_rate": 0.0002, "epoch": 6.241830065359477, "step": 1910}, {"loss": 0.2542, "grad_norm": 0.9094598889350891, "learning_rate": 0.0002, "epoch": 6.2745098039215685, "step": 1920}, {"loss": 0.2009, "grad_norm": 1.255650520324707, "learning_rate": 0.0002, "epoch": 6.30718954248366, "step": 1930}, {"loss": 0.25, "grad_norm": 1.4193930625915527, "learning_rate": 0.0002, "epoch": 6.339869281045751, "step": 1940}, {"loss": 0.293, "grad_norm": 1.4378032684326172, "learning_rate": 0.0002, "epoch": 6.372549019607844, "step": 1950}, {"loss": 0.2685, "grad_norm": 1.2236989736557007, "learning_rate": 0.0002, "epoch": 6.405228758169935, "step": 1960}, {"loss": 0.2608, "grad_norm": 1.0902987718582153, "learning_rate": 0.0002, "epoch": 6.437908496732026, "step": 1970}, {"loss": 0.2267, "grad_norm": 1.1165062189102173, "learning_rate": 0.0002, "epoch": 6.470588235294118, "step": 1980}, {"loss": 0.2246, "grad_norm": 1.3953566551208496, "learning_rate": 0.0002, "epoch": 6.503267973856209, "step": 1990}, {"loss": 0.2834, "grad_norm": 1.5215585231781006, "learning_rate": 0.0002, "epoch": 6.5359477124183005, "step": 2000}, {"loss": 0.2457, "grad_norm": 1.3496609926223755, "learning_rate": 0.0002, "epoch": 6.568627450980392, "step": 2010}, {"loss": 0.2637, "grad_norm": 1.2511820793151855, "learning_rate": 0.0002, "epoch": 6.601307189542483, "step": 2020}, {"loss": 0.2956, "grad_norm": 1.9875848293304443, "learning_rate": 0.0002, "epoch": 6.633986928104575, "step": 2030}, {"loss": 0.2664, "grad_norm": 1.0766608715057373, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 2040}, {"loss": 0.2792, "grad_norm": 1.3594712018966675, "learning_rate": 0.0002, "epoch": 6.699346405228758, "step": 2050}, {"loss": 0.3194, "grad_norm": 1.2357292175292969, "learning_rate": 0.0002, "epoch": 6.73202614379085, "step": 2060}, {"loss": 0.2526, "grad_norm": 1.2428375482559204, "learning_rate": 0.0002, "epoch": 6.764705882352941, "step": 2070}, {"loss": 0.2355, "grad_norm": 1.2724156379699707, "learning_rate": 0.0002, "epoch": 6.7973856209150325, "step": 2080}, {"loss": 0.289, "grad_norm": 1.4981396198272705, "learning_rate": 0.0002, "epoch": 6.830065359477124, "step": 2090}, {"loss": 0.2443, "grad_norm": 0.9346088171005249, "learning_rate": 0.0002, "epoch": 6.862745098039216, "step": 2100}, {"loss": 0.262, "grad_norm": 1.3955477476119995, "learning_rate": 0.0002, "epoch": 6.895424836601308, "step": 2110}, {"loss": 0.2981, "grad_norm": 1.492382287979126, "learning_rate": 0.0002, "epoch": 6.928104575163399, "step": 2120}, {"loss": 0.3093, "grad_norm": 1.2755712270736694, "learning_rate": 0.0002, "epoch": 6.96078431372549, "step": 2130}, {"loss": 0.2943, "grad_norm": 1.4600884914398193, "learning_rate": 0.0002, "epoch": 6.993464052287582, "step": 2140}, {"eval_loss": 1.9770371913909912, "eval_runtime": 46.2588, "eval_samples_per_second": 9.425, "eval_steps_per_second": 1.189, "epoch": 7.0, "step": 2142}, {"loss": 0.2032, "grad_norm": 1.205262541770935, "learning_rate": 0.0002, "epoch": 7.026143790849673, "step": 2150}, {"loss": 0.1642, "grad_norm": 1.1699777841567993, "learning_rate": 0.0002, "epoch": 7.0588235294117645, "step": 2160}, {"loss": 0.1886, "grad_norm": 1.2428505420684814, "learning_rate": 0.0002, "epoch": 7.091503267973856, "step": 2170}, {"loss": 0.1762, "grad_norm": 0.9843717813491821, "learning_rate": 0.0002, "epoch": 7.124183006535947, "step": 2180}, {"loss": 0.1723, "grad_norm": 1.089490532875061, "learning_rate": 0.0002, "epoch": 7.1568627450980395, "step": 2190}, {"loss": 0.1721, "grad_norm": 1.2811459302902222, "learning_rate": 0.0002, "epoch": 7.189542483660131, "step": 2200}, {"loss": 0.1983, "grad_norm": 1.4558671712875366, "learning_rate": 0.0002, "epoch": 7.222222222222222, "step": 2210}, {"loss": 0.178, "grad_norm": 1.2111164331436157, "learning_rate": 0.0002, "epoch": 7.254901960784314, "step": 2220}, {"loss": 0.1463, "grad_norm": 1.46672785282135, "learning_rate": 0.0002, "epoch": 7.287581699346405, "step": 2230}, {"loss": 0.1883, "grad_norm": 1.464061975479126, "learning_rate": 0.0002, "epoch": 7.3202614379084965, "step": 2240}, {"loss": 0.2276, "grad_norm": 1.6276377439498901, "learning_rate": 0.0002, "epoch": 7.352941176470588, "step": 2250}, {"loss": 0.1771, "grad_norm": 1.4011811017990112, "learning_rate": 0.0002, "epoch": 7.38562091503268, "step": 2260}, {"loss": 0.1875, "grad_norm": 0.9894806146621704, "learning_rate": 0.0002, "epoch": 7.4183006535947715, "step": 2270}, {"loss": 0.1725, "grad_norm": 0.9357670545578003, "learning_rate": 0.0002, "epoch": 7.450980392156863, "step": 2280}, {"loss": 0.1891, "grad_norm": 1.7790061235427856, "learning_rate": 0.0002, "epoch": 7.483660130718954, "step": 2290}, {"loss": 0.1967, "grad_norm": 1.468843698501587, "learning_rate": 0.0002, "epoch": 7.516339869281046, "step": 2300}, {"loss": 0.195, "grad_norm": 1.063189148902893, "learning_rate": 0.0002, "epoch": 7.549019607843137, "step": 2310}, {"loss": 0.1752, "grad_norm": 0.7940694689750671, "learning_rate": 0.0002, "epoch": 7.5816993464052285, "step": 2320}, {"loss": 0.2169, "grad_norm": 1.555564045906067, "learning_rate": 0.0002, "epoch": 7.61437908496732, "step": 2330}, {"loss": 0.1755, "grad_norm": 0.7388061881065369, "learning_rate": 0.0002, "epoch": 7.647058823529412, "step": 2340}, {"loss": 0.2206, "grad_norm": 1.2422513961791992, "learning_rate": 0.0002, "epoch": 7.6797385620915035, "step": 2350}, {"loss": 0.2015, "grad_norm": 1.3868855237960815, "learning_rate": 0.0002, "epoch": 7.712418300653595, "step": 2360}, {"loss": 0.1888, "grad_norm": 1.2965079545974731, "learning_rate": 0.0002, "epoch": 7.745098039215686, "step": 2370}, {"loss": 0.2257, "grad_norm": 1.4052339792251587, "learning_rate": 0.0002, "epoch": 7.777777777777778, "step": 2380}, {"loss": 0.2145, "grad_norm": 1.9005945920944214, "learning_rate": 0.0002, "epoch": 7.810457516339869, "step": 2390}, {"loss": 0.2154, "grad_norm": 0.9947215914726257, "learning_rate": 0.0002, "epoch": 7.8431372549019605, "step": 2400}, {"loss": 0.1841, "grad_norm": 0.796757698059082, "learning_rate": 0.0002, "epoch": 7.875816993464053, "step": 2410}, {"loss": 0.2476, "grad_norm": 2.4196858406066895, "learning_rate": 0.0002, "epoch": 7.908496732026144, "step": 2420}, {"loss": 0.2136, "grad_norm": 1.7430493831634521, "learning_rate": 0.0002, "epoch": 7.9411764705882355, "step": 2430}, {"loss": 0.2053, "grad_norm": 1.0432168245315552, "learning_rate": 0.0002, "epoch": 7.973856209150327, "step": 2440}]}