diff --git a/.gitattributes b/.gitattributes index fd142d6c4d9b49180280daae2f48443b866911a9..f599a093c237c75dc1a59006d5244e827974598b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3188,3 +3188,12 @@ gemma-2b-it_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32 gemma-2b-it_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-1029-sd-42/checkpoint-728/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-1029-sd-42/checkpoint-832/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_arc_challenge-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-1029-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4851c82fafea9bb3ebe668ab2e31de9e18c2eed --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc75bed62d53a2b476f62dab1b48583d07a528d4b737ec52826688f664d8ba62 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ecb597d0eabfe840c3e798e6891b5ffd82dcff39 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8fe69868d27fc0603ad8147976b7fc019f69ed8d8e3559b6046cd2623aa4d6b +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f68bba9c1680ebbc3a239d07f92af8cb5678b818 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2eec590e592c8ddee58cc629aa8411f932b6379f0a05804aa527b88c7cc430bd +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..00f10f8685bb74ff2ddded0f49f4ad97dac48c8d --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f30925f76b3f5a7235943d59acdcaf5000590a4423f8d595882ba3339209fa68 +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..476afe96b47e38d60c308b3e9109d0bb8783ef68 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d948d1bf3ec64239322ccdba33ef9035020aa64f970968ec5c4a6170c3bb88ce +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ed463f9b82db92e527abc3eb8ae2400bbc9b76e --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/trainer_state.json @@ -0,0 +1,803 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 7.0, + "eval_steps": 10, + "global_step": 1029, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + }, + { + "epoch": 3.061224489795918, + "grad_norm": 0.7031271457672119, + "learning_rate": 0.0002, + "loss": 1.9638, + "step": 450 + }, + { + "epoch": 3.129251700680272, + "grad_norm": 0.7331708669662476, + "learning_rate": 0.0002, + "loss": 1.8729, + "step": 460 + }, + { + "epoch": 3.197278911564626, + "grad_norm": 0.7559226155281067, + "learning_rate": 0.0002, + "loss": 2.0023, + "step": 470 + }, + { + "epoch": 3.2653061224489797, + "grad_norm": 0.8188950419425964, + "learning_rate": 0.0002, + "loss": 1.9566, + "step": 480 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7805435657501221, + "learning_rate": 0.0002, + "loss": 1.9255, + "step": 490 + }, + { + "epoch": 3.4013605442176873, + "grad_norm": 0.7917240858078003, + "learning_rate": 0.0002, + "loss": 1.9461, + "step": 500 + }, + { + "epoch": 3.4693877551020407, + "grad_norm": 0.8258386254310608, + "learning_rate": 0.0002, + "loss": 1.901, + "step": 510 + }, + { + "epoch": 3.5374149659863945, + "grad_norm": 0.8375557661056519, + "learning_rate": 0.0002, + "loss": 1.9833, + "step": 520 + }, + { + "epoch": 3.6054421768707483, + "grad_norm": 0.8005449771881104, + "learning_rate": 0.0002, + "loss": 2.0152, + "step": 530 + }, + { + "epoch": 3.673469387755102, + "grad_norm": 0.799006462097168, + "learning_rate": 0.0002, + "loss": 1.9309, + "step": 540 + }, + { + "epoch": 3.741496598639456, + "grad_norm": 0.7934630513191223, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 550 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.7247752547264099, + "learning_rate": 0.0002, + "loss": 1.993, + "step": 560 + }, + { + "epoch": 3.877551020408163, + "grad_norm": 0.8138917088508606, + "learning_rate": 0.0002, + "loss": 1.9914, + "step": 570 + }, + { + "epoch": 3.945578231292517, + "grad_norm": 0.7810562252998352, + "learning_rate": 0.0002, + "loss": 1.9116, + "step": 580 + }, + { + "epoch": 4.0, + "eval_loss": 2.132384777069092, + "eval_runtime": 38.6163, + "eval_samples_per_second": 13.129, + "eval_steps_per_second": 1.657, + "step": 588 + }, + { + "epoch": 4.01360544217687, + "grad_norm": 0.7258784770965576, + "learning_rate": 0.0002, + "loss": 1.9048, + "step": 590 + }, + { + "epoch": 4.081632653061225, + "grad_norm": 0.8905395865440369, + "learning_rate": 0.0002, + "loss": 1.7991, + "step": 600 + }, + { + "epoch": 4.149659863945578, + "grad_norm": 0.8189161419868469, + "learning_rate": 0.0002, + "loss": 1.8841, + "step": 610 + }, + { + "epoch": 4.217687074829932, + "grad_norm": 0.8235230445861816, + "learning_rate": 0.0002, + "loss": 1.914, + "step": 620 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.8926266431808472, + "learning_rate": 0.0002, + "loss": 1.9114, + "step": 630 + }, + { + "epoch": 4.35374149659864, + "grad_norm": 0.9667059183120728, + "learning_rate": 0.0002, + "loss": 1.8944, + "step": 640 + }, + { + "epoch": 4.421768707482993, + "grad_norm": 0.8441583514213562, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 650 + }, + { + "epoch": 4.489795918367347, + "grad_norm": 0.8351956009864807, + "learning_rate": 0.0002, + "loss": 1.7937, + "step": 660 + }, + { + "epoch": 4.557823129251701, + "grad_norm": 0.8557114005088806, + "learning_rate": 0.0002, + "loss": 1.8439, + "step": 670 + }, + { + "epoch": 4.625850340136054, + "grad_norm": 0.8698110580444336, + "learning_rate": 0.0002, + "loss": 1.9426, + "step": 680 + }, + { + "epoch": 4.6938775510204085, + "grad_norm": 0.8394802808761597, + "learning_rate": 0.0002, + "loss": 1.8635, + "step": 690 + }, + { + "epoch": 4.761904761904762, + "grad_norm": 0.8168841004371643, + "learning_rate": 0.0002, + "loss": 1.8908, + "step": 700 + }, + { + "epoch": 4.829931972789115, + "grad_norm": 0.8049741387367249, + "learning_rate": 0.0002, + "loss": 1.8674, + "step": 710 + }, + { + "epoch": 4.8979591836734695, + "grad_norm": 0.7987792491912842, + "learning_rate": 0.0002, + "loss": 1.9289, + "step": 720 + }, + { + "epoch": 4.965986394557823, + "grad_norm": 0.9021750688552856, + "learning_rate": 0.0002, + "loss": 1.8779, + "step": 730 + }, + { + "epoch": 5.0, + "eval_loss": 2.1478686332702637, + "eval_runtime": 38.5077, + "eval_samples_per_second": 13.166, + "eval_steps_per_second": 1.662, + "step": 735 + }, + { + "epoch": 5.034013605442177, + "grad_norm": 0.8057989478111267, + "learning_rate": 0.0002, + "loss": 1.8979, + "step": 740 + }, + { + "epoch": 5.1020408163265305, + "grad_norm": 0.9020641446113586, + "learning_rate": 0.0002, + "loss": 1.81, + "step": 750 + }, + { + "epoch": 5.170068027210885, + "grad_norm": 0.843891978263855, + "learning_rate": 0.0002, + "loss": 1.8219, + "step": 760 + }, + { + "epoch": 5.238095238095238, + "grad_norm": 0.8797562122344971, + "learning_rate": 0.0002, + "loss": 1.8153, + "step": 770 + }, + { + "epoch": 5.3061224489795915, + "grad_norm": 0.9378810524940491, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 780 + }, + { + "epoch": 5.374149659863946, + "grad_norm": 1.0502477884292603, + "learning_rate": 0.0002, + "loss": 1.7499, + "step": 790 + }, + { + "epoch": 5.442176870748299, + "grad_norm": 1.0142803192138672, + "learning_rate": 0.0002, + "loss": 1.7767, + "step": 800 + }, + { + "epoch": 5.510204081632653, + "grad_norm": 1.0314291715621948, + "learning_rate": 0.0002, + "loss": 1.785, + "step": 810 + }, + { + "epoch": 5.578231292517007, + "grad_norm": 0.8898603916168213, + "learning_rate": 0.0002, + "loss": 1.7471, + "step": 820 + }, + { + "epoch": 5.646258503401361, + "grad_norm": 1.011250376701355, + "learning_rate": 0.0002, + "loss": 1.7731, + "step": 830 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 0.9000794291496277, + "learning_rate": 0.0002, + "loss": 1.7588, + "step": 840 + }, + { + "epoch": 5.782312925170068, + "grad_norm": 1.0984753370285034, + "learning_rate": 0.0002, + "loss": 1.788, + "step": 850 + }, + { + "epoch": 5.850340136054422, + "grad_norm": 0.9162030220031738, + "learning_rate": 0.0002, + "loss": 1.7282, + "step": 860 + }, + { + "epoch": 5.918367346938775, + "grad_norm": 0.9867637753486633, + "learning_rate": 0.0002, + "loss": 1.7292, + "step": 870 + }, + { + "epoch": 5.986394557823129, + "grad_norm": 0.8848171234130859, + "learning_rate": 0.0002, + "loss": 1.7651, + "step": 880 + }, + { + "epoch": 6.0, + "eval_loss": 2.180830240249634, + "eval_runtime": 38.8838, + "eval_samples_per_second": 13.039, + "eval_steps_per_second": 1.646, + "step": 882 + }, + { + "epoch": 6.054421768707483, + "grad_norm": 1.0574727058410645, + "learning_rate": 0.0002, + "loss": 1.6712, + "step": 890 + }, + { + "epoch": 6.122448979591836, + "grad_norm": 1.1281784772872925, + "learning_rate": 0.0002, + "loss": 1.6928, + "step": 900 + }, + { + "epoch": 6.190476190476191, + "grad_norm": 0.9686701893806458, + "learning_rate": 0.0002, + "loss": 1.6952, + "step": 910 + }, + { + "epoch": 6.258503401360544, + "grad_norm": 1.016952633857727, + "learning_rate": 0.0002, + "loss": 1.6359, + "step": 920 + }, + { + "epoch": 6.326530612244898, + "grad_norm": 0.9630302786827087, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 930 + }, + { + "epoch": 6.394557823129252, + "grad_norm": 1.0207276344299316, + "learning_rate": 0.0002, + "loss": 1.7264, + "step": 940 + }, + { + "epoch": 6.462585034013605, + "grad_norm": 1.1470541954040527, + "learning_rate": 0.0002, + "loss": 1.6934, + "step": 950 + }, + { + "epoch": 6.530612244897959, + "grad_norm": 1.0892208814620972, + "learning_rate": 0.0002, + "loss": 1.6645, + "step": 960 + }, + { + "epoch": 6.598639455782313, + "grad_norm": 1.030396819114685, + "learning_rate": 0.0002, + "loss": 1.7105, + "step": 970 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 1.0828958749771118, + "learning_rate": 0.0002, + "loss": 1.709, + "step": 980 + }, + { + "epoch": 6.73469387755102, + "grad_norm": 1.1024560928344727, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 990 + }, + { + "epoch": 6.802721088435375, + "grad_norm": 0.9986393451690674, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 1000 + }, + { + "epoch": 6.870748299319728, + "grad_norm": 1.0168452262878418, + "learning_rate": 0.0002, + "loss": 1.7041, + "step": 1010 + }, + { + "epoch": 6.938775510204081, + "grad_norm": 1.1757020950317383, + "learning_rate": 0.0002, + "loss": 1.6552, + "step": 1020 + }, + { + "epoch": 7.0, + "eval_loss": 2.2328195571899414, + "eval_runtime": 39.0199, + "eval_samples_per_second": 12.993, + "eval_steps_per_second": 1.64, + "step": 1029 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2576434228822016e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1029/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ba0cd9a3a051877fb4bef96baf3338790d42e59 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be318936c002af64e49938fa3157aca43a4f7719c4a72c40957bdfc5081cc31 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bbcedbd824db421c5044303ee8af966bed2c7aeb --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2a7f31d7040dc4530e1349379e88b5f7ec59fd64fca5a9607d3140a744ed82d +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..30870cad945156379496bbbb37c010dffcb7fc6b --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04425d63b8b7782d5c1dc7a9879e54860a25d1dd31d6910d6ab3f15069f45223 +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db038e56036665dd0e7c5d104e6e5601e0585029 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:910f5fdab726d612947042d67f7f6911f67f7406db94149e144ea7cb0a036011 +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..70848e31536dadb3085e865315e36b8816d398e2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/trainer_state.json @@ -0,0 +1,916 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 8.0, + "eval_steps": 10, + "global_step": 1176, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + }, + { + "epoch": 3.061224489795918, + "grad_norm": 0.7031271457672119, + "learning_rate": 0.0002, + "loss": 1.9638, + "step": 450 + }, + { + "epoch": 3.129251700680272, + "grad_norm": 0.7331708669662476, + "learning_rate": 0.0002, + "loss": 1.8729, + "step": 460 + }, + { + "epoch": 3.197278911564626, + "grad_norm": 0.7559226155281067, + "learning_rate": 0.0002, + "loss": 2.0023, + "step": 470 + }, + { + "epoch": 3.2653061224489797, + "grad_norm": 0.8188950419425964, + "learning_rate": 0.0002, + "loss": 1.9566, + "step": 480 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7805435657501221, + "learning_rate": 0.0002, + "loss": 1.9255, + "step": 490 + }, + { + "epoch": 3.4013605442176873, + "grad_norm": 0.7917240858078003, + "learning_rate": 0.0002, + "loss": 1.9461, + "step": 500 + }, + { + "epoch": 3.4693877551020407, + "grad_norm": 0.8258386254310608, + "learning_rate": 0.0002, + "loss": 1.901, + "step": 510 + }, + { + "epoch": 3.5374149659863945, + "grad_norm": 0.8375557661056519, + "learning_rate": 0.0002, + "loss": 1.9833, + "step": 520 + }, + { + "epoch": 3.6054421768707483, + "grad_norm": 0.8005449771881104, + "learning_rate": 0.0002, + "loss": 2.0152, + "step": 530 + }, + { + "epoch": 3.673469387755102, + "grad_norm": 0.799006462097168, + "learning_rate": 0.0002, + "loss": 1.9309, + "step": 540 + }, + { + "epoch": 3.741496598639456, + "grad_norm": 0.7934630513191223, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 550 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.7247752547264099, + "learning_rate": 0.0002, + "loss": 1.993, + "step": 560 + }, + { + "epoch": 3.877551020408163, + "grad_norm": 0.8138917088508606, + "learning_rate": 0.0002, + "loss": 1.9914, + "step": 570 + }, + { + "epoch": 3.945578231292517, + "grad_norm": 0.7810562252998352, + "learning_rate": 0.0002, + "loss": 1.9116, + "step": 580 + }, + { + "epoch": 4.0, + "eval_loss": 2.132384777069092, + "eval_runtime": 38.6163, + "eval_samples_per_second": 13.129, + "eval_steps_per_second": 1.657, + "step": 588 + }, + { + "epoch": 4.01360544217687, + "grad_norm": 0.7258784770965576, + "learning_rate": 0.0002, + "loss": 1.9048, + "step": 590 + }, + { + "epoch": 4.081632653061225, + "grad_norm": 0.8905395865440369, + "learning_rate": 0.0002, + "loss": 1.7991, + "step": 600 + }, + { + "epoch": 4.149659863945578, + "grad_norm": 0.8189161419868469, + "learning_rate": 0.0002, + "loss": 1.8841, + "step": 610 + }, + { + "epoch": 4.217687074829932, + "grad_norm": 0.8235230445861816, + "learning_rate": 0.0002, + "loss": 1.914, + "step": 620 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.8926266431808472, + "learning_rate": 0.0002, + "loss": 1.9114, + "step": 630 + }, + { + "epoch": 4.35374149659864, + "grad_norm": 0.9667059183120728, + "learning_rate": 0.0002, + "loss": 1.8944, + "step": 640 + }, + { + "epoch": 4.421768707482993, + "grad_norm": 0.8441583514213562, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 650 + }, + { + "epoch": 4.489795918367347, + "grad_norm": 0.8351956009864807, + "learning_rate": 0.0002, + "loss": 1.7937, + "step": 660 + }, + { + "epoch": 4.557823129251701, + "grad_norm": 0.8557114005088806, + "learning_rate": 0.0002, + "loss": 1.8439, + "step": 670 + }, + { + "epoch": 4.625850340136054, + "grad_norm": 0.8698110580444336, + "learning_rate": 0.0002, + "loss": 1.9426, + "step": 680 + }, + { + "epoch": 4.6938775510204085, + "grad_norm": 0.8394802808761597, + "learning_rate": 0.0002, + "loss": 1.8635, + "step": 690 + }, + { + "epoch": 4.761904761904762, + "grad_norm": 0.8168841004371643, + "learning_rate": 0.0002, + "loss": 1.8908, + "step": 700 + }, + { + "epoch": 4.829931972789115, + "grad_norm": 0.8049741387367249, + "learning_rate": 0.0002, + "loss": 1.8674, + "step": 710 + }, + { + "epoch": 4.8979591836734695, + "grad_norm": 0.7987792491912842, + "learning_rate": 0.0002, + "loss": 1.9289, + "step": 720 + }, + { + "epoch": 4.965986394557823, + "grad_norm": 0.9021750688552856, + "learning_rate": 0.0002, + "loss": 1.8779, + "step": 730 + }, + { + "epoch": 5.0, + "eval_loss": 2.1478686332702637, + "eval_runtime": 38.5077, + "eval_samples_per_second": 13.166, + "eval_steps_per_second": 1.662, + "step": 735 + }, + { + "epoch": 5.034013605442177, + "grad_norm": 0.8057989478111267, + "learning_rate": 0.0002, + "loss": 1.8979, + "step": 740 + }, + { + "epoch": 5.1020408163265305, + "grad_norm": 0.9020641446113586, + "learning_rate": 0.0002, + "loss": 1.81, + "step": 750 + }, + { + "epoch": 5.170068027210885, + "grad_norm": 0.843891978263855, + "learning_rate": 0.0002, + "loss": 1.8219, + "step": 760 + }, + { + "epoch": 5.238095238095238, + "grad_norm": 0.8797562122344971, + "learning_rate": 0.0002, + "loss": 1.8153, + "step": 770 + }, + { + "epoch": 5.3061224489795915, + "grad_norm": 0.9378810524940491, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 780 + }, + { + "epoch": 5.374149659863946, + "grad_norm": 1.0502477884292603, + "learning_rate": 0.0002, + "loss": 1.7499, + "step": 790 + }, + { + "epoch": 5.442176870748299, + "grad_norm": 1.0142803192138672, + "learning_rate": 0.0002, + "loss": 1.7767, + "step": 800 + }, + { + "epoch": 5.510204081632653, + "grad_norm": 1.0314291715621948, + "learning_rate": 0.0002, + "loss": 1.785, + "step": 810 + }, + { + "epoch": 5.578231292517007, + "grad_norm": 0.8898603916168213, + "learning_rate": 0.0002, + "loss": 1.7471, + "step": 820 + }, + { + "epoch": 5.646258503401361, + "grad_norm": 1.011250376701355, + "learning_rate": 0.0002, + "loss": 1.7731, + "step": 830 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 0.9000794291496277, + "learning_rate": 0.0002, + "loss": 1.7588, + "step": 840 + }, + { + "epoch": 5.782312925170068, + "grad_norm": 1.0984753370285034, + "learning_rate": 0.0002, + "loss": 1.788, + "step": 850 + }, + { + "epoch": 5.850340136054422, + "grad_norm": 0.9162030220031738, + "learning_rate": 0.0002, + "loss": 1.7282, + "step": 860 + }, + { + "epoch": 5.918367346938775, + "grad_norm": 0.9867637753486633, + "learning_rate": 0.0002, + "loss": 1.7292, + "step": 870 + }, + { + "epoch": 5.986394557823129, + "grad_norm": 0.8848171234130859, + "learning_rate": 0.0002, + "loss": 1.7651, + "step": 880 + }, + { + "epoch": 6.0, + "eval_loss": 2.180830240249634, + "eval_runtime": 38.8838, + "eval_samples_per_second": 13.039, + "eval_steps_per_second": 1.646, + "step": 882 + }, + { + "epoch": 6.054421768707483, + "grad_norm": 1.0574727058410645, + "learning_rate": 0.0002, + "loss": 1.6712, + "step": 890 + }, + { + "epoch": 6.122448979591836, + "grad_norm": 1.1281784772872925, + "learning_rate": 0.0002, + "loss": 1.6928, + "step": 900 + }, + { + "epoch": 6.190476190476191, + "grad_norm": 0.9686701893806458, + "learning_rate": 0.0002, + "loss": 1.6952, + "step": 910 + }, + { + "epoch": 6.258503401360544, + "grad_norm": 1.016952633857727, + "learning_rate": 0.0002, + "loss": 1.6359, + "step": 920 + }, + { + "epoch": 6.326530612244898, + "grad_norm": 0.9630302786827087, + "learning_rate": 0.0002, + "loss": 1.6675, + "step": 930 + }, + { + "epoch": 6.394557823129252, + "grad_norm": 1.0207276344299316, + "learning_rate": 0.0002, + "loss": 1.7264, + "step": 940 + }, + { + "epoch": 6.462585034013605, + "grad_norm": 1.1470541954040527, + "learning_rate": 0.0002, + "loss": 1.6934, + "step": 950 + }, + { + "epoch": 6.530612244897959, + "grad_norm": 1.0892208814620972, + "learning_rate": 0.0002, + "loss": 1.6645, + "step": 960 + }, + { + "epoch": 6.598639455782313, + "grad_norm": 1.030396819114685, + "learning_rate": 0.0002, + "loss": 1.7105, + "step": 970 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 1.0828958749771118, + "learning_rate": 0.0002, + "loss": 1.709, + "step": 980 + }, + { + "epoch": 6.73469387755102, + "grad_norm": 1.1024560928344727, + "learning_rate": 0.0002, + "loss": 1.67, + "step": 990 + }, + { + "epoch": 6.802721088435375, + "grad_norm": 0.9986393451690674, + "learning_rate": 0.0002, + "loss": 1.7313, + "step": 1000 + }, + { + "epoch": 6.870748299319728, + "grad_norm": 1.0168452262878418, + "learning_rate": 0.0002, + "loss": 1.7041, + "step": 1010 + }, + { + "epoch": 6.938775510204081, + "grad_norm": 1.1757020950317383, + "learning_rate": 0.0002, + "loss": 1.6552, + "step": 1020 + }, + { + "epoch": 7.0, + "eval_loss": 2.2328195571899414, + "eval_runtime": 39.0199, + "eval_samples_per_second": 12.993, + "eval_steps_per_second": 1.64, + "step": 1029 + }, + { + "epoch": 7.006802721088436, + "grad_norm": 0.917491614818573, + "learning_rate": 0.0002, + "loss": 1.5896, + "step": 1030 + }, + { + "epoch": 7.074829931972789, + "grad_norm": 1.0197532176971436, + "learning_rate": 0.0002, + "loss": 1.5124, + "step": 1040 + }, + { + "epoch": 7.142857142857143, + "grad_norm": 1.1208992004394531, + "learning_rate": 0.0002, + "loss": 1.5645, + "step": 1050 + }, + { + "epoch": 7.210884353741497, + "grad_norm": 1.1166021823883057, + "learning_rate": 0.0002, + "loss": 1.5508, + "step": 1060 + }, + { + "epoch": 7.27891156462585, + "grad_norm": 1.1348235607147217, + "learning_rate": 0.0002, + "loss": 1.555, + "step": 1070 + }, + { + "epoch": 7.346938775510204, + "grad_norm": 1.2517306804656982, + "learning_rate": 0.0002, + "loss": 1.5497, + "step": 1080 + }, + { + "epoch": 7.414965986394558, + "grad_norm": 1.3219249248504639, + "learning_rate": 0.0002, + "loss": 1.6562, + "step": 1090 + }, + { + "epoch": 7.482993197278912, + "grad_norm": 1.0974860191345215, + "learning_rate": 0.0002, + "loss": 1.6394, + "step": 1100 + }, + { + "epoch": 7.551020408163265, + "grad_norm": 1.3503128290176392, + "learning_rate": 0.0002, + "loss": 1.5862, + "step": 1110 + }, + { + "epoch": 7.619047619047619, + "grad_norm": 1.2890093326568604, + "learning_rate": 0.0002, + "loss": 1.6387, + "step": 1120 + }, + { + "epoch": 7.687074829931973, + "grad_norm": 1.2831991910934448, + "learning_rate": 0.0002, + "loss": 1.6061, + "step": 1130 + }, + { + "epoch": 7.755102040816326, + "grad_norm": 1.1331373453140259, + "learning_rate": 0.0002, + "loss": 1.5803, + "step": 1140 + }, + { + "epoch": 7.8231292517006805, + "grad_norm": 1.1796238422393799, + "learning_rate": 0.0002, + "loss": 1.5519, + "step": 1150 + }, + { + "epoch": 7.891156462585034, + "grad_norm": 1.2917659282684326, + "learning_rate": 0.0002, + "loss": 1.5334, + "step": 1160 + }, + { + "epoch": 7.959183673469388, + "grad_norm": 1.2645825147628784, + "learning_rate": 0.0002, + "loss": 1.57, + "step": 1170 + }, + { + "epoch": 8.0, + "eval_loss": 2.2869091033935547, + "eval_runtime": 38.4302, + "eval_samples_per_second": 13.193, + "eval_steps_per_second": 1.665, + "step": 1176 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.4373067690082304e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-1176/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98a42dcff0a1dca00405182ea983f6474a3c44eb --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e9effbf5b950f0431ea269e99ac8e2cae65e7844e5f2a808b3be0524c89ccb +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0329337d241bc1e20eb4fe216b3f6d1cf55cf55b --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3a24ca0501ac8ec65bde5959493f31204683b569120fad7e1641a0fb4091631 +size 15064250 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6c376ae0af3b3be2e40676abd82abdb0defd17ae --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d4652be470eb454add7b17c741d662217f49f10c6986c8f0232d1516f3a1f6f +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e9cbe3ebe8128c3def0c5f16ddf4f437ba6bcc9c --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a9f5272dca00a67a663299f7fc10187d6643ce612719de5c600d2a9add79bf +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..32f3e66214ca5f7cacc5df05fd21b59568f7f791 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/trainer_state.json @@ -0,0 +1,139 @@ +{ + "best_metric": 2.2520766258239746, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147", + "epoch": 1.0, + "eval_steps": 10, + "global_step": 147, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1796633461260288.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..876c9258134f8e434202fc1b77cfd3384a998674 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a888ec6925f75ad12918c5b4767b671bae8ab1941b111629035903f9090e63 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d3e4e51b9c4b1ab4a9b6946d504396498bfe2af0 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:679ec1bd445d24dfdf86dbe20e59aae763a2a8af012464fa3b6860113e58ff0e +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9785cd71dd4fecfe769348ccf25de975713a6161 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:403e465dbdd5467ef2c3716e12596321982d2dfbdd580c3ea790ba98d37caab1 +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a208edd933fd82a00954ddae495af795a99171d4 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:893943beb4c6fed6bbfd11a6764578253efb35a1acb614f919ff98a51b2aeb2a +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9bec36c27f71e3300ef7c6f4a9d121cd9bc09e45 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/trainer_state.json @@ -0,0 +1,252 @@ +{ + "best_metric": 2.151526689529419, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 294, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3593266922520576.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4851c82fafea9bb3ebe668ab2e31de9e18c2eed --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc75bed62d53a2b476f62dab1b48583d07a528d4b737ec52826688f664d8ba62 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5561eb6a2624fcad28836bc526f7ae3a2588dd70 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b173f1b9d63d3a7352ad06363021c045c0e0347a172f795628b82dab9b32b338 +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cc3fbf635182bfd1d7910b5c157f6132d18fc904 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21a3c765247aeb8f3d0462af41bcac937d79affe1d0974976ef0e843c6df4659 +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a9fb326b5c0e197fffeddea24499cc1a65a06ad0 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:782a55c868c9769b697ad73a8dd9d936e5f24496d6ca9d2dc3449af0d5fd9b78 +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a5335d64ab0165db7a5d63de454c3cf14ead1ed7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/trainer_state.json @@ -0,0 +1,365 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 441, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5389900383780864.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e873f3a0192f297d1df9187bff99f52fad71eddc --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:510cc47684041e63c555c2b299872b08c85a9d6ca3facc8a3330c6d577be16fa +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..fabdc4f40ca094c1fd42c82f09364866154d55b3 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79c9dd4c1561e1e8ee312cefb6cff6e10d23b72215c22193892d3db1a0be29cb +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3b40079cba7bfde8b6c3756aef2534b9cd0b9b62 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cff7f4a1dcaaf5a505bb709a250338cfe5c1145fecbf7a302aa559b1d788279c +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0afcabeb7a7dedff1a692e21097e59fc83ed5402 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9ee932f5bdec463b144b64d4fff8468b2dfaae5982a0f916c97d61c80ddcf19 +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..80da93eabacbb30fc3f32f4054f7c24044375fa5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/trainer_state.json @@ -0,0 +1,471 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 588, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + }, + { + "epoch": 3.061224489795918, + "grad_norm": 0.7031271457672119, + "learning_rate": 0.0002, + "loss": 1.9638, + "step": 450 + }, + { + "epoch": 3.129251700680272, + "grad_norm": 0.7331708669662476, + "learning_rate": 0.0002, + "loss": 1.8729, + "step": 460 + }, + { + "epoch": 3.197278911564626, + "grad_norm": 0.7559226155281067, + "learning_rate": 0.0002, + "loss": 2.0023, + "step": 470 + }, + { + "epoch": 3.2653061224489797, + "grad_norm": 0.8188950419425964, + "learning_rate": 0.0002, + "loss": 1.9566, + "step": 480 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7805435657501221, + "learning_rate": 0.0002, + "loss": 1.9255, + "step": 490 + }, + { + "epoch": 3.4013605442176873, + "grad_norm": 0.7917240858078003, + "learning_rate": 0.0002, + "loss": 1.9461, + "step": 500 + }, + { + "epoch": 3.4693877551020407, + "grad_norm": 0.8258386254310608, + "learning_rate": 0.0002, + "loss": 1.901, + "step": 510 + }, + { + "epoch": 3.5374149659863945, + "grad_norm": 0.8375557661056519, + "learning_rate": 0.0002, + "loss": 1.9833, + "step": 520 + }, + { + "epoch": 3.6054421768707483, + "grad_norm": 0.8005449771881104, + "learning_rate": 0.0002, + "loss": 2.0152, + "step": 530 + }, + { + "epoch": 3.673469387755102, + "grad_norm": 0.799006462097168, + "learning_rate": 0.0002, + "loss": 1.9309, + "step": 540 + }, + { + "epoch": 3.741496598639456, + "grad_norm": 0.7934630513191223, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 550 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.7247752547264099, + "learning_rate": 0.0002, + "loss": 1.993, + "step": 560 + }, + { + "epoch": 3.877551020408163, + "grad_norm": 0.8138917088508606, + "learning_rate": 0.0002, + "loss": 1.9914, + "step": 570 + }, + { + "epoch": 3.945578231292517, + "grad_norm": 0.7810562252998352, + "learning_rate": 0.0002, + "loss": 1.9116, + "step": 580 + }, + { + "epoch": 4.0, + "eval_loss": 2.132384777069092, + "eval_runtime": 38.6163, + "eval_samples_per_second": 13.129, + "eval_steps_per_second": 1.657, + "step": 588 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7186533845041152.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-588/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37404c6cea31db3997792af2dbf7aacae5637340 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b32d764188d9b5666be169de817322e3b396cc79fd7a2d21604f89371854526 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..094a6d7c6c41f78cc96d43abf95f688f1363206a --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e844825a60ce62b75c1951a0785683ab569515c44af3b4656e71699b8a577a30 +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..5131f174ffd9b8494f23dcc9c1eac1aeb967d219 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7ff0183b0ec595609385b0a7025dfc25f90af02d8a62bb2b5506167cd06acc +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ddab3a40460a57b667b894f30d495bc38a3cf47d --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:107cd1873e15cebfa1d5eb56483ce0ddcba57c57a619378048cb90a4ed031aa7 +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6e7717d47a2d549c9d1959ed50ace9d698d9ec7d --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/trainer_state.json @@ -0,0 +1,584 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 5.0, + "eval_steps": 10, + "global_step": 735, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + }, + { + "epoch": 3.061224489795918, + "grad_norm": 0.7031271457672119, + "learning_rate": 0.0002, + "loss": 1.9638, + "step": 450 + }, + { + "epoch": 3.129251700680272, + "grad_norm": 0.7331708669662476, + "learning_rate": 0.0002, + "loss": 1.8729, + "step": 460 + }, + { + "epoch": 3.197278911564626, + "grad_norm": 0.7559226155281067, + "learning_rate": 0.0002, + "loss": 2.0023, + "step": 470 + }, + { + "epoch": 3.2653061224489797, + "grad_norm": 0.8188950419425964, + "learning_rate": 0.0002, + "loss": 1.9566, + "step": 480 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7805435657501221, + "learning_rate": 0.0002, + "loss": 1.9255, + "step": 490 + }, + { + "epoch": 3.4013605442176873, + "grad_norm": 0.7917240858078003, + "learning_rate": 0.0002, + "loss": 1.9461, + "step": 500 + }, + { + "epoch": 3.4693877551020407, + "grad_norm": 0.8258386254310608, + "learning_rate": 0.0002, + "loss": 1.901, + "step": 510 + }, + { + "epoch": 3.5374149659863945, + "grad_norm": 0.8375557661056519, + "learning_rate": 0.0002, + "loss": 1.9833, + "step": 520 + }, + { + "epoch": 3.6054421768707483, + "grad_norm": 0.8005449771881104, + "learning_rate": 0.0002, + "loss": 2.0152, + "step": 530 + }, + { + "epoch": 3.673469387755102, + "grad_norm": 0.799006462097168, + "learning_rate": 0.0002, + "loss": 1.9309, + "step": 540 + }, + { + "epoch": 3.741496598639456, + "grad_norm": 0.7934630513191223, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 550 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.7247752547264099, + "learning_rate": 0.0002, + "loss": 1.993, + "step": 560 + }, + { + "epoch": 3.877551020408163, + "grad_norm": 0.8138917088508606, + "learning_rate": 0.0002, + "loss": 1.9914, + "step": 570 + }, + { + "epoch": 3.945578231292517, + "grad_norm": 0.7810562252998352, + "learning_rate": 0.0002, + "loss": 1.9116, + "step": 580 + }, + { + "epoch": 4.0, + "eval_loss": 2.132384777069092, + "eval_runtime": 38.6163, + "eval_samples_per_second": 13.129, + "eval_steps_per_second": 1.657, + "step": 588 + }, + { + "epoch": 4.01360544217687, + "grad_norm": 0.7258784770965576, + "learning_rate": 0.0002, + "loss": 1.9048, + "step": 590 + }, + { + "epoch": 4.081632653061225, + "grad_norm": 0.8905395865440369, + "learning_rate": 0.0002, + "loss": 1.7991, + "step": 600 + }, + { + "epoch": 4.149659863945578, + "grad_norm": 0.8189161419868469, + "learning_rate": 0.0002, + "loss": 1.8841, + "step": 610 + }, + { + "epoch": 4.217687074829932, + "grad_norm": 0.8235230445861816, + "learning_rate": 0.0002, + "loss": 1.914, + "step": 620 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.8926266431808472, + "learning_rate": 0.0002, + "loss": 1.9114, + "step": 630 + }, + { + "epoch": 4.35374149659864, + "grad_norm": 0.9667059183120728, + "learning_rate": 0.0002, + "loss": 1.8944, + "step": 640 + }, + { + "epoch": 4.421768707482993, + "grad_norm": 0.8441583514213562, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 650 + }, + { + "epoch": 4.489795918367347, + "grad_norm": 0.8351956009864807, + "learning_rate": 0.0002, + "loss": 1.7937, + "step": 660 + }, + { + "epoch": 4.557823129251701, + "grad_norm": 0.8557114005088806, + "learning_rate": 0.0002, + "loss": 1.8439, + "step": 670 + }, + { + "epoch": 4.625850340136054, + "grad_norm": 0.8698110580444336, + "learning_rate": 0.0002, + "loss": 1.9426, + "step": 680 + }, + { + "epoch": 4.6938775510204085, + "grad_norm": 0.8394802808761597, + "learning_rate": 0.0002, + "loss": 1.8635, + "step": 690 + }, + { + "epoch": 4.761904761904762, + "grad_norm": 0.8168841004371643, + "learning_rate": 0.0002, + "loss": 1.8908, + "step": 700 + }, + { + "epoch": 4.829931972789115, + "grad_norm": 0.8049741387367249, + "learning_rate": 0.0002, + "loss": 1.8674, + "step": 710 + }, + { + "epoch": 4.8979591836734695, + "grad_norm": 0.7987792491912842, + "learning_rate": 0.0002, + "loss": 1.9289, + "step": 720 + }, + { + "epoch": 4.965986394557823, + "grad_norm": 0.9021750688552856, + "learning_rate": 0.0002, + "loss": 1.8779, + "step": 730 + }, + { + "epoch": 5.0, + "eval_loss": 2.1478686332702637, + "eval_runtime": 38.5077, + "eval_samples_per_second": 13.166, + "eval_steps_per_second": 1.662, + "step": 735 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8983167306301440.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-735/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/README.md b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_model.safetensors b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91567337f0d953594b6b7ceaa7bab9bf96927a73 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e2ace9d681204dd75cf6d08873e6a28045ef069f681555f901926aafc9fd151 +size 29500848 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/optimizer.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4c8be9b1a847cd66ee1953150716cf4c899cf8fb --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e4b9e0432ea8b851b9b000425660b6bf22a98a5c2a8305c8e074b12ef7ec951 +size 15064314 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/rng_state.pth b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6bb5788c1ccbb01bccbfe2a8664b8418db10c7b3 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d47741831139f30800aa8972d57527e96b22a047356b0a7f94016c5d6b7527b1 +size 14244 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/scheduler.pt b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2daa1dd53f4bc7eb73efae365c3d54239b0ed968 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98328ad65ad5852018d577817b21ad3aaa3078612066fd00fd46e7e196f4095a +size 1064 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/trainer_state.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..087193dae471c44120b3c2e11ae2c4b867718b83 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/trainer_state.json @@ -0,0 +1,697 @@ +{ + "best_metric": 2.1319973468780518, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 882, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06802721088435375, + "grad_norm": 1.5591567754745483, + "learning_rate": 0.0002, + "loss": 4.862, + "step": 10 + }, + { + "epoch": 0.1360544217687075, + "grad_norm": 1.063948154449463, + "learning_rate": 0.0002, + "loss": 3.6905, + "step": 20 + }, + { + "epoch": 0.20408163265306123, + "grad_norm": 0.9765065312385559, + "learning_rate": 0.0002, + "loss": 3.2247, + "step": 30 + }, + { + "epoch": 0.272108843537415, + "grad_norm": 1.8591399192810059, + "learning_rate": 0.0002, + "loss": 3.0135, + "step": 40 + }, + { + "epoch": 0.3401360544217687, + "grad_norm": 1.7384812831878662, + "learning_rate": 0.0002, + "loss": 2.7815, + "step": 50 + }, + { + "epoch": 0.40816326530612246, + "grad_norm": 1.0425862073898315, + "learning_rate": 0.0002, + "loss": 2.6828, + "step": 60 + }, + { + "epoch": 0.47619047619047616, + "grad_norm": 1.312426209449768, + "learning_rate": 0.0002, + "loss": 2.6078, + "step": 70 + }, + { + "epoch": 0.54421768707483, + "grad_norm": 0.9405839443206787, + "learning_rate": 0.0002, + "loss": 2.4843, + "step": 80 + }, + { + "epoch": 0.6122448979591837, + "grad_norm": 0.7359345555305481, + "learning_rate": 0.0002, + "loss": 2.4835, + "step": 90 + }, + { + "epoch": 0.6802721088435374, + "grad_norm": 0.8867717981338501, + "learning_rate": 0.0002, + "loss": 2.359, + "step": 100 + }, + { + "epoch": 0.7482993197278912, + "grad_norm": 0.7792682647705078, + "learning_rate": 0.0002, + "loss": 2.4368, + "step": 110 + }, + { + "epoch": 0.8163265306122449, + "grad_norm": 0.8457526564598083, + "learning_rate": 0.0002, + "loss": 2.284, + "step": 120 + }, + { + "epoch": 0.8843537414965986, + "grad_norm": 0.8050963282585144, + "learning_rate": 0.0002, + "loss": 2.2625, + "step": 130 + }, + { + "epoch": 0.9523809523809523, + "grad_norm": 0.8963590860366821, + "learning_rate": 0.0002, + "loss": 2.2911, + "step": 140 + }, + { + "epoch": 1.0, + "eval_loss": 2.2520766258239746, + "eval_runtime": 46.2783, + "eval_samples_per_second": 10.955, + "eval_steps_per_second": 1.383, + "step": 147 + }, + { + "epoch": 1.0204081632653061, + "grad_norm": 0.9270220994949341, + "learning_rate": 0.0002, + "loss": 2.2293, + "step": 150 + }, + { + "epoch": 1.08843537414966, + "grad_norm": 0.7620377540588379, + "learning_rate": 0.0002, + "loss": 2.2962, + "step": 160 + }, + { + "epoch": 1.1564625850340136, + "grad_norm": 0.8232784867286682, + "learning_rate": 0.0002, + "loss": 2.1326, + "step": 170 + }, + { + "epoch": 1.2244897959183674, + "grad_norm": 0.8901777267456055, + "learning_rate": 0.0002, + "loss": 2.1522, + "step": 180 + }, + { + "epoch": 1.2925170068027212, + "grad_norm": 0.8079978227615356, + "learning_rate": 0.0002, + "loss": 2.1235, + "step": 190 + }, + { + "epoch": 1.3605442176870748, + "grad_norm": 0.9010588526725769, + "learning_rate": 0.0002, + "loss": 2.1812, + "step": 200 + }, + { + "epoch": 1.4285714285714286, + "grad_norm": 0.7076186537742615, + "learning_rate": 0.0002, + "loss": 2.0572, + "step": 210 + }, + { + "epoch": 1.4965986394557822, + "grad_norm": 0.8887438774108887, + "learning_rate": 0.0002, + "loss": 2.1723, + "step": 220 + }, + { + "epoch": 1.564625850340136, + "grad_norm": 0.9181524515151978, + "learning_rate": 0.0002, + "loss": 2.1449, + "step": 230 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 0.788392961025238, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 240 + }, + { + "epoch": 1.7006802721088436, + "grad_norm": 0.8064964413642883, + "learning_rate": 0.0002, + "loss": 2.0904, + "step": 250 + }, + { + "epoch": 1.7687074829931972, + "grad_norm": 0.6783174276351929, + "learning_rate": 0.0002, + "loss": 2.1685, + "step": 260 + }, + { + "epoch": 1.836734693877551, + "grad_norm": 0.7616434693336487, + "learning_rate": 0.0002, + "loss": 2.0993, + "step": 270 + }, + { + "epoch": 1.9047619047619047, + "grad_norm": 0.6809217929840088, + "learning_rate": 0.0002, + "loss": 2.1568, + "step": 280 + }, + { + "epoch": 1.9727891156462585, + "grad_norm": 0.6849802732467651, + "learning_rate": 0.0002, + "loss": 2.0749, + "step": 290 + }, + { + "epoch": 2.0, + "eval_loss": 2.151526689529419, + "eval_runtime": 40.9832, + "eval_samples_per_second": 12.371, + "eval_steps_per_second": 1.562, + "step": 294 + }, + { + "epoch": 2.0408163265306123, + "grad_norm": 0.8966974020004272, + "learning_rate": 0.0002, + "loss": 2.066, + "step": 300 + }, + { + "epoch": 2.108843537414966, + "grad_norm": 0.8308210372924805, + "learning_rate": 0.0002, + "loss": 1.9629, + "step": 310 + }, + { + "epoch": 2.17687074829932, + "grad_norm": 0.7147582173347473, + "learning_rate": 0.0002, + "loss": 2.0365, + "step": 320 + }, + { + "epoch": 2.2448979591836733, + "grad_norm": 0.7694330215454102, + "learning_rate": 0.0002, + "loss": 1.9965, + "step": 330 + }, + { + "epoch": 2.312925170068027, + "grad_norm": 0.6489183306694031, + "learning_rate": 0.0002, + "loss": 2.0322, + "step": 340 + }, + { + "epoch": 2.380952380952381, + "grad_norm": 0.7661431431770325, + "learning_rate": 0.0002, + "loss": 2.0627, + "step": 350 + }, + { + "epoch": 2.4489795918367347, + "grad_norm": 0.8295474648475647, + "learning_rate": 0.0002, + "loss": 2.0033, + "step": 360 + }, + { + "epoch": 2.5170068027210886, + "grad_norm": 0.8664118647575378, + "learning_rate": 0.0002, + "loss": 2.0876, + "step": 370 + }, + { + "epoch": 2.5850340136054424, + "grad_norm": 0.6872050762176514, + "learning_rate": 0.0002, + "loss": 2.0085, + "step": 380 + }, + { + "epoch": 2.6530612244897958, + "grad_norm": 0.7354660630226135, + "learning_rate": 0.0002, + "loss": 2.0, + "step": 390 + }, + { + "epoch": 2.7210884353741496, + "grad_norm": 0.7240234017372131, + "learning_rate": 0.0002, + "loss": 2.0219, + "step": 400 + }, + { + "epoch": 2.7891156462585034, + "grad_norm": 0.8370463848114014, + "learning_rate": 0.0002, + "loss": 2.0902, + "step": 410 + }, + { + "epoch": 2.857142857142857, + "grad_norm": 0.6834917068481445, + "learning_rate": 0.0002, + "loss": 2.0799, + "step": 420 + }, + { + "epoch": 2.925170068027211, + "grad_norm": 0.7872207760810852, + "learning_rate": 0.0002, + "loss": 2.0192, + "step": 430 + }, + { + "epoch": 2.9931972789115644, + "grad_norm": 0.7045499086380005, + "learning_rate": 0.0002, + "loss": 2.0546, + "step": 440 + }, + { + "epoch": 3.0, + "eval_loss": 2.1319973468780518, + "eval_runtime": 38.9671, + "eval_samples_per_second": 13.011, + "eval_steps_per_second": 1.642, + "step": 441 + }, + { + "epoch": 3.061224489795918, + "grad_norm": 0.7031271457672119, + "learning_rate": 0.0002, + "loss": 1.9638, + "step": 450 + }, + { + "epoch": 3.129251700680272, + "grad_norm": 0.7331708669662476, + "learning_rate": 0.0002, + "loss": 1.8729, + "step": 460 + }, + { + "epoch": 3.197278911564626, + "grad_norm": 0.7559226155281067, + "learning_rate": 0.0002, + "loss": 2.0023, + "step": 470 + }, + { + "epoch": 3.2653061224489797, + "grad_norm": 0.8188950419425964, + "learning_rate": 0.0002, + "loss": 1.9566, + "step": 480 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.7805435657501221, + "learning_rate": 0.0002, + "loss": 1.9255, + "step": 490 + }, + { + "epoch": 3.4013605442176873, + "grad_norm": 0.7917240858078003, + "learning_rate": 0.0002, + "loss": 1.9461, + "step": 500 + }, + { + "epoch": 3.4693877551020407, + "grad_norm": 0.8258386254310608, + "learning_rate": 0.0002, + "loss": 1.901, + "step": 510 + }, + { + "epoch": 3.5374149659863945, + "grad_norm": 0.8375557661056519, + "learning_rate": 0.0002, + "loss": 1.9833, + "step": 520 + }, + { + "epoch": 3.6054421768707483, + "grad_norm": 0.8005449771881104, + "learning_rate": 0.0002, + "loss": 2.0152, + "step": 530 + }, + { + "epoch": 3.673469387755102, + "grad_norm": 0.799006462097168, + "learning_rate": 0.0002, + "loss": 1.9309, + "step": 540 + }, + { + "epoch": 3.741496598639456, + "grad_norm": 0.7934630513191223, + "learning_rate": 0.0002, + "loss": 1.9775, + "step": 550 + }, + { + "epoch": 3.8095238095238093, + "grad_norm": 0.7247752547264099, + "learning_rate": 0.0002, + "loss": 1.993, + "step": 560 + }, + { + "epoch": 3.877551020408163, + "grad_norm": 0.8138917088508606, + "learning_rate": 0.0002, + "loss": 1.9914, + "step": 570 + }, + { + "epoch": 3.945578231292517, + "grad_norm": 0.7810562252998352, + "learning_rate": 0.0002, + "loss": 1.9116, + "step": 580 + }, + { + "epoch": 4.0, + "eval_loss": 2.132384777069092, + "eval_runtime": 38.6163, + "eval_samples_per_second": 13.129, + "eval_steps_per_second": 1.657, + "step": 588 + }, + { + "epoch": 4.01360544217687, + "grad_norm": 0.7258784770965576, + "learning_rate": 0.0002, + "loss": 1.9048, + "step": 590 + }, + { + "epoch": 4.081632653061225, + "grad_norm": 0.8905395865440369, + "learning_rate": 0.0002, + "loss": 1.7991, + "step": 600 + }, + { + "epoch": 4.149659863945578, + "grad_norm": 0.8189161419868469, + "learning_rate": 0.0002, + "loss": 1.8841, + "step": 610 + }, + { + "epoch": 4.217687074829932, + "grad_norm": 0.8235230445861816, + "learning_rate": 0.0002, + "loss": 1.914, + "step": 620 + }, + { + "epoch": 4.285714285714286, + "grad_norm": 0.8926266431808472, + "learning_rate": 0.0002, + "loss": 1.9114, + "step": 630 + }, + { + "epoch": 4.35374149659864, + "grad_norm": 0.9667059183120728, + "learning_rate": 0.0002, + "loss": 1.8944, + "step": 640 + }, + { + "epoch": 4.421768707482993, + "grad_norm": 0.8441583514213562, + "learning_rate": 0.0002, + "loss": 1.7842, + "step": 650 + }, + { + "epoch": 4.489795918367347, + "grad_norm": 0.8351956009864807, + "learning_rate": 0.0002, + "loss": 1.7937, + "step": 660 + }, + { + "epoch": 4.557823129251701, + "grad_norm": 0.8557114005088806, + "learning_rate": 0.0002, + "loss": 1.8439, + "step": 670 + }, + { + "epoch": 4.625850340136054, + "grad_norm": 0.8698110580444336, + "learning_rate": 0.0002, + "loss": 1.9426, + "step": 680 + }, + { + "epoch": 4.6938775510204085, + "grad_norm": 0.8394802808761597, + "learning_rate": 0.0002, + "loss": 1.8635, + "step": 690 + }, + { + "epoch": 4.761904761904762, + "grad_norm": 0.8168841004371643, + "learning_rate": 0.0002, + "loss": 1.8908, + "step": 700 + }, + { + "epoch": 4.829931972789115, + "grad_norm": 0.8049741387367249, + "learning_rate": 0.0002, + "loss": 1.8674, + "step": 710 + }, + { + "epoch": 4.8979591836734695, + "grad_norm": 0.7987792491912842, + "learning_rate": 0.0002, + "loss": 1.9289, + "step": 720 + }, + { + "epoch": 4.965986394557823, + "grad_norm": 0.9021750688552856, + "learning_rate": 0.0002, + "loss": 1.8779, + "step": 730 + }, + { + "epoch": 5.0, + "eval_loss": 2.1478686332702637, + "eval_runtime": 38.5077, + "eval_samples_per_second": 13.166, + "eval_steps_per_second": 1.662, + "step": 735 + }, + { + "epoch": 5.034013605442177, + "grad_norm": 0.8057989478111267, + "learning_rate": 0.0002, + "loss": 1.8979, + "step": 740 + }, + { + "epoch": 5.1020408163265305, + "grad_norm": 0.9020641446113586, + "learning_rate": 0.0002, + "loss": 1.81, + "step": 750 + }, + { + "epoch": 5.170068027210885, + "grad_norm": 0.843891978263855, + "learning_rate": 0.0002, + "loss": 1.8219, + "step": 760 + }, + { + "epoch": 5.238095238095238, + "grad_norm": 0.8797562122344971, + "learning_rate": 0.0002, + "loss": 1.8153, + "step": 770 + }, + { + "epoch": 5.3061224489795915, + "grad_norm": 0.9378810524940491, + "learning_rate": 0.0002, + "loss": 1.8141, + "step": 780 + }, + { + "epoch": 5.374149659863946, + "grad_norm": 1.0502477884292603, + "learning_rate": 0.0002, + "loss": 1.7499, + "step": 790 + }, + { + "epoch": 5.442176870748299, + "grad_norm": 1.0142803192138672, + "learning_rate": 0.0002, + "loss": 1.7767, + "step": 800 + }, + { + "epoch": 5.510204081632653, + "grad_norm": 1.0314291715621948, + "learning_rate": 0.0002, + "loss": 1.785, + "step": 810 + }, + { + "epoch": 5.578231292517007, + "grad_norm": 0.8898603916168213, + "learning_rate": 0.0002, + "loss": 1.7471, + "step": 820 + }, + { + "epoch": 5.646258503401361, + "grad_norm": 1.011250376701355, + "learning_rate": 0.0002, + "loss": 1.7731, + "step": 830 + }, + { + "epoch": 5.714285714285714, + "grad_norm": 0.9000794291496277, + "learning_rate": 0.0002, + "loss": 1.7588, + "step": 840 + }, + { + "epoch": 5.782312925170068, + "grad_norm": 1.0984753370285034, + "learning_rate": 0.0002, + "loss": 1.788, + "step": 850 + }, + { + "epoch": 5.850340136054422, + "grad_norm": 0.9162030220031738, + "learning_rate": 0.0002, + "loss": 1.7282, + "step": 860 + }, + { + "epoch": 5.918367346938775, + "grad_norm": 0.9867637753486633, + "learning_rate": 0.0002, + "loss": 1.7292, + "step": 870 + }, + { + "epoch": 5.986394557823129, + "grad_norm": 0.8848171234130859, + "learning_rate": 0.0002, + "loss": 1.7651, + "step": 880 + }, + { + "epoch": 6.0, + "eval_loss": 2.180830240249634, + "eval_runtime": 38.8838, + "eval_samples_per_second": 13.039, + "eval_steps_per_second": 1.646, + "step": 882 + } + ], + "logging_steps": 10, + "max_steps": 1176, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.0779800767561728e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-882/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/special_tokens_map.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.model b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer_config.json b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_args.bin b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..645742c1f77bba1172fc3f1632a2a567dfa3b959 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e2b0557a8b3135cd8ec179c1b82119d3737b61891d3a4fe3303d40bb094cbce +size 5560 diff --git a/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_log.jsonl b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..8c2c85e994025f419648ed6c4e81eef51f9e7219 --- /dev/null +++ b/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 1.0, "step": 147, "epoch_duration": 199.2672381401062, "total_accumulated_duration": 199.2672381401062, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}]} +{"epoch": 2.0, "step": 294, "epoch_duration": 209.09527683258057, "total_accumulated_duration": 408.36251497268677, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-147", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}]} +{"epoch": 3.0, "step": 441, "epoch_duration": 175.33330869674683, "total_accumulated_duration": 583.6958236694336, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-294", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}]} +{"epoch": 4.0, "step": 588, "epoch_duration": 168.20174598693848, "total_accumulated_duration": 751.8975696563721, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}, {"eval_loss": 2.1319973468780518, "eval_runtime": 38.9671, "eval_samples_per_second": 13.011, "eval_steps_per_second": 1.642, "epoch": 3.0, "step": 441}, {"loss": 1.9638, "grad_norm": 0.7031271457672119, "learning_rate": 0.0002, "epoch": 3.061224489795918, "step": 450}, {"loss": 1.8729, "grad_norm": 0.7331708669662476, "learning_rate": 0.0002, "epoch": 3.129251700680272, "step": 460}, {"loss": 2.0023, "grad_norm": 0.7559226155281067, "learning_rate": 0.0002, "epoch": 3.197278911564626, "step": 470}, {"loss": 1.9566, "grad_norm": 0.8188950419425964, "learning_rate": 0.0002, "epoch": 3.2653061224489797, "step": 480}, {"loss": 1.9255, "grad_norm": 0.7805435657501221, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 490}, {"loss": 1.9461, "grad_norm": 0.7917240858078003, "learning_rate": 0.0002, "epoch": 3.4013605442176873, "step": 500}, {"loss": 1.901, "grad_norm": 0.8258386254310608, "learning_rate": 0.0002, "epoch": 3.4693877551020407, "step": 510}, {"loss": 1.9833, "grad_norm": 0.8375557661056519, "learning_rate": 0.0002, "epoch": 3.5374149659863945, "step": 520}, {"loss": 2.0152, "grad_norm": 0.8005449771881104, "learning_rate": 0.0002, "epoch": 3.6054421768707483, "step": 530}, {"loss": 1.9309, "grad_norm": 0.799006462097168, "learning_rate": 0.0002, "epoch": 3.673469387755102, "step": 540}, {"loss": 1.9775, "grad_norm": 0.7934630513191223, "learning_rate": 0.0002, "epoch": 3.741496598639456, "step": 550}, {"loss": 1.993, "grad_norm": 0.7247752547264099, "learning_rate": 0.0002, "epoch": 3.8095238095238093, "step": 560}, {"loss": 1.9914, "grad_norm": 0.8138917088508606, "learning_rate": 0.0002, "epoch": 3.877551020408163, "step": 570}, {"loss": 1.9116, "grad_norm": 0.7810562252998352, "learning_rate": 0.0002, "epoch": 3.945578231292517, "step": 580}]} +{"epoch": 5.0, "step": 735, "epoch_duration": 171.25689935684204, "total_accumulated_duration": 923.1544690132141, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}, {"eval_loss": 2.1319973468780518, "eval_runtime": 38.9671, "eval_samples_per_second": 13.011, "eval_steps_per_second": 1.642, "epoch": 3.0, "step": 441}, {"loss": 1.9638, "grad_norm": 0.7031271457672119, "learning_rate": 0.0002, "epoch": 3.061224489795918, "step": 450}, {"loss": 1.8729, "grad_norm": 0.7331708669662476, "learning_rate": 0.0002, "epoch": 3.129251700680272, "step": 460}, {"loss": 2.0023, "grad_norm": 0.7559226155281067, "learning_rate": 0.0002, "epoch": 3.197278911564626, "step": 470}, {"loss": 1.9566, "grad_norm": 0.8188950419425964, "learning_rate": 0.0002, "epoch": 3.2653061224489797, "step": 480}, {"loss": 1.9255, "grad_norm": 0.7805435657501221, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 490}, {"loss": 1.9461, "grad_norm": 0.7917240858078003, "learning_rate": 0.0002, "epoch": 3.4013605442176873, "step": 500}, {"loss": 1.901, "grad_norm": 0.8258386254310608, "learning_rate": 0.0002, "epoch": 3.4693877551020407, "step": 510}, {"loss": 1.9833, "grad_norm": 0.8375557661056519, "learning_rate": 0.0002, "epoch": 3.5374149659863945, "step": 520}, {"loss": 2.0152, "grad_norm": 0.8005449771881104, "learning_rate": 0.0002, "epoch": 3.6054421768707483, "step": 530}, {"loss": 1.9309, "grad_norm": 0.799006462097168, "learning_rate": 0.0002, "epoch": 3.673469387755102, "step": 540}, {"loss": 1.9775, "grad_norm": 0.7934630513191223, "learning_rate": 0.0002, "epoch": 3.741496598639456, "step": 550}, {"loss": 1.993, "grad_norm": 0.7247752547264099, "learning_rate": 0.0002, "epoch": 3.8095238095238093, "step": 560}, {"loss": 1.9914, "grad_norm": 0.8138917088508606, "learning_rate": 0.0002, "epoch": 3.877551020408163, "step": 570}, {"loss": 1.9116, "grad_norm": 0.7810562252998352, "learning_rate": 0.0002, "epoch": 3.945578231292517, "step": 580}, {"eval_loss": 2.132384777069092, "eval_runtime": 38.6163, "eval_samples_per_second": 13.129, "eval_steps_per_second": 1.657, "epoch": 4.0, "step": 588}, {"loss": 1.9048, "grad_norm": 0.7258784770965576, "learning_rate": 0.0002, "epoch": 4.01360544217687, "step": 590}, {"loss": 1.7991, "grad_norm": 0.8905395865440369, "learning_rate": 0.0002, "epoch": 4.081632653061225, "step": 600}, {"loss": 1.8841, "grad_norm": 0.8189161419868469, "learning_rate": 0.0002, "epoch": 4.149659863945578, "step": 610}, {"loss": 1.914, "grad_norm": 0.8235230445861816, "learning_rate": 0.0002, "epoch": 4.217687074829932, "step": 620}, {"loss": 1.9114, "grad_norm": 0.8926266431808472, "learning_rate": 0.0002, "epoch": 4.285714285714286, "step": 630}, {"loss": 1.8944, "grad_norm": 0.9667059183120728, "learning_rate": 0.0002, "epoch": 4.35374149659864, "step": 640}, {"loss": 1.7842, "grad_norm": 0.8441583514213562, "learning_rate": 0.0002, "epoch": 4.421768707482993, "step": 650}, {"loss": 1.7937, "grad_norm": 0.8351956009864807, "learning_rate": 0.0002, "epoch": 4.489795918367347, "step": 660}, {"loss": 1.8439, "grad_norm": 0.8557114005088806, "learning_rate": 0.0002, "epoch": 4.557823129251701, "step": 670}, {"loss": 1.9426, "grad_norm": 0.8698110580444336, "learning_rate": 0.0002, "epoch": 4.625850340136054, "step": 680}, {"loss": 1.8635, "grad_norm": 0.8394802808761597, "learning_rate": 0.0002, "epoch": 4.6938775510204085, "step": 690}, {"loss": 1.8908, "grad_norm": 0.8168841004371643, "learning_rate": 0.0002, "epoch": 4.761904761904762, "step": 700}, {"loss": 1.8674, "grad_norm": 0.8049741387367249, "learning_rate": 0.0002, "epoch": 4.829931972789115, "step": 710}, {"loss": 1.9289, "grad_norm": 0.7987792491912842, "learning_rate": 0.0002, "epoch": 4.8979591836734695, "step": 720}, {"loss": 1.8779, "grad_norm": 0.9021750688552856, "learning_rate": 0.0002, "epoch": 4.965986394557823, "step": 730}]} +{"epoch": 6.0, "step": 882, "epoch_duration": 171.3648488521576, "total_accumulated_duration": 1094.5193178653717, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}, {"eval_loss": 2.1319973468780518, "eval_runtime": 38.9671, "eval_samples_per_second": 13.011, "eval_steps_per_second": 1.642, "epoch": 3.0, "step": 441}, {"loss": 1.9638, "grad_norm": 0.7031271457672119, "learning_rate": 0.0002, "epoch": 3.061224489795918, "step": 450}, {"loss": 1.8729, "grad_norm": 0.7331708669662476, "learning_rate": 0.0002, "epoch": 3.129251700680272, "step": 460}, {"loss": 2.0023, "grad_norm": 0.7559226155281067, "learning_rate": 0.0002, "epoch": 3.197278911564626, "step": 470}, {"loss": 1.9566, "grad_norm": 0.8188950419425964, "learning_rate": 0.0002, "epoch": 3.2653061224489797, "step": 480}, {"loss": 1.9255, "grad_norm": 0.7805435657501221, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 490}, {"loss": 1.9461, "grad_norm": 0.7917240858078003, "learning_rate": 0.0002, "epoch": 3.4013605442176873, "step": 500}, {"loss": 1.901, "grad_norm": 0.8258386254310608, "learning_rate": 0.0002, "epoch": 3.4693877551020407, "step": 510}, {"loss": 1.9833, "grad_norm": 0.8375557661056519, "learning_rate": 0.0002, "epoch": 3.5374149659863945, "step": 520}, {"loss": 2.0152, "grad_norm": 0.8005449771881104, "learning_rate": 0.0002, "epoch": 3.6054421768707483, "step": 530}, {"loss": 1.9309, "grad_norm": 0.799006462097168, "learning_rate": 0.0002, "epoch": 3.673469387755102, "step": 540}, {"loss": 1.9775, "grad_norm": 0.7934630513191223, "learning_rate": 0.0002, "epoch": 3.741496598639456, "step": 550}, {"loss": 1.993, "grad_norm": 0.7247752547264099, "learning_rate": 0.0002, "epoch": 3.8095238095238093, "step": 560}, {"loss": 1.9914, "grad_norm": 0.8138917088508606, "learning_rate": 0.0002, "epoch": 3.877551020408163, "step": 570}, {"loss": 1.9116, "grad_norm": 0.7810562252998352, "learning_rate": 0.0002, "epoch": 3.945578231292517, "step": 580}, {"eval_loss": 2.132384777069092, "eval_runtime": 38.6163, "eval_samples_per_second": 13.129, "eval_steps_per_second": 1.657, "epoch": 4.0, "step": 588}, {"loss": 1.9048, "grad_norm": 0.7258784770965576, "learning_rate": 0.0002, "epoch": 4.01360544217687, "step": 590}, {"loss": 1.7991, "grad_norm": 0.8905395865440369, "learning_rate": 0.0002, "epoch": 4.081632653061225, "step": 600}, {"loss": 1.8841, "grad_norm": 0.8189161419868469, "learning_rate": 0.0002, "epoch": 4.149659863945578, "step": 610}, {"loss": 1.914, "grad_norm": 0.8235230445861816, "learning_rate": 0.0002, "epoch": 4.217687074829932, "step": 620}, {"loss": 1.9114, "grad_norm": 0.8926266431808472, "learning_rate": 0.0002, "epoch": 4.285714285714286, "step": 630}, {"loss": 1.8944, "grad_norm": 0.9667059183120728, "learning_rate": 0.0002, "epoch": 4.35374149659864, "step": 640}, {"loss": 1.7842, "grad_norm": 0.8441583514213562, "learning_rate": 0.0002, "epoch": 4.421768707482993, "step": 650}, {"loss": 1.7937, "grad_norm": 0.8351956009864807, "learning_rate": 0.0002, "epoch": 4.489795918367347, "step": 660}, {"loss": 1.8439, "grad_norm": 0.8557114005088806, "learning_rate": 0.0002, "epoch": 4.557823129251701, "step": 670}, {"loss": 1.9426, "grad_norm": 0.8698110580444336, "learning_rate": 0.0002, "epoch": 4.625850340136054, "step": 680}, {"loss": 1.8635, "grad_norm": 0.8394802808761597, "learning_rate": 0.0002, "epoch": 4.6938775510204085, "step": 690}, {"loss": 1.8908, "grad_norm": 0.8168841004371643, "learning_rate": 0.0002, "epoch": 4.761904761904762, "step": 700}, {"loss": 1.8674, "grad_norm": 0.8049741387367249, "learning_rate": 0.0002, "epoch": 4.829931972789115, "step": 710}, {"loss": 1.9289, "grad_norm": 0.7987792491912842, "learning_rate": 0.0002, "epoch": 4.8979591836734695, "step": 720}, {"loss": 1.8779, "grad_norm": 0.9021750688552856, "learning_rate": 0.0002, "epoch": 4.965986394557823, "step": 730}, {"eval_loss": 2.1478686332702637, "eval_runtime": 38.5077, "eval_samples_per_second": 13.166, "eval_steps_per_second": 1.662, "epoch": 5.0, "step": 735}, {"loss": 1.8979, "grad_norm": 0.8057989478111267, "learning_rate": 0.0002, "epoch": 5.034013605442177, "step": 740}, {"loss": 1.81, "grad_norm": 0.9020641446113586, "learning_rate": 0.0002, "epoch": 5.1020408163265305, "step": 750}, {"loss": 1.8219, "grad_norm": 0.843891978263855, "learning_rate": 0.0002, "epoch": 5.170068027210885, "step": 760}, {"loss": 1.8153, "grad_norm": 0.8797562122344971, "learning_rate": 0.0002, "epoch": 5.238095238095238, "step": 770}, {"loss": 1.8141, "grad_norm": 0.9378810524940491, "learning_rate": 0.0002, "epoch": 5.3061224489795915, "step": 780}, {"loss": 1.7499, "grad_norm": 1.0502477884292603, "learning_rate": 0.0002, "epoch": 5.374149659863946, "step": 790}, {"loss": 1.7767, "grad_norm": 1.0142803192138672, "learning_rate": 0.0002, "epoch": 5.442176870748299, "step": 800}, {"loss": 1.785, "grad_norm": 1.0314291715621948, "learning_rate": 0.0002, "epoch": 5.510204081632653, "step": 810}, {"loss": 1.7471, "grad_norm": 0.8898603916168213, "learning_rate": 0.0002, "epoch": 5.578231292517007, "step": 820}, {"loss": 1.7731, "grad_norm": 1.011250376701355, "learning_rate": 0.0002, "epoch": 5.646258503401361, "step": 830}, {"loss": 1.7588, "grad_norm": 0.9000794291496277, "learning_rate": 0.0002, "epoch": 5.714285714285714, "step": 840}, {"loss": 1.788, "grad_norm": 1.0984753370285034, "learning_rate": 0.0002, "epoch": 5.782312925170068, "step": 850}, {"loss": 1.7282, "grad_norm": 0.9162030220031738, "learning_rate": 0.0002, "epoch": 5.850340136054422, "step": 860}, {"loss": 1.7292, "grad_norm": 0.9867637753486633, "learning_rate": 0.0002, "epoch": 5.918367346938775, "step": 870}, {"loss": 1.7651, "grad_norm": 0.8848171234130859, "learning_rate": 0.0002, "epoch": 5.986394557823129, "step": 880}]} +{"epoch": 7.0, "step": 1029, "epoch_duration": 171.8463671207428, "total_accumulated_duration": 1266.3656849861145, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}, {"eval_loss": 2.1319973468780518, "eval_runtime": 38.9671, "eval_samples_per_second": 13.011, "eval_steps_per_second": 1.642, "epoch": 3.0, "step": 441}, {"loss": 1.9638, "grad_norm": 0.7031271457672119, "learning_rate": 0.0002, "epoch": 3.061224489795918, "step": 450}, {"loss": 1.8729, "grad_norm": 0.7331708669662476, "learning_rate": 0.0002, "epoch": 3.129251700680272, "step": 460}, {"loss": 2.0023, "grad_norm": 0.7559226155281067, "learning_rate": 0.0002, "epoch": 3.197278911564626, "step": 470}, {"loss": 1.9566, "grad_norm": 0.8188950419425964, "learning_rate": 0.0002, "epoch": 3.2653061224489797, "step": 480}, {"loss": 1.9255, "grad_norm": 0.7805435657501221, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 490}, {"loss": 1.9461, "grad_norm": 0.7917240858078003, "learning_rate": 0.0002, "epoch": 3.4013605442176873, "step": 500}, {"loss": 1.901, "grad_norm": 0.8258386254310608, "learning_rate": 0.0002, "epoch": 3.4693877551020407, "step": 510}, {"loss": 1.9833, "grad_norm": 0.8375557661056519, "learning_rate": 0.0002, "epoch": 3.5374149659863945, "step": 520}, {"loss": 2.0152, "grad_norm": 0.8005449771881104, "learning_rate": 0.0002, "epoch": 3.6054421768707483, "step": 530}, {"loss": 1.9309, "grad_norm": 0.799006462097168, "learning_rate": 0.0002, "epoch": 3.673469387755102, "step": 540}, {"loss": 1.9775, "grad_norm": 0.7934630513191223, "learning_rate": 0.0002, "epoch": 3.741496598639456, "step": 550}, {"loss": 1.993, "grad_norm": 0.7247752547264099, "learning_rate": 0.0002, "epoch": 3.8095238095238093, "step": 560}, {"loss": 1.9914, "grad_norm": 0.8138917088508606, "learning_rate": 0.0002, "epoch": 3.877551020408163, "step": 570}, {"loss": 1.9116, "grad_norm": 0.7810562252998352, "learning_rate": 0.0002, "epoch": 3.945578231292517, "step": 580}, {"eval_loss": 2.132384777069092, "eval_runtime": 38.6163, "eval_samples_per_second": 13.129, "eval_steps_per_second": 1.657, "epoch": 4.0, "step": 588}, {"loss": 1.9048, "grad_norm": 0.7258784770965576, "learning_rate": 0.0002, "epoch": 4.01360544217687, "step": 590}, {"loss": 1.7991, "grad_norm": 0.8905395865440369, "learning_rate": 0.0002, "epoch": 4.081632653061225, "step": 600}, {"loss": 1.8841, "grad_norm": 0.8189161419868469, "learning_rate": 0.0002, "epoch": 4.149659863945578, "step": 610}, {"loss": 1.914, "grad_norm": 0.8235230445861816, "learning_rate": 0.0002, "epoch": 4.217687074829932, "step": 620}, {"loss": 1.9114, "grad_norm": 0.8926266431808472, "learning_rate": 0.0002, "epoch": 4.285714285714286, "step": 630}, {"loss": 1.8944, "grad_norm": 0.9667059183120728, "learning_rate": 0.0002, "epoch": 4.35374149659864, "step": 640}, {"loss": 1.7842, "grad_norm": 0.8441583514213562, "learning_rate": 0.0002, "epoch": 4.421768707482993, "step": 650}, {"loss": 1.7937, "grad_norm": 0.8351956009864807, "learning_rate": 0.0002, "epoch": 4.489795918367347, "step": 660}, {"loss": 1.8439, "grad_norm": 0.8557114005088806, "learning_rate": 0.0002, "epoch": 4.557823129251701, "step": 670}, {"loss": 1.9426, "grad_norm": 0.8698110580444336, "learning_rate": 0.0002, "epoch": 4.625850340136054, "step": 680}, {"loss": 1.8635, "grad_norm": 0.8394802808761597, "learning_rate": 0.0002, "epoch": 4.6938775510204085, "step": 690}, {"loss": 1.8908, "grad_norm": 0.8168841004371643, "learning_rate": 0.0002, "epoch": 4.761904761904762, "step": 700}, {"loss": 1.8674, "grad_norm": 0.8049741387367249, "learning_rate": 0.0002, "epoch": 4.829931972789115, "step": 710}, {"loss": 1.9289, "grad_norm": 0.7987792491912842, "learning_rate": 0.0002, "epoch": 4.8979591836734695, "step": 720}, {"loss": 1.8779, "grad_norm": 0.9021750688552856, "learning_rate": 0.0002, "epoch": 4.965986394557823, "step": 730}, {"eval_loss": 2.1478686332702637, "eval_runtime": 38.5077, "eval_samples_per_second": 13.166, "eval_steps_per_second": 1.662, "epoch": 5.0, "step": 735}, {"loss": 1.8979, "grad_norm": 0.8057989478111267, "learning_rate": 0.0002, "epoch": 5.034013605442177, "step": 740}, {"loss": 1.81, "grad_norm": 0.9020641446113586, "learning_rate": 0.0002, "epoch": 5.1020408163265305, "step": 750}, {"loss": 1.8219, "grad_norm": 0.843891978263855, "learning_rate": 0.0002, "epoch": 5.170068027210885, "step": 760}, {"loss": 1.8153, "grad_norm": 0.8797562122344971, "learning_rate": 0.0002, "epoch": 5.238095238095238, "step": 770}, {"loss": 1.8141, "grad_norm": 0.9378810524940491, "learning_rate": 0.0002, "epoch": 5.3061224489795915, "step": 780}, {"loss": 1.7499, "grad_norm": 1.0502477884292603, "learning_rate": 0.0002, "epoch": 5.374149659863946, "step": 790}, {"loss": 1.7767, "grad_norm": 1.0142803192138672, "learning_rate": 0.0002, "epoch": 5.442176870748299, "step": 800}, {"loss": 1.785, "grad_norm": 1.0314291715621948, "learning_rate": 0.0002, "epoch": 5.510204081632653, "step": 810}, {"loss": 1.7471, "grad_norm": 0.8898603916168213, "learning_rate": 0.0002, "epoch": 5.578231292517007, "step": 820}, {"loss": 1.7731, "grad_norm": 1.011250376701355, "learning_rate": 0.0002, "epoch": 5.646258503401361, "step": 830}, {"loss": 1.7588, "grad_norm": 0.9000794291496277, "learning_rate": 0.0002, "epoch": 5.714285714285714, "step": 840}, {"loss": 1.788, "grad_norm": 1.0984753370285034, "learning_rate": 0.0002, "epoch": 5.782312925170068, "step": 850}, {"loss": 1.7282, "grad_norm": 0.9162030220031738, "learning_rate": 0.0002, "epoch": 5.850340136054422, "step": 860}, {"loss": 1.7292, "grad_norm": 0.9867637753486633, "learning_rate": 0.0002, "epoch": 5.918367346938775, "step": 870}, {"loss": 1.7651, "grad_norm": 0.8848171234130859, "learning_rate": 0.0002, "epoch": 5.986394557823129, "step": 880}, {"eval_loss": 2.180830240249634, "eval_runtime": 38.8838, "eval_samples_per_second": 13.039, "eval_steps_per_second": 1.646, "epoch": 6.0, "step": 882}, {"loss": 1.6712, "grad_norm": 1.0574727058410645, "learning_rate": 0.0002, "epoch": 6.054421768707483, "step": 890}, {"loss": 1.6928, "grad_norm": 1.1281784772872925, "learning_rate": 0.0002, "epoch": 6.122448979591836, "step": 900}, {"loss": 1.6952, "grad_norm": 0.9686701893806458, "learning_rate": 0.0002, "epoch": 6.190476190476191, "step": 910}, {"loss": 1.6359, "grad_norm": 1.016952633857727, "learning_rate": 0.0002, "epoch": 6.258503401360544, "step": 920}, {"loss": 1.6675, "grad_norm": 0.9630302786827087, "learning_rate": 0.0002, "epoch": 6.326530612244898, "step": 930}, {"loss": 1.7264, "grad_norm": 1.0207276344299316, "learning_rate": 0.0002, "epoch": 6.394557823129252, "step": 940}, {"loss": 1.6934, "grad_norm": 1.1470541954040527, "learning_rate": 0.0002, "epoch": 6.462585034013605, "step": 950}, {"loss": 1.6645, "grad_norm": 1.0892208814620972, "learning_rate": 0.0002, "epoch": 6.530612244897959, "step": 960}, {"loss": 1.7105, "grad_norm": 1.030396819114685, "learning_rate": 0.0002, "epoch": 6.598639455782313, "step": 970}, {"loss": 1.709, "grad_norm": 1.0828958749771118, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 980}, {"loss": 1.67, "grad_norm": 1.1024560928344727, "learning_rate": 0.0002, "epoch": 6.73469387755102, "step": 990}, {"loss": 1.7313, "grad_norm": 0.9986393451690674, "learning_rate": 0.0002, "epoch": 6.802721088435375, "step": 1000}, {"loss": 1.7041, "grad_norm": 1.0168452262878418, "learning_rate": 0.0002, "epoch": 6.870748299319728, "step": 1010}, {"loss": 1.6552, "grad_norm": 1.1757020950317383, "learning_rate": 0.0002, "epoch": 6.938775510204081, "step": 1020}]} +{"epoch": 8.0, "step": 1176, "epoch_duration": 171.78718423843384, "total_accumulated_duration": 1438.1528692245483, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.1748046875}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-702-sd-4/checkpoint-441", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 4.862, "grad_norm": 1.5591567754745483, "learning_rate": 0.0002, "epoch": 0.06802721088435375, "step": 10}, {"loss": 3.6905, "grad_norm": 1.063948154449463, "learning_rate": 0.0002, "epoch": 0.1360544217687075, "step": 20}, {"loss": 3.2247, "grad_norm": 0.9765065312385559, "learning_rate": 0.0002, "epoch": 0.20408163265306123, "step": 30}, {"loss": 3.0135, "grad_norm": 1.8591399192810059, "learning_rate": 0.0002, "epoch": 0.272108843537415, "step": 40}, {"loss": 2.7815, "grad_norm": 1.7384812831878662, "learning_rate": 0.0002, "epoch": 0.3401360544217687, "step": 50}, {"loss": 2.6828, "grad_norm": 1.0425862073898315, "learning_rate": 0.0002, "epoch": 0.40816326530612246, "step": 60}, {"loss": 2.6078, "grad_norm": 1.312426209449768, "learning_rate": 0.0002, "epoch": 0.47619047619047616, "step": 70}, {"loss": 2.4843, "grad_norm": 0.9405839443206787, "learning_rate": 0.0002, "epoch": 0.54421768707483, "step": 80}, {"loss": 2.4835, "grad_norm": 0.7359345555305481, "learning_rate": 0.0002, "epoch": 0.6122448979591837, "step": 90}, {"loss": 2.359, "grad_norm": 0.8867717981338501, "learning_rate": 0.0002, "epoch": 0.6802721088435374, "step": 100}, {"loss": 2.4368, "grad_norm": 0.7792682647705078, "learning_rate": 0.0002, "epoch": 0.7482993197278912, "step": 110}, {"loss": 2.284, "grad_norm": 0.8457526564598083, "learning_rate": 0.0002, "epoch": 0.8163265306122449, "step": 120}, {"loss": 2.2625, "grad_norm": 0.8050963282585144, "learning_rate": 0.0002, "epoch": 0.8843537414965986, "step": 130}, {"loss": 2.2911, "grad_norm": 0.8963590860366821, "learning_rate": 0.0002, "epoch": 0.9523809523809523, "step": 140}, {"eval_loss": 2.2520766258239746, "eval_runtime": 46.2783, "eval_samples_per_second": 10.955, "eval_steps_per_second": 1.383, "epoch": 1.0, "step": 147}, {"loss": 2.2293, "grad_norm": 0.9270220994949341, "learning_rate": 0.0002, "epoch": 1.0204081632653061, "step": 150}, {"loss": 2.2962, "grad_norm": 0.7620377540588379, "learning_rate": 0.0002, "epoch": 1.08843537414966, "step": 160}, {"loss": 2.1326, "grad_norm": 0.8232784867286682, "learning_rate": 0.0002, "epoch": 1.1564625850340136, "step": 170}, {"loss": 2.1522, "grad_norm": 0.8901777267456055, "learning_rate": 0.0002, "epoch": 1.2244897959183674, "step": 180}, {"loss": 2.1235, "grad_norm": 0.8079978227615356, "learning_rate": 0.0002, "epoch": 1.2925170068027212, "step": 190}, {"loss": 2.1812, "grad_norm": 0.9010588526725769, "learning_rate": 0.0002, "epoch": 1.3605442176870748, "step": 200}, {"loss": 2.0572, "grad_norm": 0.7076186537742615, "learning_rate": 0.0002, "epoch": 1.4285714285714286, "step": 210}, {"loss": 2.1723, "grad_norm": 0.8887438774108887, "learning_rate": 0.0002, "epoch": 1.4965986394557822, "step": 220}, {"loss": 2.1449, "grad_norm": 0.9181524515151978, "learning_rate": 0.0002, "epoch": 1.564625850340136, "step": 230}, {"loss": 2.0993, "grad_norm": 0.788392961025238, "learning_rate": 0.0002, "epoch": 1.6326530612244898, "step": 240}, {"loss": 2.0904, "grad_norm": 0.8064964413642883, "learning_rate": 0.0002, "epoch": 1.7006802721088436, "step": 250}, {"loss": 2.1685, "grad_norm": 0.6783174276351929, "learning_rate": 0.0002, "epoch": 1.7687074829931972, "step": 260}, {"loss": 2.0993, "grad_norm": 0.7616434693336487, "learning_rate": 0.0002, "epoch": 1.836734693877551, "step": 270}, {"loss": 2.1568, "grad_norm": 0.6809217929840088, "learning_rate": 0.0002, "epoch": 1.9047619047619047, "step": 280}, {"loss": 2.0749, "grad_norm": 0.6849802732467651, "learning_rate": 0.0002, "epoch": 1.9727891156462585, "step": 290}, {"eval_loss": 2.151526689529419, "eval_runtime": 40.9832, "eval_samples_per_second": 12.371, "eval_steps_per_second": 1.562, "epoch": 2.0, "step": 294}, {"loss": 2.066, "grad_norm": 0.8966974020004272, "learning_rate": 0.0002, "epoch": 2.0408163265306123, "step": 300}, {"loss": 1.9629, "grad_norm": 0.8308210372924805, "learning_rate": 0.0002, "epoch": 2.108843537414966, "step": 310}, {"loss": 2.0365, "grad_norm": 0.7147582173347473, "learning_rate": 0.0002, "epoch": 2.17687074829932, "step": 320}, {"loss": 1.9965, "grad_norm": 0.7694330215454102, "learning_rate": 0.0002, "epoch": 2.2448979591836733, "step": 330}, {"loss": 2.0322, "grad_norm": 0.6489183306694031, "learning_rate": 0.0002, "epoch": 2.312925170068027, "step": 340}, {"loss": 2.0627, "grad_norm": 0.7661431431770325, "learning_rate": 0.0002, "epoch": 2.380952380952381, "step": 350}, {"loss": 2.0033, "grad_norm": 0.8295474648475647, "learning_rate": 0.0002, "epoch": 2.4489795918367347, "step": 360}, {"loss": 2.0876, "grad_norm": 0.8664118647575378, "learning_rate": 0.0002, "epoch": 2.5170068027210886, "step": 370}, {"loss": 2.0085, "grad_norm": 0.6872050762176514, "learning_rate": 0.0002, "epoch": 2.5850340136054424, "step": 380}, {"loss": 2.0, "grad_norm": 0.7354660630226135, "learning_rate": 0.0002, "epoch": 2.6530612244897958, "step": 390}, {"loss": 2.0219, "grad_norm": 0.7240234017372131, "learning_rate": 0.0002, "epoch": 2.7210884353741496, "step": 400}, {"loss": 2.0902, "grad_norm": 0.8370463848114014, "learning_rate": 0.0002, "epoch": 2.7891156462585034, "step": 410}, {"loss": 2.0799, "grad_norm": 0.6834917068481445, "learning_rate": 0.0002, "epoch": 2.857142857142857, "step": 420}, {"loss": 2.0192, "grad_norm": 0.7872207760810852, "learning_rate": 0.0002, "epoch": 2.925170068027211, "step": 430}, {"loss": 2.0546, "grad_norm": 0.7045499086380005, "learning_rate": 0.0002, "epoch": 2.9931972789115644, "step": 440}, {"eval_loss": 2.1319973468780518, "eval_runtime": 38.9671, "eval_samples_per_second": 13.011, "eval_steps_per_second": 1.642, "epoch": 3.0, "step": 441}, {"loss": 1.9638, "grad_norm": 0.7031271457672119, "learning_rate": 0.0002, "epoch": 3.061224489795918, "step": 450}, {"loss": 1.8729, "grad_norm": 0.7331708669662476, "learning_rate": 0.0002, "epoch": 3.129251700680272, "step": 460}, {"loss": 2.0023, "grad_norm": 0.7559226155281067, "learning_rate": 0.0002, "epoch": 3.197278911564626, "step": 470}, {"loss": 1.9566, "grad_norm": 0.8188950419425964, "learning_rate": 0.0002, "epoch": 3.2653061224489797, "step": 480}, {"loss": 1.9255, "grad_norm": 0.7805435657501221, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 490}, {"loss": 1.9461, "grad_norm": 0.7917240858078003, "learning_rate": 0.0002, "epoch": 3.4013605442176873, "step": 500}, {"loss": 1.901, "grad_norm": 0.8258386254310608, "learning_rate": 0.0002, "epoch": 3.4693877551020407, "step": 510}, {"loss": 1.9833, "grad_norm": 0.8375557661056519, "learning_rate": 0.0002, "epoch": 3.5374149659863945, "step": 520}, {"loss": 2.0152, "grad_norm": 0.8005449771881104, "learning_rate": 0.0002, "epoch": 3.6054421768707483, "step": 530}, {"loss": 1.9309, "grad_norm": 0.799006462097168, "learning_rate": 0.0002, "epoch": 3.673469387755102, "step": 540}, {"loss": 1.9775, "grad_norm": 0.7934630513191223, "learning_rate": 0.0002, "epoch": 3.741496598639456, "step": 550}, {"loss": 1.993, "grad_norm": 0.7247752547264099, "learning_rate": 0.0002, "epoch": 3.8095238095238093, "step": 560}, {"loss": 1.9914, "grad_norm": 0.8138917088508606, "learning_rate": 0.0002, "epoch": 3.877551020408163, "step": 570}, {"loss": 1.9116, "grad_norm": 0.7810562252998352, "learning_rate": 0.0002, "epoch": 3.945578231292517, "step": 580}, {"eval_loss": 2.132384777069092, "eval_runtime": 38.6163, "eval_samples_per_second": 13.129, "eval_steps_per_second": 1.657, "epoch": 4.0, "step": 588}, {"loss": 1.9048, "grad_norm": 0.7258784770965576, "learning_rate": 0.0002, "epoch": 4.01360544217687, "step": 590}, {"loss": 1.7991, "grad_norm": 0.8905395865440369, "learning_rate": 0.0002, "epoch": 4.081632653061225, "step": 600}, {"loss": 1.8841, "grad_norm": 0.8189161419868469, "learning_rate": 0.0002, "epoch": 4.149659863945578, "step": 610}, {"loss": 1.914, "grad_norm": 0.8235230445861816, "learning_rate": 0.0002, "epoch": 4.217687074829932, "step": 620}, {"loss": 1.9114, "grad_norm": 0.8926266431808472, "learning_rate": 0.0002, "epoch": 4.285714285714286, "step": 630}, {"loss": 1.8944, "grad_norm": 0.9667059183120728, "learning_rate": 0.0002, "epoch": 4.35374149659864, "step": 640}, {"loss": 1.7842, "grad_norm": 0.8441583514213562, "learning_rate": 0.0002, "epoch": 4.421768707482993, "step": 650}, {"loss": 1.7937, "grad_norm": 0.8351956009864807, "learning_rate": 0.0002, "epoch": 4.489795918367347, "step": 660}, {"loss": 1.8439, "grad_norm": 0.8557114005088806, "learning_rate": 0.0002, "epoch": 4.557823129251701, "step": 670}, {"loss": 1.9426, "grad_norm": 0.8698110580444336, "learning_rate": 0.0002, "epoch": 4.625850340136054, "step": 680}, {"loss": 1.8635, "grad_norm": 0.8394802808761597, "learning_rate": 0.0002, "epoch": 4.6938775510204085, "step": 690}, {"loss": 1.8908, "grad_norm": 0.8168841004371643, "learning_rate": 0.0002, "epoch": 4.761904761904762, "step": 700}, {"loss": 1.8674, "grad_norm": 0.8049741387367249, "learning_rate": 0.0002, "epoch": 4.829931972789115, "step": 710}, {"loss": 1.9289, "grad_norm": 0.7987792491912842, "learning_rate": 0.0002, "epoch": 4.8979591836734695, "step": 720}, {"loss": 1.8779, "grad_norm": 0.9021750688552856, "learning_rate": 0.0002, "epoch": 4.965986394557823, "step": 730}, {"eval_loss": 2.1478686332702637, "eval_runtime": 38.5077, "eval_samples_per_second": 13.166, "eval_steps_per_second": 1.662, "epoch": 5.0, "step": 735}, {"loss": 1.8979, "grad_norm": 0.8057989478111267, "learning_rate": 0.0002, "epoch": 5.034013605442177, "step": 740}, {"loss": 1.81, "grad_norm": 0.9020641446113586, "learning_rate": 0.0002, "epoch": 5.1020408163265305, "step": 750}, {"loss": 1.8219, "grad_norm": 0.843891978263855, "learning_rate": 0.0002, "epoch": 5.170068027210885, "step": 760}, {"loss": 1.8153, "grad_norm": 0.8797562122344971, "learning_rate": 0.0002, "epoch": 5.238095238095238, "step": 770}, {"loss": 1.8141, "grad_norm": 0.9378810524940491, "learning_rate": 0.0002, "epoch": 5.3061224489795915, "step": 780}, {"loss": 1.7499, "grad_norm": 1.0502477884292603, "learning_rate": 0.0002, "epoch": 5.374149659863946, "step": 790}, {"loss": 1.7767, "grad_norm": 1.0142803192138672, "learning_rate": 0.0002, "epoch": 5.442176870748299, "step": 800}, {"loss": 1.785, "grad_norm": 1.0314291715621948, "learning_rate": 0.0002, "epoch": 5.510204081632653, "step": 810}, {"loss": 1.7471, "grad_norm": 0.8898603916168213, "learning_rate": 0.0002, "epoch": 5.578231292517007, "step": 820}, {"loss": 1.7731, "grad_norm": 1.011250376701355, "learning_rate": 0.0002, "epoch": 5.646258503401361, "step": 830}, {"loss": 1.7588, "grad_norm": 0.9000794291496277, "learning_rate": 0.0002, "epoch": 5.714285714285714, "step": 840}, {"loss": 1.788, "grad_norm": 1.0984753370285034, "learning_rate": 0.0002, "epoch": 5.782312925170068, "step": 850}, {"loss": 1.7282, "grad_norm": 0.9162030220031738, "learning_rate": 0.0002, "epoch": 5.850340136054422, "step": 860}, {"loss": 1.7292, "grad_norm": 0.9867637753486633, "learning_rate": 0.0002, "epoch": 5.918367346938775, "step": 870}, {"loss": 1.7651, "grad_norm": 0.8848171234130859, "learning_rate": 0.0002, "epoch": 5.986394557823129, "step": 880}, {"eval_loss": 2.180830240249634, "eval_runtime": 38.8838, "eval_samples_per_second": 13.039, "eval_steps_per_second": 1.646, "epoch": 6.0, "step": 882}, {"loss": 1.6712, "grad_norm": 1.0574727058410645, "learning_rate": 0.0002, "epoch": 6.054421768707483, "step": 890}, {"loss": 1.6928, "grad_norm": 1.1281784772872925, "learning_rate": 0.0002, "epoch": 6.122448979591836, "step": 900}, {"loss": 1.6952, "grad_norm": 0.9686701893806458, "learning_rate": 0.0002, "epoch": 6.190476190476191, "step": 910}, {"loss": 1.6359, "grad_norm": 1.016952633857727, "learning_rate": 0.0002, "epoch": 6.258503401360544, "step": 920}, {"loss": 1.6675, "grad_norm": 0.9630302786827087, "learning_rate": 0.0002, "epoch": 6.326530612244898, "step": 930}, {"loss": 1.7264, "grad_norm": 1.0207276344299316, "learning_rate": 0.0002, "epoch": 6.394557823129252, "step": 940}, {"loss": 1.6934, "grad_norm": 1.1470541954040527, "learning_rate": 0.0002, "epoch": 6.462585034013605, "step": 950}, {"loss": 1.6645, "grad_norm": 1.0892208814620972, "learning_rate": 0.0002, "epoch": 6.530612244897959, "step": 960}, {"loss": 1.7105, "grad_norm": 1.030396819114685, "learning_rate": 0.0002, "epoch": 6.598639455782313, "step": 970}, {"loss": 1.709, "grad_norm": 1.0828958749771118, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 980}, {"loss": 1.67, "grad_norm": 1.1024560928344727, "learning_rate": 0.0002, "epoch": 6.73469387755102, "step": 990}, {"loss": 1.7313, "grad_norm": 0.9986393451690674, "learning_rate": 0.0002, "epoch": 6.802721088435375, "step": 1000}, {"loss": 1.7041, "grad_norm": 1.0168452262878418, "learning_rate": 0.0002, "epoch": 6.870748299319728, "step": 1010}, {"loss": 1.6552, "grad_norm": 1.1757020950317383, "learning_rate": 0.0002, "epoch": 6.938775510204081, "step": 1020}, {"eval_loss": 2.2328195571899414, "eval_runtime": 39.0199, "eval_samples_per_second": 12.993, "eval_steps_per_second": 1.64, "epoch": 7.0, "step": 1029}, {"loss": 1.5896, "grad_norm": 0.917491614818573, "learning_rate": 0.0002, "epoch": 7.006802721088436, "step": 1030}, {"loss": 1.5124, "grad_norm": 1.0197532176971436, "learning_rate": 0.0002, "epoch": 7.074829931972789, "step": 1040}, {"loss": 1.5645, "grad_norm": 1.1208992004394531, "learning_rate": 0.0002, "epoch": 7.142857142857143, "step": 1050}, {"loss": 1.5508, "grad_norm": 1.1166021823883057, "learning_rate": 0.0002, "epoch": 7.210884353741497, "step": 1060}, {"loss": 1.555, "grad_norm": 1.1348235607147217, "learning_rate": 0.0002, "epoch": 7.27891156462585, "step": 1070}, {"loss": 1.5497, "grad_norm": 1.2517306804656982, "learning_rate": 0.0002, "epoch": 7.346938775510204, "step": 1080}, {"loss": 1.6562, "grad_norm": 1.3219249248504639, "learning_rate": 0.0002, "epoch": 7.414965986394558, "step": 1090}, {"loss": 1.6394, "grad_norm": 1.0974860191345215, "learning_rate": 0.0002, "epoch": 7.482993197278912, "step": 1100}, {"loss": 1.5862, "grad_norm": 1.3503128290176392, "learning_rate": 0.0002, "epoch": 7.551020408163265, "step": 1110}, {"loss": 1.6387, "grad_norm": 1.2890093326568604, "learning_rate": 0.0002, "epoch": 7.619047619047619, "step": 1120}, {"loss": 1.6061, "grad_norm": 1.2831991910934448, "learning_rate": 0.0002, "epoch": 7.687074829931973, "step": 1130}, {"loss": 1.5803, "grad_norm": 1.1331373453140259, "learning_rate": 0.0002, "epoch": 7.755102040816326, "step": 1140}, {"loss": 1.5519, "grad_norm": 1.1796238422393799, "learning_rate": 0.0002, "epoch": 7.8231292517006805, "step": 1150}, {"loss": 1.5334, "grad_norm": 1.2917659282684326, "learning_rate": 0.0002, "epoch": 7.891156462585034, "step": 1160}, {"loss": 1.57, "grad_norm": 1.2645825147628784, "learning_rate": 0.0002, "epoch": 7.959183673469388, "step": 1170}]}