diff --git a/.gitattributes b/.gitattributes index 68378763499be59d1f6249293ad8df66a738d181..0f9bc0b7a201ed865840daf0848bd22a3b2b32dc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -807,3 +807,12 @@ gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lor gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-612/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/checkpoint-918/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2-9b-it_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.2-num-1961-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7663de466c6e845744ae2a046d483b7398cd4d2 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e8fa468f6323f0633dcf14b9c91a98ec6a8814d4cb7d97fe784fc997809aea +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..70ef2244f99b7797c304c4d11eca151e65747b7c --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c70fe8ea0e0c93167363fceff82394263140292d312957544ac0872930ddd0f +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5a5026ad721940f5b9cf8fb5322259aa83572c45 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a72118539fd9ad9b0d0ec913931155fea4421e0c9cd5a69596840c005bb5d64 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a16be44652b1b45459880ec8ecbf9a7a2300568a --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:320eed1b4b11c4de884848b15478856ba4a806cc4afa4cd61e3e97de37cd9e7b +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..946f0b7cc62c21770a5c301d2b4d29052e0d1d35 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969199c3fbf754fec216bbd170861254d629f5797d20964b9b424cdfaeaf270b +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d8babad2e9dd2a60572114433f0bb6370708478e --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/trainer_state.json @@ -0,0 +1,845 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 6.996784565916399, + "eval_steps": 10, + "global_step": 1088, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + }, + { + "epoch": 3.022508038585209, + "grad_norm": 0.4903484582901001, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 470 + }, + { + "epoch": 3.0868167202572345, + "grad_norm": 0.7591149210929871, + "learning_rate": 0.0002, + "loss": 0.7771, + "step": 480 + }, + { + "epoch": 3.1511254019292605, + "grad_norm": 0.8178006410598755, + "learning_rate": 0.0002, + "loss": 0.8044, + "step": 490 + }, + { + "epoch": 3.215434083601286, + "grad_norm": 0.7482298016548157, + "learning_rate": 0.0002, + "loss": 0.8237, + "step": 500 + }, + { + "epoch": 3.279742765273312, + "grad_norm": 0.7520643472671509, + "learning_rate": 0.0002, + "loss": 0.8061, + "step": 510 + }, + { + "epoch": 3.3440514469453375, + "grad_norm": 0.6797061562538147, + "learning_rate": 0.0002, + "loss": 0.8065, + "step": 520 + }, + { + "epoch": 3.4083601286173635, + "grad_norm": 0.6733362674713135, + "learning_rate": 0.0002, + "loss": 0.827, + "step": 530 + }, + { + "epoch": 3.472668810289389, + "grad_norm": 0.6488103270530701, + "learning_rate": 0.0002, + "loss": 0.8628, + "step": 540 + }, + { + "epoch": 3.536977491961415, + "grad_norm": 0.6773484349250793, + "learning_rate": 0.0002, + "loss": 0.8053, + "step": 550 + }, + { + "epoch": 3.6012861736334405, + "grad_norm": 0.6569041609764099, + "learning_rate": 0.0002, + "loss": 0.8631, + "step": 560 + }, + { + "epoch": 3.665594855305466, + "grad_norm": 0.7477148771286011, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 570 + }, + { + "epoch": 3.729903536977492, + "grad_norm": 0.6446558237075806, + "learning_rate": 0.0002, + "loss": 0.8089, + "step": 580 + }, + { + "epoch": 3.7942122186495175, + "grad_norm": 0.6831859946250916, + "learning_rate": 0.0002, + "loss": 0.8287, + "step": 590 + }, + { + "epoch": 3.8585209003215435, + "grad_norm": 0.7512634992599487, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 600 + }, + { + "epoch": 3.922829581993569, + "grad_norm": 0.7508474588394165, + "learning_rate": 0.0002, + "loss": 0.8014, + "step": 610 + }, + { + "epoch": 3.987138263665595, + "grad_norm": 0.7288223505020142, + "learning_rate": 0.0002, + "loss": 0.845, + "step": 620 + }, + { + "epoch": 4.0, + "eval_loss": 1.2500178813934326, + "eval_runtime": 10.5131, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 622 + }, + { + "epoch": 4.051446945337621, + "grad_norm": 0.8475615382194519, + "learning_rate": 0.0002, + "loss": 0.6423, + "step": 630 + }, + { + "epoch": 4.115755627009646, + "grad_norm": 0.7431837916374207, + "learning_rate": 0.0002, + "loss": 0.6416, + "step": 640 + }, + { + "epoch": 4.180064308681672, + "grad_norm": 1.154038667678833, + "learning_rate": 0.0002, + "loss": 0.6748, + "step": 650 + }, + { + "epoch": 4.244372990353698, + "grad_norm": 0.8179714679718018, + "learning_rate": 0.0002, + "loss": 0.65, + "step": 660 + }, + { + "epoch": 4.308681672025724, + "grad_norm": 0.9329283237457275, + "learning_rate": 0.0002, + "loss": 0.6385, + "step": 670 + }, + { + "epoch": 4.372990353697749, + "grad_norm": 0.824656069278717, + "learning_rate": 0.0002, + "loss": 0.671, + "step": 680 + }, + { + "epoch": 4.437299035369775, + "grad_norm": 0.9766148924827576, + "learning_rate": 0.0002, + "loss": 0.6679, + "step": 690 + }, + { + "epoch": 4.501607717041801, + "grad_norm": 0.9103652238845825, + "learning_rate": 0.0002, + "loss": 0.6525, + "step": 700 + }, + { + "epoch": 4.565916398713826, + "grad_norm": 0.793594241142273, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 710 + }, + { + "epoch": 4.630225080385852, + "grad_norm": 0.9835829734802246, + "learning_rate": 0.0002, + "loss": 0.6712, + "step": 720 + }, + { + "epoch": 4.694533762057878, + "grad_norm": 1.0390352010726929, + "learning_rate": 0.0002, + "loss": 0.6757, + "step": 730 + }, + { + "epoch": 4.758842443729904, + "grad_norm": 1.0840471982955933, + "learning_rate": 0.0002, + "loss": 0.6959, + "step": 740 + }, + { + "epoch": 4.823151125401929, + "grad_norm": 0.8057735562324524, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 750 + }, + { + "epoch": 4.887459807073955, + "grad_norm": 0.8504151701927185, + "learning_rate": 0.0002, + "loss": 0.7202, + "step": 760 + }, + { + "epoch": 4.951768488745981, + "grad_norm": 0.8389859199523926, + "learning_rate": 0.0002, + "loss": 0.7001, + "step": 770 + }, + { + "epoch": 4.996784565916399, + "eval_loss": 1.3824537992477417, + "eval_runtime": 10.5075, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.237, + "step": 777 + }, + { + "epoch": 5.016077170418006, + "grad_norm": 0.8725755214691162, + "learning_rate": 0.0002, + "loss": 0.6425, + "step": 780 + }, + { + "epoch": 5.080385852090032, + "grad_norm": 0.792286217212677, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 790 + }, + { + "epoch": 5.144694533762058, + "grad_norm": 0.9615631699562073, + "learning_rate": 0.0002, + "loss": 0.5229, + "step": 800 + }, + { + "epoch": 5.209003215434084, + "grad_norm": 0.9059127569198608, + "learning_rate": 0.0002, + "loss": 0.5237, + "step": 810 + }, + { + "epoch": 5.273311897106109, + "grad_norm": 1.0275076627731323, + "learning_rate": 0.0002, + "loss": 0.5122, + "step": 820 + }, + { + "epoch": 5.337620578778135, + "grad_norm": 1.2929821014404297, + "learning_rate": 0.0002, + "loss": 0.4987, + "step": 830 + }, + { + "epoch": 5.401929260450161, + "grad_norm": 1.17123281955719, + "learning_rate": 0.0002, + "loss": 0.53, + "step": 840 + }, + { + "epoch": 5.466237942122186, + "grad_norm": 1.140464186668396, + "learning_rate": 0.0002, + "loss": 0.5364, + "step": 850 + }, + { + "epoch": 5.530546623794212, + "grad_norm": 1.3640265464782715, + "learning_rate": 0.0002, + "loss": 0.5303, + "step": 860 + }, + { + "epoch": 5.594855305466238, + "grad_norm": 1.1880438327789307, + "learning_rate": 0.0002, + "loss": 0.5272, + "step": 870 + }, + { + "epoch": 5.659163987138264, + "grad_norm": 1.1584500074386597, + "learning_rate": 0.0002, + "loss": 0.5574, + "step": 880 + }, + { + "epoch": 5.723472668810289, + "grad_norm": 1.1855696439743042, + "learning_rate": 0.0002, + "loss": 0.5469, + "step": 890 + }, + { + "epoch": 5.787781350482315, + "grad_norm": 1.0149868726730347, + "learning_rate": 0.0002, + "loss": 0.5376, + "step": 900 + }, + { + "epoch": 5.852090032154341, + "grad_norm": 1.0635329484939575, + "learning_rate": 0.0002, + "loss": 0.5131, + "step": 910 + }, + { + "epoch": 5.916398713826366, + "grad_norm": 1.2947518825531006, + "learning_rate": 0.0002, + "loss": 0.5486, + "step": 920 + }, + { + "epoch": 5.980707395498392, + "grad_norm": 1.205394983291626, + "learning_rate": 0.0002, + "loss": 0.5701, + "step": 930 + }, + { + "epoch": 6.0, + "eval_loss": 1.6060408353805542, + "eval_runtime": 10.5111, + "eval_samples_per_second": 9.323, + "eval_steps_per_second": 1.237, + "step": 933 + }, + { + "epoch": 6.045016077170418, + "grad_norm": 1.1479188203811646, + "learning_rate": 0.0002, + "loss": 0.4285, + "step": 940 + }, + { + "epoch": 6.109324758842444, + "grad_norm": 0.8727015256881714, + "learning_rate": 0.0002, + "loss": 0.3774, + "step": 950 + }, + { + "epoch": 6.173633440514469, + "grad_norm": 1.1554491519927979, + "learning_rate": 0.0002, + "loss": 0.407, + "step": 960 + }, + { + "epoch": 6.237942122186495, + "grad_norm": 1.0589015483856201, + "learning_rate": 0.0002, + "loss": 0.3888, + "step": 970 + }, + { + "epoch": 6.302250803858521, + "grad_norm": 1.158897876739502, + "learning_rate": 0.0002, + "loss": 0.4051, + "step": 980 + }, + { + "epoch": 6.366559485530547, + "grad_norm": 1.4029475450515747, + "learning_rate": 0.0002, + "loss": 0.4258, + "step": 990 + }, + { + "epoch": 6.430868167202572, + "grad_norm": 1.156851887702942, + "learning_rate": 0.0002, + "loss": 0.4103, + "step": 1000 + }, + { + "epoch": 6.495176848874598, + "grad_norm": 0.9887818694114685, + "learning_rate": 0.0002, + "loss": 0.4098, + "step": 1010 + }, + { + "epoch": 6.559485530546624, + "grad_norm": 1.0826616287231445, + "learning_rate": 0.0002, + "loss": 0.4329, + "step": 1020 + }, + { + "epoch": 6.62379421221865, + "grad_norm": 1.1170333623886108, + "learning_rate": 0.0002, + "loss": 0.4026, + "step": 1030 + }, + { + "epoch": 6.688102893890675, + "grad_norm": 1.313014030456543, + "learning_rate": 0.0002, + "loss": 0.4052, + "step": 1040 + }, + { + "epoch": 6.752411575562701, + "grad_norm": 1.183534026145935, + "learning_rate": 0.0002, + "loss": 0.4382, + "step": 1050 + }, + { + "epoch": 6.816720257234727, + "grad_norm": 1.1945377588272095, + "learning_rate": 0.0002, + "loss": 0.4142, + "step": 1060 + }, + { + "epoch": 6.881028938906752, + "grad_norm": 1.1162303686141968, + "learning_rate": 0.0002, + "loss": 0.4115, + "step": 1070 + }, + { + "epoch": 6.945337620578778, + "grad_norm": 1.1610374450683594, + "learning_rate": 0.0002, + "loss": 0.434, + "step": 1080 + }, + { + "epoch": 6.996784565916399, + "eval_loss": 1.7738038301467896, + "eval_runtime": 10.5081, + "eval_samples_per_second": 9.326, + "eval_steps_per_second": 1.237, + "step": 1088 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.590943711625216e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1088/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bc55c7e680f2d9bbde2404c6af5262bac8bb0a77 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3783a27590aac5fed027f3bf9dc8fabec9b043e8a40ad249461b5b64a234e07d +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..dae61af6641d5084373a4c3f1a58bc1692d39e43 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94162acd0ba4e9f6c04f4006ade2a83b2ff155f74c428eb8469d44f5ff4f1b01 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..91cd2c91e93c7830bed288929c47f7281534bcba --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b6bb71c042e7e12fef9eae3ddadac234a0bf223a882546898dfa779746881c2 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..db259eff517d81f813106cb79b38ccce484018bd --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:790ecab4184234dd697049c7d153605f6c55677d2b29e88ccca0b9879b7a6700 +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6ecd004d1aeed15bc000b777fa7511b7e61ad05b --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/trainer_state.json @@ -0,0 +1,965 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 7.97427652733119, + "eval_steps": 10, + "global_step": 1240, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + }, + { + "epoch": 3.022508038585209, + "grad_norm": 0.4903484582901001, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 470 + }, + { + "epoch": 3.0868167202572345, + "grad_norm": 0.7591149210929871, + "learning_rate": 0.0002, + "loss": 0.7771, + "step": 480 + }, + { + "epoch": 3.1511254019292605, + "grad_norm": 0.8178006410598755, + "learning_rate": 0.0002, + "loss": 0.8044, + "step": 490 + }, + { + "epoch": 3.215434083601286, + "grad_norm": 0.7482298016548157, + "learning_rate": 0.0002, + "loss": 0.8237, + "step": 500 + }, + { + "epoch": 3.279742765273312, + "grad_norm": 0.7520643472671509, + "learning_rate": 0.0002, + "loss": 0.8061, + "step": 510 + }, + { + "epoch": 3.3440514469453375, + "grad_norm": 0.6797061562538147, + "learning_rate": 0.0002, + "loss": 0.8065, + "step": 520 + }, + { + "epoch": 3.4083601286173635, + "grad_norm": 0.6733362674713135, + "learning_rate": 0.0002, + "loss": 0.827, + "step": 530 + }, + { + "epoch": 3.472668810289389, + "grad_norm": 0.6488103270530701, + "learning_rate": 0.0002, + "loss": 0.8628, + "step": 540 + }, + { + "epoch": 3.536977491961415, + "grad_norm": 0.6773484349250793, + "learning_rate": 0.0002, + "loss": 0.8053, + "step": 550 + }, + { + "epoch": 3.6012861736334405, + "grad_norm": 0.6569041609764099, + "learning_rate": 0.0002, + "loss": 0.8631, + "step": 560 + }, + { + "epoch": 3.665594855305466, + "grad_norm": 0.7477148771286011, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 570 + }, + { + "epoch": 3.729903536977492, + "grad_norm": 0.6446558237075806, + "learning_rate": 0.0002, + "loss": 0.8089, + "step": 580 + }, + { + "epoch": 3.7942122186495175, + "grad_norm": 0.6831859946250916, + "learning_rate": 0.0002, + "loss": 0.8287, + "step": 590 + }, + { + "epoch": 3.8585209003215435, + "grad_norm": 0.7512634992599487, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 600 + }, + { + "epoch": 3.922829581993569, + "grad_norm": 0.7508474588394165, + "learning_rate": 0.0002, + "loss": 0.8014, + "step": 610 + }, + { + "epoch": 3.987138263665595, + "grad_norm": 0.7288223505020142, + "learning_rate": 0.0002, + "loss": 0.845, + "step": 620 + }, + { + "epoch": 4.0, + "eval_loss": 1.2500178813934326, + "eval_runtime": 10.5131, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 622 + }, + { + "epoch": 4.051446945337621, + "grad_norm": 0.8475615382194519, + "learning_rate": 0.0002, + "loss": 0.6423, + "step": 630 + }, + { + "epoch": 4.115755627009646, + "grad_norm": 0.7431837916374207, + "learning_rate": 0.0002, + "loss": 0.6416, + "step": 640 + }, + { + "epoch": 4.180064308681672, + "grad_norm": 1.154038667678833, + "learning_rate": 0.0002, + "loss": 0.6748, + "step": 650 + }, + { + "epoch": 4.244372990353698, + "grad_norm": 0.8179714679718018, + "learning_rate": 0.0002, + "loss": 0.65, + "step": 660 + }, + { + "epoch": 4.308681672025724, + "grad_norm": 0.9329283237457275, + "learning_rate": 0.0002, + "loss": 0.6385, + "step": 670 + }, + { + "epoch": 4.372990353697749, + "grad_norm": 0.824656069278717, + "learning_rate": 0.0002, + "loss": 0.671, + "step": 680 + }, + { + "epoch": 4.437299035369775, + "grad_norm": 0.9766148924827576, + "learning_rate": 0.0002, + "loss": 0.6679, + "step": 690 + }, + { + "epoch": 4.501607717041801, + "grad_norm": 0.9103652238845825, + "learning_rate": 0.0002, + "loss": 0.6525, + "step": 700 + }, + { + "epoch": 4.565916398713826, + "grad_norm": 0.793594241142273, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 710 + }, + { + "epoch": 4.630225080385852, + "grad_norm": 0.9835829734802246, + "learning_rate": 0.0002, + "loss": 0.6712, + "step": 720 + }, + { + "epoch": 4.694533762057878, + "grad_norm": 1.0390352010726929, + "learning_rate": 0.0002, + "loss": 0.6757, + "step": 730 + }, + { + "epoch": 4.758842443729904, + "grad_norm": 1.0840471982955933, + "learning_rate": 0.0002, + "loss": 0.6959, + "step": 740 + }, + { + "epoch": 4.823151125401929, + "grad_norm": 0.8057735562324524, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 750 + }, + { + "epoch": 4.887459807073955, + "grad_norm": 0.8504151701927185, + "learning_rate": 0.0002, + "loss": 0.7202, + "step": 760 + }, + { + "epoch": 4.951768488745981, + "grad_norm": 0.8389859199523926, + "learning_rate": 0.0002, + "loss": 0.7001, + "step": 770 + }, + { + "epoch": 4.996784565916399, + "eval_loss": 1.3824537992477417, + "eval_runtime": 10.5075, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.237, + "step": 777 + }, + { + "epoch": 5.016077170418006, + "grad_norm": 0.8725755214691162, + "learning_rate": 0.0002, + "loss": 0.6425, + "step": 780 + }, + { + "epoch": 5.080385852090032, + "grad_norm": 0.792286217212677, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 790 + }, + { + "epoch": 5.144694533762058, + "grad_norm": 0.9615631699562073, + "learning_rate": 0.0002, + "loss": 0.5229, + "step": 800 + }, + { + "epoch": 5.209003215434084, + "grad_norm": 0.9059127569198608, + "learning_rate": 0.0002, + "loss": 0.5237, + "step": 810 + }, + { + "epoch": 5.273311897106109, + "grad_norm": 1.0275076627731323, + "learning_rate": 0.0002, + "loss": 0.5122, + "step": 820 + }, + { + "epoch": 5.337620578778135, + "grad_norm": 1.2929821014404297, + "learning_rate": 0.0002, + "loss": 0.4987, + "step": 830 + }, + { + "epoch": 5.401929260450161, + "grad_norm": 1.17123281955719, + "learning_rate": 0.0002, + "loss": 0.53, + "step": 840 + }, + { + "epoch": 5.466237942122186, + "grad_norm": 1.140464186668396, + "learning_rate": 0.0002, + "loss": 0.5364, + "step": 850 + }, + { + "epoch": 5.530546623794212, + "grad_norm": 1.3640265464782715, + "learning_rate": 0.0002, + "loss": 0.5303, + "step": 860 + }, + { + "epoch": 5.594855305466238, + "grad_norm": 1.1880438327789307, + "learning_rate": 0.0002, + "loss": 0.5272, + "step": 870 + }, + { + "epoch": 5.659163987138264, + "grad_norm": 1.1584500074386597, + "learning_rate": 0.0002, + "loss": 0.5574, + "step": 880 + }, + { + "epoch": 5.723472668810289, + "grad_norm": 1.1855696439743042, + "learning_rate": 0.0002, + "loss": 0.5469, + "step": 890 + }, + { + "epoch": 5.787781350482315, + "grad_norm": 1.0149868726730347, + "learning_rate": 0.0002, + "loss": 0.5376, + "step": 900 + }, + { + "epoch": 5.852090032154341, + "grad_norm": 1.0635329484939575, + "learning_rate": 0.0002, + "loss": 0.5131, + "step": 910 + }, + { + "epoch": 5.916398713826366, + "grad_norm": 1.2947518825531006, + "learning_rate": 0.0002, + "loss": 0.5486, + "step": 920 + }, + { + "epoch": 5.980707395498392, + "grad_norm": 1.205394983291626, + "learning_rate": 0.0002, + "loss": 0.5701, + "step": 930 + }, + { + "epoch": 6.0, + "eval_loss": 1.6060408353805542, + "eval_runtime": 10.5111, + "eval_samples_per_second": 9.323, + "eval_steps_per_second": 1.237, + "step": 933 + }, + { + "epoch": 6.045016077170418, + "grad_norm": 1.1479188203811646, + "learning_rate": 0.0002, + "loss": 0.4285, + "step": 940 + }, + { + "epoch": 6.109324758842444, + "grad_norm": 0.8727015256881714, + "learning_rate": 0.0002, + "loss": 0.3774, + "step": 950 + }, + { + "epoch": 6.173633440514469, + "grad_norm": 1.1554491519927979, + "learning_rate": 0.0002, + "loss": 0.407, + "step": 960 + }, + { + "epoch": 6.237942122186495, + "grad_norm": 1.0589015483856201, + "learning_rate": 0.0002, + "loss": 0.3888, + "step": 970 + }, + { + "epoch": 6.302250803858521, + "grad_norm": 1.158897876739502, + "learning_rate": 0.0002, + "loss": 0.4051, + "step": 980 + }, + { + "epoch": 6.366559485530547, + "grad_norm": 1.4029475450515747, + "learning_rate": 0.0002, + "loss": 0.4258, + "step": 990 + }, + { + "epoch": 6.430868167202572, + "grad_norm": 1.156851887702942, + "learning_rate": 0.0002, + "loss": 0.4103, + "step": 1000 + }, + { + "epoch": 6.495176848874598, + "grad_norm": 0.9887818694114685, + "learning_rate": 0.0002, + "loss": 0.4098, + "step": 1010 + }, + { + "epoch": 6.559485530546624, + "grad_norm": 1.0826616287231445, + "learning_rate": 0.0002, + "loss": 0.4329, + "step": 1020 + }, + { + "epoch": 6.62379421221865, + "grad_norm": 1.1170333623886108, + "learning_rate": 0.0002, + "loss": 0.4026, + "step": 1030 + }, + { + "epoch": 6.688102893890675, + "grad_norm": 1.313014030456543, + "learning_rate": 0.0002, + "loss": 0.4052, + "step": 1040 + }, + { + "epoch": 6.752411575562701, + "grad_norm": 1.183534026145935, + "learning_rate": 0.0002, + "loss": 0.4382, + "step": 1050 + }, + { + "epoch": 6.816720257234727, + "grad_norm": 1.1945377588272095, + "learning_rate": 0.0002, + "loss": 0.4142, + "step": 1060 + }, + { + "epoch": 6.881028938906752, + "grad_norm": 1.1162303686141968, + "learning_rate": 0.0002, + "loss": 0.4115, + "step": 1070 + }, + { + "epoch": 6.945337620578778, + "grad_norm": 1.1610374450683594, + "learning_rate": 0.0002, + "loss": 0.434, + "step": 1080 + }, + { + "epoch": 6.996784565916399, + "eval_loss": 1.7738038301467896, + "eval_runtime": 10.5081, + "eval_samples_per_second": 9.326, + "eval_steps_per_second": 1.237, + "step": 1088 + }, + { + "epoch": 7.009646302250804, + "grad_norm": 0.8958842158317566, + "learning_rate": 0.0002, + "loss": 0.4252, + "step": 1090 + }, + { + "epoch": 7.07395498392283, + "grad_norm": 1.4632889032363892, + "learning_rate": 0.0002, + "loss": 0.2968, + "step": 1100 + }, + { + "epoch": 7.138263665594855, + "grad_norm": 1.1745072603225708, + "learning_rate": 0.0002, + "loss": 0.3, + "step": 1110 + }, + { + "epoch": 7.202572347266881, + "grad_norm": 1.1066304445266724, + "learning_rate": 0.0002, + "loss": 0.2984, + "step": 1120 + }, + { + "epoch": 7.266881028938907, + "grad_norm": 1.455328345298767, + "learning_rate": 0.0002, + "loss": 0.3335, + "step": 1130 + }, + { + "epoch": 7.331189710610932, + "grad_norm": 1.5219749212265015, + "learning_rate": 0.0002, + "loss": 0.2985, + "step": 1140 + }, + { + "epoch": 7.395498392282958, + "grad_norm": 1.083840012550354, + "learning_rate": 0.0002, + "loss": 0.3215, + "step": 1150 + }, + { + "epoch": 7.459807073954984, + "grad_norm": 1.161246418952942, + "learning_rate": 0.0002, + "loss": 0.3276, + "step": 1160 + }, + { + "epoch": 7.52411575562701, + "grad_norm": 1.1832561492919922, + "learning_rate": 0.0002, + "loss": 0.335, + "step": 1170 + }, + { + "epoch": 7.588424437299035, + "grad_norm": 1.2522748708724976, + "learning_rate": 0.0002, + "loss": 0.3361, + "step": 1180 + }, + { + "epoch": 7.652733118971061, + "grad_norm": 1.2288755178451538, + "learning_rate": 0.0002, + "loss": 0.3385, + "step": 1190 + }, + { + "epoch": 7.717041800643087, + "grad_norm": 1.5007057189941406, + "learning_rate": 0.0002, + "loss": 0.3396, + "step": 1200 + }, + { + "epoch": 7.781350482315112, + "grad_norm": 0.8879519701004028, + "learning_rate": 0.0002, + "loss": 0.3361, + "step": 1210 + }, + { + "epoch": 7.845659163987138, + "grad_norm": 1.287729024887085, + "learning_rate": 0.0002, + "loss": 0.3553, + "step": 1220 + }, + { + "epoch": 7.909967845659164, + "grad_norm": 1.179373025894165, + "learning_rate": 0.0002, + "loss": 0.3364, + "step": 1230 + }, + { + "epoch": 7.97427652733119, + "grad_norm": 1.1422494649887085, + "learning_rate": 0.0002, + "loss": 0.3609, + "step": 1240 + }, + { + "epoch": 7.97427652733119, + "eval_loss": 1.9337385892868042, + "eval_runtime": 10.5047, + "eval_samples_per_second": 9.329, + "eval_steps_per_second": 1.238, + "step": 1240 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 6.36910445789184e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-1240/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..23b43e9837dad2dcd548b63c3bebe3b8678e63e6 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bc850505b34f2f5c49fbcd34d0f42c30834e5f72a93a0b7ac12021174cde5b +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..92f5ebddd2d304b1902aef148431142d98685586 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:387aac15a43f1560a9b87e7d462fbebde2ece440f7eff694d88a745518378ac3 +size 72886458 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e0af47a7482a9f35a3c5913a663105047d6d8547 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e923c90f436d888b7ad9aa4699d02561edc22f203f21c257906bdacbff8bdb8 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cdbea9b8f7233574bf35b02e95e781189d86504f --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e3f0fe030665431024765f87c4555310f3f8007f7d01f8d632b8f1b7b61efdc +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..082ab6cda195993d73618e9c0a2b1755d9ffaf50 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/trainer_state.json @@ -0,0 +1,146 @@ +{ + "best_metric": 1.1744003295898438, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155", + "epoch": 0.9967845659163987, + "eval_steps": 10, + "global_step": 155, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7987062445178880.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7663de466c6e845744ae2a046d483b7398cd4d2 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4e8fa468f6323f0633dcf14b9c91a98ec6a8814d4cb7d97fe784fc997809aea +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ea2b59dcb84dfc139d9918aa956236a673b821b9 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a273f24c578fee3817b01c30781ba0b0a7ecd5b66d336e8e8cb12f2f77e9e32 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..11ae4c8cf3c84a4f637ca1ab053d22d4498a55b0 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d77a4829ded2c11274da3a0c69dc621ed005f8834b2c7fbd8c249e8be57e4925 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..92c7f8b11f4c96f5642b82931a4e43c63e2d5d77 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd38fdb01fc173da3c2d0d1780adbdfc75337bcb1813b8816ee20f23aa4b6bf4 +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bf5dc434e94f9c4d1a5150ec3e8968314a3f8063 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/trainer_state.json @@ -0,0 +1,266 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 311, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.597412489035776e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..48fa15f80c884c649c9b75448fb851fe5cbcf67a --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72a96ddce6a3db3fcf8e0875fb46d66a04f52b6d908891be642fe48f384733b5 +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d1a2d402fd087e33d3e7f7b58ccb08d533bb02da --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec9b6b612df288eb317b96c56fed0e44dfdf615dec53b00e647d3f3b5f6a4832 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..13aa58c872f3685238142f2cea7c6fbc488ecda6 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc69ef8a3551ddeb83dfc9536c8b10a441b7cbfe589457ea1c672c69b3048265 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ab994a5e2998156e9c4ad881e6ac12ee2a814bec --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f420082d54c6bbbdb67cb0ca4fa59f3ed4d8141130741b067b32777d3593062d +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2c3fe0d45a0659a7d2bec8dad9d767ca35aee82a --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/trainer_state.json @@ -0,0 +1,379 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 2.996784565916399, + "eval_steps": 10, + "global_step": 466, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.396118733553664e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-466/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..303a6be6e1f2a8f78dd9d2f1c259b82327b34901 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5917d10852a399cc5dc31171f7b8884d9d2523a59a8cf889472777de2d0f30b2 +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a28e134aba8d8614248a719896339d614f5dca94 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39e4f70326fc3e7a18d2a3f397c06208aa4f86065bf4ba92c386e08c75a97d01 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..57482c28ff24779df69deb49312e2d6ff2b95028 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eacbd9935d9c02cc296a5388e415a62bbbe68691213a21b8ea1cf2aeb030813 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c64ea4f8b7c04d9f76f62e4100091dd84e4b8fd8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69296377486d674aff814f74ffc24646c39db648f5275685c6a05fdd7724528a +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8969984f27bc45201a653b10ba01efab320a02a9 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/trainer_state.json @@ -0,0 +1,499 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 622, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + }, + { + "epoch": 3.022508038585209, + "grad_norm": 0.4903484582901001, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 470 + }, + { + "epoch": 3.0868167202572345, + "grad_norm": 0.7591149210929871, + "learning_rate": 0.0002, + "loss": 0.7771, + "step": 480 + }, + { + "epoch": 3.1511254019292605, + "grad_norm": 0.8178006410598755, + "learning_rate": 0.0002, + "loss": 0.8044, + "step": 490 + }, + { + "epoch": 3.215434083601286, + "grad_norm": 0.7482298016548157, + "learning_rate": 0.0002, + "loss": 0.8237, + "step": 500 + }, + { + "epoch": 3.279742765273312, + "grad_norm": 0.7520643472671509, + "learning_rate": 0.0002, + "loss": 0.8061, + "step": 510 + }, + { + "epoch": 3.3440514469453375, + "grad_norm": 0.6797061562538147, + "learning_rate": 0.0002, + "loss": 0.8065, + "step": 520 + }, + { + "epoch": 3.4083601286173635, + "grad_norm": 0.6733362674713135, + "learning_rate": 0.0002, + "loss": 0.827, + "step": 530 + }, + { + "epoch": 3.472668810289389, + "grad_norm": 0.6488103270530701, + "learning_rate": 0.0002, + "loss": 0.8628, + "step": 540 + }, + { + "epoch": 3.536977491961415, + "grad_norm": 0.6773484349250793, + "learning_rate": 0.0002, + "loss": 0.8053, + "step": 550 + }, + { + "epoch": 3.6012861736334405, + "grad_norm": 0.6569041609764099, + "learning_rate": 0.0002, + "loss": 0.8631, + "step": 560 + }, + { + "epoch": 3.665594855305466, + "grad_norm": 0.7477148771286011, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 570 + }, + { + "epoch": 3.729903536977492, + "grad_norm": 0.6446558237075806, + "learning_rate": 0.0002, + "loss": 0.8089, + "step": 580 + }, + { + "epoch": 3.7942122186495175, + "grad_norm": 0.6831859946250916, + "learning_rate": 0.0002, + "loss": 0.8287, + "step": 590 + }, + { + "epoch": 3.8585209003215435, + "grad_norm": 0.7512634992599487, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 600 + }, + { + "epoch": 3.922829581993569, + "grad_norm": 0.7508474588394165, + "learning_rate": 0.0002, + "loss": 0.8014, + "step": 610 + }, + { + "epoch": 3.987138263665595, + "grad_norm": 0.7288223505020142, + "learning_rate": 0.0002, + "loss": 0.845, + "step": 620 + }, + { + "epoch": 4.0, + "eval_loss": 1.2500178813934326, + "eval_runtime": 10.5131, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 622 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.194824978071552e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-622/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b504318409b3212d4c358c3278cce4a5c796c68 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22af1885e3c89fa70ea6ba7047915b60b64319de52c1df7107b6c9870d81e9dd +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..05a10b181c8fffeb140d856022a5a5b894ad9267 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:144914c8dfdab602c55200cff069b037cc85732235a0d93e8fd322026b384cc4 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..40e41a444b612ec055e02142bd6a7b33313615d9 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af4096ad5695bbb5858f943b35da6ec003ae7c812b505201064538687543cf82 +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c53c046e5e666e7ae2acefd8331b823730fcefde --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80e6b07157d5cc62935f088136c0e65636aeaaa907ab49ead3e4efc764b8a588 +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ad902d94c39786dfacd12bc242dd2e59d2f8ca7a --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/trainer_state.json @@ -0,0 +1,612 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 4.996784565916399, + "eval_steps": 10, + "global_step": 777, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + }, + { + "epoch": 3.022508038585209, + "grad_norm": 0.4903484582901001, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 470 + }, + { + "epoch": 3.0868167202572345, + "grad_norm": 0.7591149210929871, + "learning_rate": 0.0002, + "loss": 0.7771, + "step": 480 + }, + { + "epoch": 3.1511254019292605, + "grad_norm": 0.8178006410598755, + "learning_rate": 0.0002, + "loss": 0.8044, + "step": 490 + }, + { + "epoch": 3.215434083601286, + "grad_norm": 0.7482298016548157, + "learning_rate": 0.0002, + "loss": 0.8237, + "step": 500 + }, + { + "epoch": 3.279742765273312, + "grad_norm": 0.7520643472671509, + "learning_rate": 0.0002, + "loss": 0.8061, + "step": 510 + }, + { + "epoch": 3.3440514469453375, + "grad_norm": 0.6797061562538147, + "learning_rate": 0.0002, + "loss": 0.8065, + "step": 520 + }, + { + "epoch": 3.4083601286173635, + "grad_norm": 0.6733362674713135, + "learning_rate": 0.0002, + "loss": 0.827, + "step": 530 + }, + { + "epoch": 3.472668810289389, + "grad_norm": 0.6488103270530701, + "learning_rate": 0.0002, + "loss": 0.8628, + "step": 540 + }, + { + "epoch": 3.536977491961415, + "grad_norm": 0.6773484349250793, + "learning_rate": 0.0002, + "loss": 0.8053, + "step": 550 + }, + { + "epoch": 3.6012861736334405, + "grad_norm": 0.6569041609764099, + "learning_rate": 0.0002, + "loss": 0.8631, + "step": 560 + }, + { + "epoch": 3.665594855305466, + "grad_norm": 0.7477148771286011, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 570 + }, + { + "epoch": 3.729903536977492, + "grad_norm": 0.6446558237075806, + "learning_rate": 0.0002, + "loss": 0.8089, + "step": 580 + }, + { + "epoch": 3.7942122186495175, + "grad_norm": 0.6831859946250916, + "learning_rate": 0.0002, + "loss": 0.8287, + "step": 590 + }, + { + "epoch": 3.8585209003215435, + "grad_norm": 0.7512634992599487, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 600 + }, + { + "epoch": 3.922829581993569, + "grad_norm": 0.7508474588394165, + "learning_rate": 0.0002, + "loss": 0.8014, + "step": 610 + }, + { + "epoch": 3.987138263665595, + "grad_norm": 0.7288223505020142, + "learning_rate": 0.0002, + "loss": 0.845, + "step": 620 + }, + { + "epoch": 4.0, + "eval_loss": 1.2500178813934326, + "eval_runtime": 10.5131, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 622 + }, + { + "epoch": 4.051446945337621, + "grad_norm": 0.8475615382194519, + "learning_rate": 0.0002, + "loss": 0.6423, + "step": 630 + }, + { + "epoch": 4.115755627009646, + "grad_norm": 0.7431837916374207, + "learning_rate": 0.0002, + "loss": 0.6416, + "step": 640 + }, + { + "epoch": 4.180064308681672, + "grad_norm": 1.154038667678833, + "learning_rate": 0.0002, + "loss": 0.6748, + "step": 650 + }, + { + "epoch": 4.244372990353698, + "grad_norm": 0.8179714679718018, + "learning_rate": 0.0002, + "loss": 0.65, + "step": 660 + }, + { + "epoch": 4.308681672025724, + "grad_norm": 0.9329283237457275, + "learning_rate": 0.0002, + "loss": 0.6385, + "step": 670 + }, + { + "epoch": 4.372990353697749, + "grad_norm": 0.824656069278717, + "learning_rate": 0.0002, + "loss": 0.671, + "step": 680 + }, + { + "epoch": 4.437299035369775, + "grad_norm": 0.9766148924827576, + "learning_rate": 0.0002, + "loss": 0.6679, + "step": 690 + }, + { + "epoch": 4.501607717041801, + "grad_norm": 0.9103652238845825, + "learning_rate": 0.0002, + "loss": 0.6525, + "step": 700 + }, + { + "epoch": 4.565916398713826, + "grad_norm": 0.793594241142273, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 710 + }, + { + "epoch": 4.630225080385852, + "grad_norm": 0.9835829734802246, + "learning_rate": 0.0002, + "loss": 0.6712, + "step": 720 + }, + { + "epoch": 4.694533762057878, + "grad_norm": 1.0390352010726929, + "learning_rate": 0.0002, + "loss": 0.6757, + "step": 730 + }, + { + "epoch": 4.758842443729904, + "grad_norm": 1.0840471982955933, + "learning_rate": 0.0002, + "loss": 0.6959, + "step": 740 + }, + { + "epoch": 4.823151125401929, + "grad_norm": 0.8057735562324524, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 750 + }, + { + "epoch": 4.887459807073955, + "grad_norm": 0.8504151701927185, + "learning_rate": 0.0002, + "loss": 0.7202, + "step": 760 + }, + { + "epoch": 4.951768488745981, + "grad_norm": 0.8389859199523926, + "learning_rate": 0.0002, + "loss": 0.7001, + "step": 770 + }, + { + "epoch": 4.996784565916399, + "eval_loss": 1.3824537992477417, + "eval_runtime": 10.5075, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.237, + "step": 777 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.99353122258944e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-777/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/README.md b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e98db163734cc03f7a8f8b3f720d3a2befdf7453 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "v_proj", + "q_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_model.safetensors b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed5bc781aed0374b470963c28dba7139d66f86df --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a53ee8e746b5d01fbebdae587cb53d02e628451fd16f965c9f47b092adec918a +size 143153376 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/optimizer.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c0544a2db6e3e326d79e73a4d39c84f6c6654254 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fa0c8642dd758abb9bd452cb26a6ec26f8799d38d121abf7467c16e9d19d1d9 +size 72886650 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/rng_state.pth b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..15db3ad3234a1029777d6ef300ff0f6a1b2fb866 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dbf3c83a5788084c6f63a635b2913be376a9183fd43e1bd1a7d79486c4592ae +size 14244 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/scheduler.pt b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..944681f0b4a9abd4fa54d725affd00421325827d --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73c7aa607a403af1a1baa550a97d4e61b7aa5463febb1d53b4fd3fa39af337ac +size 1064 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/trainer_state.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a82da71912a732bf3d848fb2c0e4b4be3d4095c9 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/trainer_state.json @@ -0,0 +1,732 @@ +{ + "best_metric": 1.1546189785003662, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 933, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06430868167202572, + "grad_norm": 0.5317481756210327, + "learning_rate": 0.0002, + "loss": 2.2272, + "step": 10 + }, + { + "epoch": 0.12861736334405144, + "grad_norm": 0.7665001749992371, + "learning_rate": 0.0002, + "loss": 1.6604, + "step": 20 + }, + { + "epoch": 0.19292604501607716, + "grad_norm": 0.4396904408931732, + "learning_rate": 0.0002, + "loss": 1.4015, + "step": 30 + }, + { + "epoch": 0.2572347266881029, + "grad_norm": 0.31786906719207764, + "learning_rate": 0.0002, + "loss": 1.3297, + "step": 40 + }, + { + "epoch": 0.3215434083601286, + "grad_norm": 0.41404595971107483, + "learning_rate": 0.0002, + "loss": 1.2938, + "step": 50 + }, + { + "epoch": 0.3858520900321543, + "grad_norm": 0.36728185415267944, + "learning_rate": 0.0002, + "loss": 1.2673, + "step": 60 + }, + { + "epoch": 0.45016077170418006, + "grad_norm": 0.38337618112564087, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 70 + }, + { + "epoch": 0.5144694533762058, + "grad_norm": 0.39411404728889465, + "learning_rate": 0.0002, + "loss": 1.2691, + "step": 80 + }, + { + "epoch": 0.5787781350482315, + "grad_norm": 0.39903542399406433, + "learning_rate": 0.0002, + "loss": 1.2563, + "step": 90 + }, + { + "epoch": 0.6430868167202572, + "grad_norm": 0.3390332758426666, + "learning_rate": 0.0002, + "loss": 1.2122, + "step": 100 + }, + { + "epoch": 0.707395498392283, + "grad_norm": 0.35814088582992554, + "learning_rate": 0.0002, + "loss": 1.251, + "step": 110 + }, + { + "epoch": 0.7717041800643086, + "grad_norm": 0.3480045199394226, + "learning_rate": 0.0002, + "loss": 1.218, + "step": 120 + }, + { + "epoch": 0.8360128617363344, + "grad_norm": 0.3282570540904999, + "learning_rate": 0.0002, + "loss": 1.1504, + "step": 130 + }, + { + "epoch": 0.9003215434083601, + "grad_norm": 0.33441081643104553, + "learning_rate": 0.0002, + "loss": 1.2131, + "step": 140 + }, + { + "epoch": 0.9646302250803859, + "grad_norm": 0.3344958424568176, + "learning_rate": 0.0002, + "loss": 1.2116, + "step": 150 + }, + { + "epoch": 0.9967845659163987, + "eval_loss": 1.1744003295898438, + "eval_runtime": 10.521, + "eval_samples_per_second": 9.315, + "eval_steps_per_second": 1.236, + "step": 155 + }, + { + "epoch": 1.0289389067524115, + "grad_norm": 0.3614383637905121, + "learning_rate": 0.0002, + "loss": 1.1898, + "step": 160 + }, + { + "epoch": 1.0932475884244373, + "grad_norm": 0.37686896324157715, + "learning_rate": 0.0002, + "loss": 1.1153, + "step": 170 + }, + { + "epoch": 1.157556270096463, + "grad_norm": 0.3803747296333313, + "learning_rate": 0.0002, + "loss": 1.1288, + "step": 180 + }, + { + "epoch": 1.2218649517684887, + "grad_norm": 0.35592594742774963, + "learning_rate": 0.0002, + "loss": 1.0915, + "step": 190 + }, + { + "epoch": 1.2861736334405145, + "grad_norm": 0.5097760558128357, + "learning_rate": 0.0002, + "loss": 1.0895, + "step": 200 + }, + { + "epoch": 1.3504823151125402, + "grad_norm": 0.3641100227832794, + "learning_rate": 0.0002, + "loss": 1.1268, + "step": 210 + }, + { + "epoch": 1.414790996784566, + "grad_norm": 0.3824535310268402, + "learning_rate": 0.0002, + "loss": 1.1212, + "step": 220 + }, + { + "epoch": 1.4790996784565915, + "grad_norm": 0.42148709297180176, + "learning_rate": 0.0002, + "loss": 1.1234, + "step": 230 + }, + { + "epoch": 1.5434083601286175, + "grad_norm": 0.44197967648506165, + "learning_rate": 0.0002, + "loss": 1.112, + "step": 240 + }, + { + "epoch": 1.607717041800643, + "grad_norm": 0.42140334844589233, + "learning_rate": 0.0002, + "loss": 1.0923, + "step": 250 + }, + { + "epoch": 1.6720257234726688, + "grad_norm": 0.404341459274292, + "learning_rate": 0.0002, + "loss": 1.1455, + "step": 260 + }, + { + "epoch": 1.7363344051446945, + "grad_norm": 0.47345927357673645, + "learning_rate": 0.0002, + "loss": 1.1258, + "step": 270 + }, + { + "epoch": 1.8006430868167203, + "grad_norm": 0.45900461077690125, + "learning_rate": 0.0002, + "loss": 1.0682, + "step": 280 + }, + { + "epoch": 1.864951768488746, + "grad_norm": 0.3809300363063812, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 290 + }, + { + "epoch": 1.9292604501607717, + "grad_norm": 0.4094211757183075, + "learning_rate": 0.0002, + "loss": 1.0773, + "step": 300 + }, + { + "epoch": 1.9935691318327975, + "grad_norm": 0.40402060747146606, + "learning_rate": 0.0002, + "loss": 1.1134, + "step": 310 + }, + { + "epoch": 2.0, + "eval_loss": 1.1546189785003662, + "eval_runtime": 10.5193, + "eval_samples_per_second": 9.316, + "eval_steps_per_second": 1.236, + "step": 311 + }, + { + "epoch": 2.057877813504823, + "grad_norm": 0.5758638978004456, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 320 + }, + { + "epoch": 2.122186495176849, + "grad_norm": 0.4793509542942047, + "learning_rate": 0.0002, + "loss": 0.975, + "step": 330 + }, + { + "epoch": 2.1864951768488745, + "grad_norm": 0.5104694962501526, + "learning_rate": 0.0002, + "loss": 0.9331, + "step": 340 + }, + { + "epoch": 2.2508038585209005, + "grad_norm": 0.49754178524017334, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 350 + }, + { + "epoch": 2.315112540192926, + "grad_norm": 0.5055416822433472, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 360 + }, + { + "epoch": 2.379421221864952, + "grad_norm": 0.5762393474578857, + "learning_rate": 0.0002, + "loss": 0.9793, + "step": 370 + }, + { + "epoch": 2.4437299035369775, + "grad_norm": 0.44768989086151123, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 380 + }, + { + "epoch": 2.508038585209003, + "grad_norm": 0.5598754286766052, + "learning_rate": 0.0002, + "loss": 0.9488, + "step": 390 + }, + { + "epoch": 2.572347266881029, + "grad_norm": 0.5343462824821472, + "learning_rate": 0.0002, + "loss": 1.0028, + "step": 400 + }, + { + "epoch": 2.6366559485530545, + "grad_norm": 0.4544358253479004, + "learning_rate": 0.0002, + "loss": 0.9871, + "step": 410 + }, + { + "epoch": 2.7009646302250805, + "grad_norm": 0.5724653005599976, + "learning_rate": 0.0002, + "loss": 1.0025, + "step": 420 + }, + { + "epoch": 2.765273311897106, + "grad_norm": 0.5844957828521729, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 430 + }, + { + "epoch": 2.829581993569132, + "grad_norm": 0.5306688547134399, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 440 + }, + { + "epoch": 2.8938906752411575, + "grad_norm": 0.5121245384216309, + "learning_rate": 0.0002, + "loss": 1.0221, + "step": 450 + }, + { + "epoch": 2.958199356913183, + "grad_norm": 0.47789978981018066, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 460 + }, + { + "epoch": 2.996784565916399, + "eval_loss": 1.1760698556900024, + "eval_runtime": 10.5123, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 466 + }, + { + "epoch": 3.022508038585209, + "grad_norm": 0.4903484582901001, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 470 + }, + { + "epoch": 3.0868167202572345, + "grad_norm": 0.7591149210929871, + "learning_rate": 0.0002, + "loss": 0.7771, + "step": 480 + }, + { + "epoch": 3.1511254019292605, + "grad_norm": 0.8178006410598755, + "learning_rate": 0.0002, + "loss": 0.8044, + "step": 490 + }, + { + "epoch": 3.215434083601286, + "grad_norm": 0.7482298016548157, + "learning_rate": 0.0002, + "loss": 0.8237, + "step": 500 + }, + { + "epoch": 3.279742765273312, + "grad_norm": 0.7520643472671509, + "learning_rate": 0.0002, + "loss": 0.8061, + "step": 510 + }, + { + "epoch": 3.3440514469453375, + "grad_norm": 0.6797061562538147, + "learning_rate": 0.0002, + "loss": 0.8065, + "step": 520 + }, + { + "epoch": 3.4083601286173635, + "grad_norm": 0.6733362674713135, + "learning_rate": 0.0002, + "loss": 0.827, + "step": 530 + }, + { + "epoch": 3.472668810289389, + "grad_norm": 0.6488103270530701, + "learning_rate": 0.0002, + "loss": 0.8628, + "step": 540 + }, + { + "epoch": 3.536977491961415, + "grad_norm": 0.6773484349250793, + "learning_rate": 0.0002, + "loss": 0.8053, + "step": 550 + }, + { + "epoch": 3.6012861736334405, + "grad_norm": 0.6569041609764099, + "learning_rate": 0.0002, + "loss": 0.8631, + "step": 560 + }, + { + "epoch": 3.665594855305466, + "grad_norm": 0.7477148771286011, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 570 + }, + { + "epoch": 3.729903536977492, + "grad_norm": 0.6446558237075806, + "learning_rate": 0.0002, + "loss": 0.8089, + "step": 580 + }, + { + "epoch": 3.7942122186495175, + "grad_norm": 0.6831859946250916, + "learning_rate": 0.0002, + "loss": 0.8287, + "step": 590 + }, + { + "epoch": 3.8585209003215435, + "grad_norm": 0.7512634992599487, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 600 + }, + { + "epoch": 3.922829581993569, + "grad_norm": 0.7508474588394165, + "learning_rate": 0.0002, + "loss": 0.8014, + "step": 610 + }, + { + "epoch": 3.987138263665595, + "grad_norm": 0.7288223505020142, + "learning_rate": 0.0002, + "loss": 0.845, + "step": 620 + }, + { + "epoch": 4.0, + "eval_loss": 1.2500178813934326, + "eval_runtime": 10.5131, + "eval_samples_per_second": 9.322, + "eval_steps_per_second": 1.237, + "step": 622 + }, + { + "epoch": 4.051446945337621, + "grad_norm": 0.8475615382194519, + "learning_rate": 0.0002, + "loss": 0.6423, + "step": 630 + }, + { + "epoch": 4.115755627009646, + "grad_norm": 0.7431837916374207, + "learning_rate": 0.0002, + "loss": 0.6416, + "step": 640 + }, + { + "epoch": 4.180064308681672, + "grad_norm": 1.154038667678833, + "learning_rate": 0.0002, + "loss": 0.6748, + "step": 650 + }, + { + "epoch": 4.244372990353698, + "grad_norm": 0.8179714679718018, + "learning_rate": 0.0002, + "loss": 0.65, + "step": 660 + }, + { + "epoch": 4.308681672025724, + "grad_norm": 0.9329283237457275, + "learning_rate": 0.0002, + "loss": 0.6385, + "step": 670 + }, + { + "epoch": 4.372990353697749, + "grad_norm": 0.824656069278717, + "learning_rate": 0.0002, + "loss": 0.671, + "step": 680 + }, + { + "epoch": 4.437299035369775, + "grad_norm": 0.9766148924827576, + "learning_rate": 0.0002, + "loss": 0.6679, + "step": 690 + }, + { + "epoch": 4.501607717041801, + "grad_norm": 0.9103652238845825, + "learning_rate": 0.0002, + "loss": 0.6525, + "step": 700 + }, + { + "epoch": 4.565916398713826, + "grad_norm": 0.793594241142273, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 710 + }, + { + "epoch": 4.630225080385852, + "grad_norm": 0.9835829734802246, + "learning_rate": 0.0002, + "loss": 0.6712, + "step": 720 + }, + { + "epoch": 4.694533762057878, + "grad_norm": 1.0390352010726929, + "learning_rate": 0.0002, + "loss": 0.6757, + "step": 730 + }, + { + "epoch": 4.758842443729904, + "grad_norm": 1.0840471982955933, + "learning_rate": 0.0002, + "loss": 0.6959, + "step": 740 + }, + { + "epoch": 4.823151125401929, + "grad_norm": 0.8057735562324524, + "learning_rate": 0.0002, + "loss": 0.6809, + "step": 750 + }, + { + "epoch": 4.887459807073955, + "grad_norm": 0.8504151701927185, + "learning_rate": 0.0002, + "loss": 0.7202, + "step": 760 + }, + { + "epoch": 4.951768488745981, + "grad_norm": 0.8389859199523926, + "learning_rate": 0.0002, + "loss": 0.7001, + "step": 770 + }, + { + "epoch": 4.996784565916399, + "eval_loss": 1.3824537992477417, + "eval_runtime": 10.5075, + "eval_samples_per_second": 9.327, + "eval_steps_per_second": 1.237, + "step": 777 + }, + { + "epoch": 5.016077170418006, + "grad_norm": 0.8725755214691162, + "learning_rate": 0.0002, + "loss": 0.6425, + "step": 780 + }, + { + "epoch": 5.080385852090032, + "grad_norm": 0.792286217212677, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 790 + }, + { + "epoch": 5.144694533762058, + "grad_norm": 0.9615631699562073, + "learning_rate": 0.0002, + "loss": 0.5229, + "step": 800 + }, + { + "epoch": 5.209003215434084, + "grad_norm": 0.9059127569198608, + "learning_rate": 0.0002, + "loss": 0.5237, + "step": 810 + }, + { + "epoch": 5.273311897106109, + "grad_norm": 1.0275076627731323, + "learning_rate": 0.0002, + "loss": 0.5122, + "step": 820 + }, + { + "epoch": 5.337620578778135, + "grad_norm": 1.2929821014404297, + "learning_rate": 0.0002, + "loss": 0.4987, + "step": 830 + }, + { + "epoch": 5.401929260450161, + "grad_norm": 1.17123281955719, + "learning_rate": 0.0002, + "loss": 0.53, + "step": 840 + }, + { + "epoch": 5.466237942122186, + "grad_norm": 1.140464186668396, + "learning_rate": 0.0002, + "loss": 0.5364, + "step": 850 + }, + { + "epoch": 5.530546623794212, + "grad_norm": 1.3640265464782715, + "learning_rate": 0.0002, + "loss": 0.5303, + "step": 860 + }, + { + "epoch": 5.594855305466238, + "grad_norm": 1.1880438327789307, + "learning_rate": 0.0002, + "loss": 0.5272, + "step": 870 + }, + { + "epoch": 5.659163987138264, + "grad_norm": 1.1584500074386597, + "learning_rate": 0.0002, + "loss": 0.5574, + "step": 880 + }, + { + "epoch": 5.723472668810289, + "grad_norm": 1.1855696439743042, + "learning_rate": 0.0002, + "loss": 0.5469, + "step": 890 + }, + { + "epoch": 5.787781350482315, + "grad_norm": 1.0149868726730347, + "learning_rate": 0.0002, + "loss": 0.5376, + "step": 900 + }, + { + "epoch": 5.852090032154341, + "grad_norm": 1.0635329484939575, + "learning_rate": 0.0002, + "loss": 0.5131, + "step": 910 + }, + { + "epoch": 5.916398713826366, + "grad_norm": 1.2947518825531006, + "learning_rate": 0.0002, + "loss": 0.5486, + "step": 920 + }, + { + "epoch": 5.980707395498392, + "grad_norm": 1.205394983291626, + "learning_rate": 0.0002, + "loss": 0.5701, + "step": 930 + }, + { + "epoch": 6.0, + "eval_loss": 1.6060408353805542, + "eval_runtime": 10.5111, + "eval_samples_per_second": 9.323, + "eval_steps_per_second": 1.237, + "step": 933 + } + ], + "logging_steps": 10, + "max_steps": 1240, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4.792237467107328e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-933/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/special_tokens_map.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..af0eac5c0056f83b8f3fcdb79165f8847111c305 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922 +size 17525357 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.model b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer_config.json b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_args.bin b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..0e6d36188f471587f71e03f7ccac2f1da5dc8040 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704b3d18001b534393bbe379593543ad27254922eefa5e6e440a4466e8c325c3 +size 5560 diff --git a/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_log.jsonl b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..c586ee849a6641155d6be762bf07b8a22c772ea0 --- /dev/null +++ b/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/training_log.jsonl @@ -0,0 +1,9 @@ +{"epoch": 0.9967845659163987, "step": 155, "epoch_duration": 869.7375962734222, "total_accumulated_duration": 869.7375962734222, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 11696.9921875}, "avg_memory_reserved": {"GPU_0": 12758.0}, "peak_memory_reserved": {"GPU_0": 12758.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2309, "grad_norm": 0.4888594150543213, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.681, "grad_norm": 0.620786726474762, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.3863, "grad_norm": 0.46370211243629456, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3238, "grad_norm": 0.3179257810115814, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2897, "grad_norm": 0.3916812837123871, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2578, "grad_norm": 0.35894036293029785, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2052, "grad_norm": 0.42763397097587585, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2679, "grad_norm": 0.3873509168624878, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.254, "grad_norm": 0.4062667191028595, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2107, "grad_norm": 0.34608784317970276, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.2487, "grad_norm": 0.3554210960865021, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.2198, "grad_norm": 0.3504224121570587, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1502, "grad_norm": 0.33106109499931335, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2121, "grad_norm": 0.33262011408805847, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2112, "grad_norm": 0.35451996326446533, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}]} +{"epoch": 0.9967845659163987, "step": 155, "epoch_duration": 232.74758458137512, "total_accumulated_duration": 232.74758458137512, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 11696.9921875}, "avg_memory_reserved": {"GPU_0": 12758.0}, "peak_memory_reserved": {"GPU_0": 12758.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}]} +{"epoch": 2.0, "step": 311, "epoch_duration": 233.23126673698425, "total_accumulated_duration": 465.9788513183594, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-155", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}]} +{"epoch": 2.996784565916399, "step": 466, "epoch_duration": 233.33213567733765, "total_accumulated_duration": 699.310986995697, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}]} +{"epoch": 4.0, "step": 622, "epoch_duration": 233.0598976612091, "total_accumulated_duration": 932.3708846569061, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}, {"eval_loss": 1.1760698556900024, "eval_runtime": 10.5123, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 2.996784565916399, "step": 466}, {"loss": 0.9531, "grad_norm": 0.4903484582901001, "learning_rate": 0.0002, "epoch": 3.022508038585209, "step": 470}, {"loss": 0.7771, "grad_norm": 0.7591149210929871, "learning_rate": 0.0002, "epoch": 3.0868167202572345, "step": 480}, {"loss": 0.8044, "grad_norm": 0.8178006410598755, "learning_rate": 0.0002, "epoch": 3.1511254019292605, "step": 490}, {"loss": 0.8237, "grad_norm": 0.7482298016548157, "learning_rate": 0.0002, "epoch": 3.215434083601286, "step": 500}, {"loss": 0.8061, "grad_norm": 0.7520643472671509, "learning_rate": 0.0002, "epoch": 3.279742765273312, "step": 510}, {"loss": 0.8065, "grad_norm": 0.6797061562538147, "learning_rate": 0.0002, "epoch": 3.3440514469453375, "step": 520}, {"loss": 0.827, "grad_norm": 0.6733362674713135, "learning_rate": 0.0002, "epoch": 3.4083601286173635, "step": 530}, {"loss": 0.8628, "grad_norm": 0.6488103270530701, "learning_rate": 0.0002, "epoch": 3.472668810289389, "step": 540}, {"loss": 0.8053, "grad_norm": 0.6773484349250793, "learning_rate": 0.0002, "epoch": 3.536977491961415, "step": 550}, {"loss": 0.8631, "grad_norm": 0.6569041609764099, "learning_rate": 0.0002, "epoch": 3.6012861736334405, "step": 560}, {"loss": 0.8251, "grad_norm": 0.7477148771286011, "learning_rate": 0.0002, "epoch": 3.665594855305466, "step": 570}, {"loss": 0.8089, "grad_norm": 0.6446558237075806, "learning_rate": 0.0002, "epoch": 3.729903536977492, "step": 580}, {"loss": 0.8287, "grad_norm": 0.6831859946250916, "learning_rate": 0.0002, "epoch": 3.7942122186495175, "step": 590}, {"loss": 0.8726, "grad_norm": 0.7512634992599487, "learning_rate": 0.0002, "epoch": 3.8585209003215435, "step": 600}, {"loss": 0.8014, "grad_norm": 0.7508474588394165, "learning_rate": 0.0002, "epoch": 3.922829581993569, "step": 610}, {"loss": 0.845, "grad_norm": 0.7288223505020142, "learning_rate": 0.0002, "epoch": 3.987138263665595, "step": 620}]} +{"epoch": 4.996784565916399, "step": 777, "epoch_duration": 232.9391164779663, "total_accumulated_duration": 1165.3100011348724, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}, {"eval_loss": 1.1760698556900024, "eval_runtime": 10.5123, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 2.996784565916399, "step": 466}, {"loss": 0.9531, "grad_norm": 0.4903484582901001, "learning_rate": 0.0002, "epoch": 3.022508038585209, "step": 470}, {"loss": 0.7771, "grad_norm": 0.7591149210929871, "learning_rate": 0.0002, "epoch": 3.0868167202572345, "step": 480}, {"loss": 0.8044, "grad_norm": 0.8178006410598755, "learning_rate": 0.0002, "epoch": 3.1511254019292605, "step": 490}, {"loss": 0.8237, "grad_norm": 0.7482298016548157, "learning_rate": 0.0002, "epoch": 3.215434083601286, "step": 500}, {"loss": 0.8061, "grad_norm": 0.7520643472671509, "learning_rate": 0.0002, "epoch": 3.279742765273312, "step": 510}, {"loss": 0.8065, "grad_norm": 0.6797061562538147, "learning_rate": 0.0002, "epoch": 3.3440514469453375, "step": 520}, {"loss": 0.827, "grad_norm": 0.6733362674713135, "learning_rate": 0.0002, "epoch": 3.4083601286173635, "step": 530}, {"loss": 0.8628, "grad_norm": 0.6488103270530701, "learning_rate": 0.0002, "epoch": 3.472668810289389, "step": 540}, {"loss": 0.8053, "grad_norm": 0.6773484349250793, "learning_rate": 0.0002, "epoch": 3.536977491961415, "step": 550}, {"loss": 0.8631, "grad_norm": 0.6569041609764099, "learning_rate": 0.0002, "epoch": 3.6012861736334405, "step": 560}, {"loss": 0.8251, "grad_norm": 0.7477148771286011, "learning_rate": 0.0002, "epoch": 3.665594855305466, "step": 570}, {"loss": 0.8089, "grad_norm": 0.6446558237075806, "learning_rate": 0.0002, "epoch": 3.729903536977492, "step": 580}, {"loss": 0.8287, "grad_norm": 0.6831859946250916, "learning_rate": 0.0002, "epoch": 3.7942122186495175, "step": 590}, {"loss": 0.8726, "grad_norm": 0.7512634992599487, "learning_rate": 0.0002, "epoch": 3.8585209003215435, "step": 600}, {"loss": 0.8014, "grad_norm": 0.7508474588394165, "learning_rate": 0.0002, "epoch": 3.922829581993569, "step": 610}, {"loss": 0.845, "grad_norm": 0.7288223505020142, "learning_rate": 0.0002, "epoch": 3.987138263665595, "step": 620}, {"eval_loss": 1.2500178813934326, "eval_runtime": 10.5131, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 4.0, "step": 622}, {"loss": 0.6423, "grad_norm": 0.8475615382194519, "learning_rate": 0.0002, "epoch": 4.051446945337621, "step": 630}, {"loss": 0.6416, "grad_norm": 0.7431837916374207, "learning_rate": 0.0002, "epoch": 4.115755627009646, "step": 640}, {"loss": 0.6748, "grad_norm": 1.154038667678833, "learning_rate": 0.0002, "epoch": 4.180064308681672, "step": 650}, {"loss": 0.65, "grad_norm": 0.8179714679718018, "learning_rate": 0.0002, "epoch": 4.244372990353698, "step": 660}, {"loss": 0.6385, "grad_norm": 0.9329283237457275, "learning_rate": 0.0002, "epoch": 4.308681672025724, "step": 670}, {"loss": 0.671, "grad_norm": 0.824656069278717, "learning_rate": 0.0002, "epoch": 4.372990353697749, "step": 680}, {"loss": 0.6679, "grad_norm": 0.9766148924827576, "learning_rate": 0.0002, "epoch": 4.437299035369775, "step": 690}, {"loss": 0.6525, "grad_norm": 0.9103652238845825, "learning_rate": 0.0002, "epoch": 4.501607717041801, "step": 700}, {"loss": 0.6809, "grad_norm": 0.793594241142273, "learning_rate": 0.0002, "epoch": 4.565916398713826, "step": 710}, {"loss": 0.6712, "grad_norm": 0.9835829734802246, "learning_rate": 0.0002, "epoch": 4.630225080385852, "step": 720}, {"loss": 0.6757, "grad_norm": 1.0390352010726929, "learning_rate": 0.0002, "epoch": 4.694533762057878, "step": 730}, {"loss": 0.6959, "grad_norm": 1.0840471982955933, "learning_rate": 0.0002, "epoch": 4.758842443729904, "step": 740}, {"loss": 0.6809, "grad_norm": 0.8057735562324524, "learning_rate": 0.0002, "epoch": 4.823151125401929, "step": 750}, {"loss": 0.7202, "grad_norm": 0.8504151701927185, "learning_rate": 0.0002, "epoch": 4.887459807073955, "step": 760}, {"loss": 0.7001, "grad_norm": 0.8389859199523926, "learning_rate": 0.0002, "epoch": 4.951768488745981, "step": 770}]} +{"epoch": 6.0, "step": 933, "epoch_duration": 232.85805296897888, "total_accumulated_duration": 1398.1680541038513, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}, {"eval_loss": 1.1760698556900024, "eval_runtime": 10.5123, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 2.996784565916399, "step": 466}, {"loss": 0.9531, "grad_norm": 0.4903484582901001, "learning_rate": 0.0002, "epoch": 3.022508038585209, "step": 470}, {"loss": 0.7771, "grad_norm": 0.7591149210929871, "learning_rate": 0.0002, "epoch": 3.0868167202572345, "step": 480}, {"loss": 0.8044, "grad_norm": 0.8178006410598755, "learning_rate": 0.0002, "epoch": 3.1511254019292605, "step": 490}, {"loss": 0.8237, "grad_norm": 0.7482298016548157, "learning_rate": 0.0002, "epoch": 3.215434083601286, "step": 500}, {"loss": 0.8061, "grad_norm": 0.7520643472671509, "learning_rate": 0.0002, "epoch": 3.279742765273312, "step": 510}, {"loss": 0.8065, "grad_norm": 0.6797061562538147, "learning_rate": 0.0002, "epoch": 3.3440514469453375, "step": 520}, {"loss": 0.827, "grad_norm": 0.6733362674713135, "learning_rate": 0.0002, "epoch": 3.4083601286173635, "step": 530}, {"loss": 0.8628, "grad_norm": 0.6488103270530701, "learning_rate": 0.0002, "epoch": 3.472668810289389, "step": 540}, {"loss": 0.8053, "grad_norm": 0.6773484349250793, "learning_rate": 0.0002, "epoch": 3.536977491961415, "step": 550}, {"loss": 0.8631, "grad_norm": 0.6569041609764099, "learning_rate": 0.0002, "epoch": 3.6012861736334405, "step": 560}, {"loss": 0.8251, "grad_norm": 0.7477148771286011, "learning_rate": 0.0002, "epoch": 3.665594855305466, "step": 570}, {"loss": 0.8089, "grad_norm": 0.6446558237075806, "learning_rate": 0.0002, "epoch": 3.729903536977492, "step": 580}, {"loss": 0.8287, "grad_norm": 0.6831859946250916, "learning_rate": 0.0002, "epoch": 3.7942122186495175, "step": 590}, {"loss": 0.8726, "grad_norm": 0.7512634992599487, "learning_rate": 0.0002, "epoch": 3.8585209003215435, "step": 600}, {"loss": 0.8014, "grad_norm": 0.7508474588394165, "learning_rate": 0.0002, "epoch": 3.922829581993569, "step": 610}, {"loss": 0.845, "grad_norm": 0.7288223505020142, "learning_rate": 0.0002, "epoch": 3.987138263665595, "step": 620}, {"eval_loss": 1.2500178813934326, "eval_runtime": 10.5131, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 4.0, "step": 622}, {"loss": 0.6423, "grad_norm": 0.8475615382194519, "learning_rate": 0.0002, "epoch": 4.051446945337621, "step": 630}, {"loss": 0.6416, "grad_norm": 0.7431837916374207, "learning_rate": 0.0002, "epoch": 4.115755627009646, "step": 640}, {"loss": 0.6748, "grad_norm": 1.154038667678833, "learning_rate": 0.0002, "epoch": 4.180064308681672, "step": 650}, {"loss": 0.65, "grad_norm": 0.8179714679718018, "learning_rate": 0.0002, "epoch": 4.244372990353698, "step": 660}, {"loss": 0.6385, "grad_norm": 0.9329283237457275, "learning_rate": 0.0002, "epoch": 4.308681672025724, "step": 670}, {"loss": 0.671, "grad_norm": 0.824656069278717, "learning_rate": 0.0002, "epoch": 4.372990353697749, "step": 680}, {"loss": 0.6679, "grad_norm": 0.9766148924827576, "learning_rate": 0.0002, "epoch": 4.437299035369775, "step": 690}, {"loss": 0.6525, "grad_norm": 0.9103652238845825, "learning_rate": 0.0002, "epoch": 4.501607717041801, "step": 700}, {"loss": 0.6809, "grad_norm": 0.793594241142273, "learning_rate": 0.0002, "epoch": 4.565916398713826, "step": 710}, {"loss": 0.6712, "grad_norm": 0.9835829734802246, "learning_rate": 0.0002, "epoch": 4.630225080385852, "step": 720}, {"loss": 0.6757, "grad_norm": 1.0390352010726929, "learning_rate": 0.0002, "epoch": 4.694533762057878, "step": 730}, {"loss": 0.6959, "grad_norm": 1.0840471982955933, "learning_rate": 0.0002, "epoch": 4.758842443729904, "step": 740}, {"loss": 0.6809, "grad_norm": 0.8057735562324524, "learning_rate": 0.0002, "epoch": 4.823151125401929, "step": 750}, {"loss": 0.7202, "grad_norm": 0.8504151701927185, "learning_rate": 0.0002, "epoch": 4.887459807073955, "step": 760}, {"loss": 0.7001, "grad_norm": 0.8389859199523926, "learning_rate": 0.0002, "epoch": 4.951768488745981, "step": 770}, {"eval_loss": 1.3824537992477417, "eval_runtime": 10.5075, "eval_samples_per_second": 9.327, "eval_steps_per_second": 1.237, "epoch": 4.996784565916399, "step": 777}, {"loss": 0.6425, "grad_norm": 0.8725755214691162, "learning_rate": 0.0002, "epoch": 5.016077170418006, "step": 780}, {"loss": 0.4945, "grad_norm": 0.792286217212677, "learning_rate": 0.0002, "epoch": 5.080385852090032, "step": 790}, {"loss": 0.5229, "grad_norm": 0.9615631699562073, "learning_rate": 0.0002, "epoch": 5.144694533762058, "step": 800}, {"loss": 0.5237, "grad_norm": 0.9059127569198608, "learning_rate": 0.0002, "epoch": 5.209003215434084, "step": 810}, {"loss": 0.5122, "grad_norm": 1.0275076627731323, "learning_rate": 0.0002, "epoch": 5.273311897106109, "step": 820}, {"loss": 0.4987, "grad_norm": 1.2929821014404297, "learning_rate": 0.0002, "epoch": 5.337620578778135, "step": 830}, {"loss": 0.53, "grad_norm": 1.17123281955719, "learning_rate": 0.0002, "epoch": 5.401929260450161, "step": 840}, {"loss": 0.5364, "grad_norm": 1.140464186668396, "learning_rate": 0.0002, "epoch": 5.466237942122186, "step": 850}, {"loss": 0.5303, "grad_norm": 1.3640265464782715, "learning_rate": 0.0002, "epoch": 5.530546623794212, "step": 860}, {"loss": 0.5272, "grad_norm": 1.1880438327789307, "learning_rate": 0.0002, "epoch": 5.594855305466238, "step": 870}, {"loss": 0.5574, "grad_norm": 1.1584500074386597, "learning_rate": 0.0002, "epoch": 5.659163987138264, "step": 880}, {"loss": 0.5469, "grad_norm": 1.1855696439743042, "learning_rate": 0.0002, "epoch": 5.723472668810289, "step": 890}, {"loss": 0.5376, "grad_norm": 1.0149868726730347, "learning_rate": 0.0002, "epoch": 5.787781350482315, "step": 900}, {"loss": 0.5131, "grad_norm": 1.0635329484939575, "learning_rate": 0.0002, "epoch": 5.852090032154341, "step": 910}, {"loss": 0.5486, "grad_norm": 1.2947518825531006, "learning_rate": 0.0002, "epoch": 5.916398713826366, "step": 920}, {"loss": 0.5701, "grad_norm": 1.205394983291626, "learning_rate": 0.0002, "epoch": 5.980707395498392, "step": 930}]} +{"epoch": 6.996784565916399, "step": 1088, "epoch_duration": 232.78678274154663, "total_accumulated_duration": 1630.954836845398, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}, {"eval_loss": 1.1760698556900024, "eval_runtime": 10.5123, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 2.996784565916399, "step": 466}, {"loss": 0.9531, "grad_norm": 0.4903484582901001, "learning_rate": 0.0002, "epoch": 3.022508038585209, "step": 470}, {"loss": 0.7771, "grad_norm": 0.7591149210929871, "learning_rate": 0.0002, "epoch": 3.0868167202572345, "step": 480}, {"loss": 0.8044, "grad_norm": 0.8178006410598755, "learning_rate": 0.0002, "epoch": 3.1511254019292605, "step": 490}, {"loss": 0.8237, "grad_norm": 0.7482298016548157, "learning_rate": 0.0002, "epoch": 3.215434083601286, "step": 500}, {"loss": 0.8061, "grad_norm": 0.7520643472671509, "learning_rate": 0.0002, "epoch": 3.279742765273312, "step": 510}, {"loss": 0.8065, "grad_norm": 0.6797061562538147, "learning_rate": 0.0002, "epoch": 3.3440514469453375, "step": 520}, {"loss": 0.827, "grad_norm": 0.6733362674713135, "learning_rate": 0.0002, "epoch": 3.4083601286173635, "step": 530}, {"loss": 0.8628, "grad_norm": 0.6488103270530701, "learning_rate": 0.0002, "epoch": 3.472668810289389, "step": 540}, {"loss": 0.8053, "grad_norm": 0.6773484349250793, "learning_rate": 0.0002, "epoch": 3.536977491961415, "step": 550}, {"loss": 0.8631, "grad_norm": 0.6569041609764099, "learning_rate": 0.0002, "epoch": 3.6012861736334405, "step": 560}, {"loss": 0.8251, "grad_norm": 0.7477148771286011, "learning_rate": 0.0002, "epoch": 3.665594855305466, "step": 570}, {"loss": 0.8089, "grad_norm": 0.6446558237075806, "learning_rate": 0.0002, "epoch": 3.729903536977492, "step": 580}, {"loss": 0.8287, "grad_norm": 0.6831859946250916, "learning_rate": 0.0002, "epoch": 3.7942122186495175, "step": 590}, {"loss": 0.8726, "grad_norm": 0.7512634992599487, "learning_rate": 0.0002, "epoch": 3.8585209003215435, "step": 600}, {"loss": 0.8014, "grad_norm": 0.7508474588394165, "learning_rate": 0.0002, "epoch": 3.922829581993569, "step": 610}, {"loss": 0.845, "grad_norm": 0.7288223505020142, "learning_rate": 0.0002, "epoch": 3.987138263665595, "step": 620}, {"eval_loss": 1.2500178813934326, "eval_runtime": 10.5131, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 4.0, "step": 622}, {"loss": 0.6423, "grad_norm": 0.8475615382194519, "learning_rate": 0.0002, "epoch": 4.051446945337621, "step": 630}, {"loss": 0.6416, "grad_norm": 0.7431837916374207, "learning_rate": 0.0002, "epoch": 4.115755627009646, "step": 640}, {"loss": 0.6748, "grad_norm": 1.154038667678833, "learning_rate": 0.0002, "epoch": 4.180064308681672, "step": 650}, {"loss": 0.65, "grad_norm": 0.8179714679718018, "learning_rate": 0.0002, "epoch": 4.244372990353698, "step": 660}, {"loss": 0.6385, "grad_norm": 0.9329283237457275, "learning_rate": 0.0002, "epoch": 4.308681672025724, "step": 670}, {"loss": 0.671, "grad_norm": 0.824656069278717, "learning_rate": 0.0002, "epoch": 4.372990353697749, "step": 680}, {"loss": 0.6679, "grad_norm": 0.9766148924827576, "learning_rate": 0.0002, "epoch": 4.437299035369775, "step": 690}, {"loss": 0.6525, "grad_norm": 0.9103652238845825, "learning_rate": 0.0002, "epoch": 4.501607717041801, "step": 700}, {"loss": 0.6809, "grad_norm": 0.793594241142273, "learning_rate": 0.0002, "epoch": 4.565916398713826, "step": 710}, {"loss": 0.6712, "grad_norm": 0.9835829734802246, "learning_rate": 0.0002, "epoch": 4.630225080385852, "step": 720}, {"loss": 0.6757, "grad_norm": 1.0390352010726929, "learning_rate": 0.0002, "epoch": 4.694533762057878, "step": 730}, {"loss": 0.6959, "grad_norm": 1.0840471982955933, "learning_rate": 0.0002, "epoch": 4.758842443729904, "step": 740}, {"loss": 0.6809, "grad_norm": 0.8057735562324524, "learning_rate": 0.0002, "epoch": 4.823151125401929, "step": 750}, {"loss": 0.7202, "grad_norm": 0.8504151701927185, "learning_rate": 0.0002, "epoch": 4.887459807073955, "step": 760}, {"loss": 0.7001, "grad_norm": 0.8389859199523926, "learning_rate": 0.0002, "epoch": 4.951768488745981, "step": 770}, {"eval_loss": 1.3824537992477417, "eval_runtime": 10.5075, "eval_samples_per_second": 9.327, "eval_steps_per_second": 1.237, "epoch": 4.996784565916399, "step": 777}, {"loss": 0.6425, "grad_norm": 0.8725755214691162, "learning_rate": 0.0002, "epoch": 5.016077170418006, "step": 780}, {"loss": 0.4945, "grad_norm": 0.792286217212677, "learning_rate": 0.0002, "epoch": 5.080385852090032, "step": 790}, {"loss": 0.5229, "grad_norm": 0.9615631699562073, "learning_rate": 0.0002, "epoch": 5.144694533762058, "step": 800}, {"loss": 0.5237, "grad_norm": 0.9059127569198608, "learning_rate": 0.0002, "epoch": 5.209003215434084, "step": 810}, {"loss": 0.5122, "grad_norm": 1.0275076627731323, "learning_rate": 0.0002, "epoch": 5.273311897106109, "step": 820}, {"loss": 0.4987, "grad_norm": 1.2929821014404297, "learning_rate": 0.0002, "epoch": 5.337620578778135, "step": 830}, {"loss": 0.53, "grad_norm": 1.17123281955719, "learning_rate": 0.0002, "epoch": 5.401929260450161, "step": 840}, {"loss": 0.5364, "grad_norm": 1.140464186668396, "learning_rate": 0.0002, "epoch": 5.466237942122186, "step": 850}, {"loss": 0.5303, "grad_norm": 1.3640265464782715, "learning_rate": 0.0002, "epoch": 5.530546623794212, "step": 860}, {"loss": 0.5272, "grad_norm": 1.1880438327789307, "learning_rate": 0.0002, "epoch": 5.594855305466238, "step": 870}, {"loss": 0.5574, "grad_norm": 1.1584500074386597, "learning_rate": 0.0002, "epoch": 5.659163987138264, "step": 880}, {"loss": 0.5469, "grad_norm": 1.1855696439743042, "learning_rate": 0.0002, "epoch": 5.723472668810289, "step": 890}, {"loss": 0.5376, "grad_norm": 1.0149868726730347, "learning_rate": 0.0002, "epoch": 5.787781350482315, "step": 900}, {"loss": 0.5131, "grad_norm": 1.0635329484939575, "learning_rate": 0.0002, "epoch": 5.852090032154341, "step": 910}, {"loss": 0.5486, "grad_norm": 1.2947518825531006, "learning_rate": 0.0002, "epoch": 5.916398713826366, "step": 920}, {"loss": 0.5701, "grad_norm": 1.205394983291626, "learning_rate": 0.0002, "epoch": 5.980707395498392, "step": 930}, {"eval_loss": 1.6060408353805542, "eval_runtime": 10.5111, "eval_samples_per_second": 9.323, "eval_steps_per_second": 1.237, "epoch": 6.0, "step": 933}, {"loss": 0.4285, "grad_norm": 1.1479188203811646, "learning_rate": 0.0002, "epoch": 6.045016077170418, "step": 940}, {"loss": 0.3774, "grad_norm": 0.8727015256881714, "learning_rate": 0.0002, "epoch": 6.109324758842444, "step": 950}, {"loss": 0.407, "grad_norm": 1.1554491519927979, "learning_rate": 0.0002, "epoch": 6.173633440514469, "step": 960}, {"loss": 0.3888, "grad_norm": 1.0589015483856201, "learning_rate": 0.0002, "epoch": 6.237942122186495, "step": 970}, {"loss": 0.4051, "grad_norm": 1.158897876739502, "learning_rate": 0.0002, "epoch": 6.302250803858521, "step": 980}, {"loss": 0.4258, "grad_norm": 1.4029475450515747, "learning_rate": 0.0002, "epoch": 6.366559485530547, "step": 990}, {"loss": 0.4103, "grad_norm": 1.156851887702942, "learning_rate": 0.0002, "epoch": 6.430868167202572, "step": 1000}, {"loss": 0.4098, "grad_norm": 0.9887818694114685, "learning_rate": 0.0002, "epoch": 6.495176848874598, "step": 1010}, {"loss": 0.4329, "grad_norm": 1.0826616287231445, "learning_rate": 0.0002, "epoch": 6.559485530546624, "step": 1020}, {"loss": 0.4026, "grad_norm": 1.1170333623886108, "learning_rate": 0.0002, "epoch": 6.62379421221865, "step": 1030}, {"loss": 0.4052, "grad_norm": 1.313014030456543, "learning_rate": 0.0002, "epoch": 6.688102893890675, "step": 1040}, {"loss": 0.4382, "grad_norm": 1.183534026145935, "learning_rate": 0.0002, "epoch": 6.752411575562701, "step": 1050}, {"loss": 0.4142, "grad_norm": 1.1945377588272095, "learning_rate": 0.0002, "epoch": 6.816720257234727, "step": 1060}, {"loss": 0.4115, "grad_norm": 1.1162303686141968, "learning_rate": 0.0002, "epoch": 6.881028938906752, "step": 1070}, {"loss": 0.434, "grad_norm": 1.1610374450683594, "learning_rate": 0.0002, "epoch": 6.945337620578778, "step": 1080}]} +{"epoch": 7.97427652733119, "step": 1240, "epoch_duration": 227.62692785263062, "total_accumulated_duration": 1858.5817646980286, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19996.72314453125}, "avg_memory_reserved": {"GPU_0": 28746.0}, "peak_memory_reserved": {"GPU_0": 28746.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-2480-sd-42/checkpoint-311", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.2272, "grad_norm": 0.5317481756210327, "learning_rate": 0.0002, "epoch": 0.06430868167202572, "step": 10}, {"loss": 1.6604, "grad_norm": 0.7665001749992371, "learning_rate": 0.0002, "epoch": 0.12861736334405144, "step": 20}, {"loss": 1.4015, "grad_norm": 0.4396904408931732, "learning_rate": 0.0002, "epoch": 0.19292604501607716, "step": 30}, {"loss": 1.3297, "grad_norm": 0.31786906719207764, "learning_rate": 0.0002, "epoch": 0.2572347266881029, "step": 40}, {"loss": 1.2938, "grad_norm": 0.41404595971107483, "learning_rate": 0.0002, "epoch": 0.3215434083601286, "step": 50}, {"loss": 1.2673, "grad_norm": 0.36728185415267944, "learning_rate": 0.0002, "epoch": 0.3858520900321543, "step": 60}, {"loss": 1.2074, "grad_norm": 0.38337618112564087, "learning_rate": 0.0002, "epoch": 0.45016077170418006, "step": 70}, {"loss": 1.2691, "grad_norm": 0.39411404728889465, "learning_rate": 0.0002, "epoch": 0.5144694533762058, "step": 80}, {"loss": 1.2563, "grad_norm": 0.39903542399406433, "learning_rate": 0.0002, "epoch": 0.5787781350482315, "step": 90}, {"loss": 1.2122, "grad_norm": 0.3390332758426666, "learning_rate": 0.0002, "epoch": 0.6430868167202572, "step": 100}, {"loss": 1.251, "grad_norm": 0.35814088582992554, "learning_rate": 0.0002, "epoch": 0.707395498392283, "step": 110}, {"loss": 1.218, "grad_norm": 0.3480045199394226, "learning_rate": 0.0002, "epoch": 0.7717041800643086, "step": 120}, {"loss": 1.1504, "grad_norm": 0.3282570540904999, "learning_rate": 0.0002, "epoch": 0.8360128617363344, "step": 130}, {"loss": 1.2131, "grad_norm": 0.33441081643104553, "learning_rate": 0.0002, "epoch": 0.9003215434083601, "step": 140}, {"loss": 1.2116, "grad_norm": 0.3344958424568176, "learning_rate": 0.0002, "epoch": 0.9646302250803859, "step": 150}, {"eval_loss": 1.1744003295898438, "eval_runtime": 10.521, "eval_samples_per_second": 9.315, "eval_steps_per_second": 1.236, "epoch": 0.9967845659163987, "step": 155}, {"loss": 1.1898, "grad_norm": 0.3614383637905121, "learning_rate": 0.0002, "epoch": 1.0289389067524115, "step": 160}, {"loss": 1.1153, "grad_norm": 0.37686896324157715, "learning_rate": 0.0002, "epoch": 1.0932475884244373, "step": 170}, {"loss": 1.1288, "grad_norm": 0.3803747296333313, "learning_rate": 0.0002, "epoch": 1.157556270096463, "step": 180}, {"loss": 1.0915, "grad_norm": 0.35592594742774963, "learning_rate": 0.0002, "epoch": 1.2218649517684887, "step": 190}, {"loss": 1.0895, "grad_norm": 0.5097760558128357, "learning_rate": 0.0002, "epoch": 1.2861736334405145, "step": 200}, {"loss": 1.1268, "grad_norm": 0.3641100227832794, "learning_rate": 0.0002, "epoch": 1.3504823151125402, "step": 210}, {"loss": 1.1212, "grad_norm": 0.3824535310268402, "learning_rate": 0.0002, "epoch": 1.414790996784566, "step": 220}, {"loss": 1.1234, "grad_norm": 0.42148709297180176, "learning_rate": 0.0002, "epoch": 1.4790996784565915, "step": 230}, {"loss": 1.112, "grad_norm": 0.44197967648506165, "learning_rate": 0.0002, "epoch": 1.5434083601286175, "step": 240}, {"loss": 1.0923, "grad_norm": 0.42140334844589233, "learning_rate": 0.0002, "epoch": 1.607717041800643, "step": 250}, {"loss": 1.1455, "grad_norm": 0.404341459274292, "learning_rate": 0.0002, "epoch": 1.6720257234726688, "step": 260}, {"loss": 1.1258, "grad_norm": 0.47345927357673645, "learning_rate": 0.0002, "epoch": 1.7363344051446945, "step": 270}, {"loss": 1.0682, "grad_norm": 0.45900461077690125, "learning_rate": 0.0002, "epoch": 1.8006430868167203, "step": 280}, {"loss": 1.0815, "grad_norm": 0.3809300363063812, "learning_rate": 0.0002, "epoch": 1.864951768488746, "step": 290}, {"loss": 1.0773, "grad_norm": 0.4094211757183075, "learning_rate": 0.0002, "epoch": 1.9292604501607717, "step": 300}, {"loss": 1.1134, "grad_norm": 0.40402060747146606, "learning_rate": 0.0002, "epoch": 1.9935691318327975, "step": 310}, {"eval_loss": 1.1546189785003662, "eval_runtime": 10.5193, "eval_samples_per_second": 9.316, "eval_steps_per_second": 1.236, "epoch": 2.0, "step": 311}, {"loss": 0.9775, "grad_norm": 0.5758638978004456, "learning_rate": 0.0002, "epoch": 2.057877813504823, "step": 320}, {"loss": 0.975, "grad_norm": 0.4793509542942047, "learning_rate": 0.0002, "epoch": 2.122186495176849, "step": 330}, {"loss": 0.9331, "grad_norm": 0.5104694962501526, "learning_rate": 0.0002, "epoch": 2.1864951768488745, "step": 340}, {"loss": 0.9605, "grad_norm": 0.49754178524017334, "learning_rate": 0.0002, "epoch": 2.2508038585209005, "step": 350}, {"loss": 0.9609, "grad_norm": 0.5055416822433472, "learning_rate": 0.0002, "epoch": 2.315112540192926, "step": 360}, {"loss": 0.9793, "grad_norm": 0.5762393474578857, "learning_rate": 0.0002, "epoch": 2.379421221864952, "step": 370}, {"loss": 0.9392, "grad_norm": 0.44768989086151123, "learning_rate": 0.0002, "epoch": 2.4437299035369775, "step": 380}, {"loss": 0.9488, "grad_norm": 0.5598754286766052, "learning_rate": 0.0002, "epoch": 2.508038585209003, "step": 390}, {"loss": 1.0028, "grad_norm": 0.5343462824821472, "learning_rate": 0.0002, "epoch": 2.572347266881029, "step": 400}, {"loss": 0.9871, "grad_norm": 0.4544358253479004, "learning_rate": 0.0002, "epoch": 2.6366559485530545, "step": 410}, {"loss": 1.0025, "grad_norm": 0.5724653005599976, "learning_rate": 0.0002, "epoch": 2.7009646302250805, "step": 420}, {"loss": 0.9776, "grad_norm": 0.5844957828521729, "learning_rate": 0.0002, "epoch": 2.765273311897106, "step": 430}, {"loss": 0.9608, "grad_norm": 0.5306688547134399, "learning_rate": 0.0002, "epoch": 2.829581993569132, "step": 440}, {"loss": 1.0221, "grad_norm": 0.5121245384216309, "learning_rate": 0.0002, "epoch": 2.8938906752411575, "step": 450}, {"loss": 0.9438, "grad_norm": 0.47789978981018066, "learning_rate": 0.0002, "epoch": 2.958199356913183, "step": 460}, {"eval_loss": 1.1760698556900024, "eval_runtime": 10.5123, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 2.996784565916399, "step": 466}, {"loss": 0.9531, "grad_norm": 0.4903484582901001, "learning_rate": 0.0002, "epoch": 3.022508038585209, "step": 470}, {"loss": 0.7771, "grad_norm": 0.7591149210929871, "learning_rate": 0.0002, "epoch": 3.0868167202572345, "step": 480}, {"loss": 0.8044, "grad_norm": 0.8178006410598755, "learning_rate": 0.0002, "epoch": 3.1511254019292605, "step": 490}, {"loss": 0.8237, "grad_norm": 0.7482298016548157, "learning_rate": 0.0002, "epoch": 3.215434083601286, "step": 500}, {"loss": 0.8061, "grad_norm": 0.7520643472671509, "learning_rate": 0.0002, "epoch": 3.279742765273312, "step": 510}, {"loss": 0.8065, "grad_norm": 0.6797061562538147, "learning_rate": 0.0002, "epoch": 3.3440514469453375, "step": 520}, {"loss": 0.827, "grad_norm": 0.6733362674713135, "learning_rate": 0.0002, "epoch": 3.4083601286173635, "step": 530}, {"loss": 0.8628, "grad_norm": 0.6488103270530701, "learning_rate": 0.0002, "epoch": 3.472668810289389, "step": 540}, {"loss": 0.8053, "grad_norm": 0.6773484349250793, "learning_rate": 0.0002, "epoch": 3.536977491961415, "step": 550}, {"loss": 0.8631, "grad_norm": 0.6569041609764099, "learning_rate": 0.0002, "epoch": 3.6012861736334405, "step": 560}, {"loss": 0.8251, "grad_norm": 0.7477148771286011, "learning_rate": 0.0002, "epoch": 3.665594855305466, "step": 570}, {"loss": 0.8089, "grad_norm": 0.6446558237075806, "learning_rate": 0.0002, "epoch": 3.729903536977492, "step": 580}, {"loss": 0.8287, "grad_norm": 0.6831859946250916, "learning_rate": 0.0002, "epoch": 3.7942122186495175, "step": 590}, {"loss": 0.8726, "grad_norm": 0.7512634992599487, "learning_rate": 0.0002, "epoch": 3.8585209003215435, "step": 600}, {"loss": 0.8014, "grad_norm": 0.7508474588394165, "learning_rate": 0.0002, "epoch": 3.922829581993569, "step": 610}, {"loss": 0.845, "grad_norm": 0.7288223505020142, "learning_rate": 0.0002, "epoch": 3.987138263665595, "step": 620}, {"eval_loss": 1.2500178813934326, "eval_runtime": 10.5131, "eval_samples_per_second": 9.322, "eval_steps_per_second": 1.237, "epoch": 4.0, "step": 622}, {"loss": 0.6423, "grad_norm": 0.8475615382194519, "learning_rate": 0.0002, "epoch": 4.051446945337621, "step": 630}, {"loss": 0.6416, "grad_norm": 0.7431837916374207, "learning_rate": 0.0002, "epoch": 4.115755627009646, "step": 640}, {"loss": 0.6748, "grad_norm": 1.154038667678833, "learning_rate": 0.0002, "epoch": 4.180064308681672, "step": 650}, {"loss": 0.65, "grad_norm": 0.8179714679718018, "learning_rate": 0.0002, "epoch": 4.244372990353698, "step": 660}, {"loss": 0.6385, "grad_norm": 0.9329283237457275, "learning_rate": 0.0002, "epoch": 4.308681672025724, "step": 670}, {"loss": 0.671, "grad_norm": 0.824656069278717, "learning_rate": 0.0002, "epoch": 4.372990353697749, "step": 680}, {"loss": 0.6679, "grad_norm": 0.9766148924827576, "learning_rate": 0.0002, "epoch": 4.437299035369775, "step": 690}, {"loss": 0.6525, "grad_norm": 0.9103652238845825, "learning_rate": 0.0002, "epoch": 4.501607717041801, "step": 700}, {"loss": 0.6809, "grad_norm": 0.793594241142273, "learning_rate": 0.0002, "epoch": 4.565916398713826, "step": 710}, {"loss": 0.6712, "grad_norm": 0.9835829734802246, "learning_rate": 0.0002, "epoch": 4.630225080385852, "step": 720}, {"loss": 0.6757, "grad_norm": 1.0390352010726929, "learning_rate": 0.0002, "epoch": 4.694533762057878, "step": 730}, {"loss": 0.6959, "grad_norm": 1.0840471982955933, "learning_rate": 0.0002, "epoch": 4.758842443729904, "step": 740}, {"loss": 0.6809, "grad_norm": 0.8057735562324524, "learning_rate": 0.0002, "epoch": 4.823151125401929, "step": 750}, {"loss": 0.7202, "grad_norm": 0.8504151701927185, "learning_rate": 0.0002, "epoch": 4.887459807073955, "step": 760}, {"loss": 0.7001, "grad_norm": 0.8389859199523926, "learning_rate": 0.0002, "epoch": 4.951768488745981, "step": 770}, {"eval_loss": 1.3824537992477417, "eval_runtime": 10.5075, "eval_samples_per_second": 9.327, "eval_steps_per_second": 1.237, "epoch": 4.996784565916399, "step": 777}, {"loss": 0.6425, "grad_norm": 0.8725755214691162, "learning_rate": 0.0002, "epoch": 5.016077170418006, "step": 780}, {"loss": 0.4945, "grad_norm": 0.792286217212677, "learning_rate": 0.0002, "epoch": 5.080385852090032, "step": 790}, {"loss": 0.5229, "grad_norm": 0.9615631699562073, "learning_rate": 0.0002, "epoch": 5.144694533762058, "step": 800}, {"loss": 0.5237, "grad_norm": 0.9059127569198608, "learning_rate": 0.0002, "epoch": 5.209003215434084, "step": 810}, {"loss": 0.5122, "grad_norm": 1.0275076627731323, "learning_rate": 0.0002, "epoch": 5.273311897106109, "step": 820}, {"loss": 0.4987, "grad_norm": 1.2929821014404297, "learning_rate": 0.0002, "epoch": 5.337620578778135, "step": 830}, {"loss": 0.53, "grad_norm": 1.17123281955719, "learning_rate": 0.0002, "epoch": 5.401929260450161, "step": 840}, {"loss": 0.5364, "grad_norm": 1.140464186668396, "learning_rate": 0.0002, "epoch": 5.466237942122186, "step": 850}, {"loss": 0.5303, "grad_norm": 1.3640265464782715, "learning_rate": 0.0002, "epoch": 5.530546623794212, "step": 860}, {"loss": 0.5272, "grad_norm": 1.1880438327789307, "learning_rate": 0.0002, "epoch": 5.594855305466238, "step": 870}, {"loss": 0.5574, "grad_norm": 1.1584500074386597, "learning_rate": 0.0002, "epoch": 5.659163987138264, "step": 880}, {"loss": 0.5469, "grad_norm": 1.1855696439743042, "learning_rate": 0.0002, "epoch": 5.723472668810289, "step": 890}, {"loss": 0.5376, "grad_norm": 1.0149868726730347, "learning_rate": 0.0002, "epoch": 5.787781350482315, "step": 900}, {"loss": 0.5131, "grad_norm": 1.0635329484939575, "learning_rate": 0.0002, "epoch": 5.852090032154341, "step": 910}, {"loss": 0.5486, "grad_norm": 1.2947518825531006, "learning_rate": 0.0002, "epoch": 5.916398713826366, "step": 920}, {"loss": 0.5701, "grad_norm": 1.205394983291626, "learning_rate": 0.0002, "epoch": 5.980707395498392, "step": 930}, {"eval_loss": 1.6060408353805542, "eval_runtime": 10.5111, "eval_samples_per_second": 9.323, "eval_steps_per_second": 1.237, "epoch": 6.0, "step": 933}, {"loss": 0.4285, "grad_norm": 1.1479188203811646, "learning_rate": 0.0002, "epoch": 6.045016077170418, "step": 940}, {"loss": 0.3774, "grad_norm": 0.8727015256881714, "learning_rate": 0.0002, "epoch": 6.109324758842444, "step": 950}, {"loss": 0.407, "grad_norm": 1.1554491519927979, "learning_rate": 0.0002, "epoch": 6.173633440514469, "step": 960}, {"loss": 0.3888, "grad_norm": 1.0589015483856201, "learning_rate": 0.0002, "epoch": 6.237942122186495, "step": 970}, {"loss": 0.4051, "grad_norm": 1.158897876739502, "learning_rate": 0.0002, "epoch": 6.302250803858521, "step": 980}, {"loss": 0.4258, "grad_norm": 1.4029475450515747, "learning_rate": 0.0002, "epoch": 6.366559485530547, "step": 990}, {"loss": 0.4103, "grad_norm": 1.156851887702942, "learning_rate": 0.0002, "epoch": 6.430868167202572, "step": 1000}, {"loss": 0.4098, "grad_norm": 0.9887818694114685, "learning_rate": 0.0002, "epoch": 6.495176848874598, "step": 1010}, {"loss": 0.4329, "grad_norm": 1.0826616287231445, "learning_rate": 0.0002, "epoch": 6.559485530546624, "step": 1020}, {"loss": 0.4026, "grad_norm": 1.1170333623886108, "learning_rate": 0.0002, "epoch": 6.62379421221865, "step": 1030}, {"loss": 0.4052, "grad_norm": 1.313014030456543, "learning_rate": 0.0002, "epoch": 6.688102893890675, "step": 1040}, {"loss": 0.4382, "grad_norm": 1.183534026145935, "learning_rate": 0.0002, "epoch": 6.752411575562701, "step": 1050}, {"loss": 0.4142, "grad_norm": 1.1945377588272095, "learning_rate": 0.0002, "epoch": 6.816720257234727, "step": 1060}, {"loss": 0.4115, "grad_norm": 1.1162303686141968, "learning_rate": 0.0002, "epoch": 6.881028938906752, "step": 1070}, {"loss": 0.434, "grad_norm": 1.1610374450683594, "learning_rate": 0.0002, "epoch": 6.945337620578778, "step": 1080}, {"eval_loss": 1.7738038301467896, "eval_runtime": 10.5081, "eval_samples_per_second": 9.326, "eval_steps_per_second": 1.237, "epoch": 6.996784565916399, "step": 1088}, {"loss": 0.4252, "grad_norm": 0.8958842158317566, "learning_rate": 0.0002, "epoch": 7.009646302250804, "step": 1090}, {"loss": 0.2968, "grad_norm": 1.4632889032363892, "learning_rate": 0.0002, "epoch": 7.07395498392283, "step": 1100}, {"loss": 0.3, "grad_norm": 1.1745072603225708, "learning_rate": 0.0002, "epoch": 7.138263665594855, "step": 1110}, {"loss": 0.2984, "grad_norm": 1.1066304445266724, "learning_rate": 0.0002, "epoch": 7.202572347266881, "step": 1120}, {"loss": 0.3335, "grad_norm": 1.455328345298767, "learning_rate": 0.0002, "epoch": 7.266881028938907, "step": 1130}, {"loss": 0.2985, "grad_norm": 1.5219749212265015, "learning_rate": 0.0002, "epoch": 7.331189710610932, "step": 1140}, {"loss": 0.3215, "grad_norm": 1.083840012550354, "learning_rate": 0.0002, "epoch": 7.395498392282958, "step": 1150}, {"loss": 0.3276, "grad_norm": 1.161246418952942, "learning_rate": 0.0002, "epoch": 7.459807073954984, "step": 1160}, {"loss": 0.335, "grad_norm": 1.1832561492919922, "learning_rate": 0.0002, "epoch": 7.52411575562701, "step": 1170}, {"loss": 0.3361, "grad_norm": 1.2522748708724976, "learning_rate": 0.0002, "epoch": 7.588424437299035, "step": 1180}, {"loss": 0.3385, "grad_norm": 1.2288755178451538, "learning_rate": 0.0002, "epoch": 7.652733118971061, "step": 1190}, {"loss": 0.3396, "grad_norm": 1.5007057189941406, "learning_rate": 0.0002, "epoch": 7.717041800643087, "step": 1200}, {"loss": 0.3361, "grad_norm": 0.8879519701004028, "learning_rate": 0.0002, "epoch": 7.781350482315112, "step": 1210}, {"loss": 0.3553, "grad_norm": 1.287729024887085, "learning_rate": 0.0002, "epoch": 7.845659163987138, "step": 1220}, {"loss": 0.3364, "grad_norm": 1.179373025894165, "learning_rate": 0.0002, "epoch": 7.909967845659164, "step": 1230}, {"loss": 0.3609, "grad_norm": 1.1422494649887085, "learning_rate": 0.0002, "epoch": 7.97427652733119, "step": 1240}]}