diff --git a/.gitattributes b/.gitattributes index 6d83228933044ed3c52a4d49691d8861257eb8a6..acd9087fb0b52cb637caeda73cd66e6fda0c01c2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -4883,3 +4883,12 @@ Meta-Llama-3-8B-Instruct_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_ Meta-Llama-3-8B-Instruct_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-980-sd-42/checkpoint-760/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-980-sd-42/checkpoint-912/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_mmlu-routerbench-0shot-full-by-task_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-980-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..480d8e3096d0f71968d6de54b96b9d84949b4873 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc67ec890142281c5617484415bf724c30bf7041fe46db7fa4689fe6e0343eb +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a964cebada93f1f325f9bb61975aaa2f8024f268 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37830d653b56e6f44a388784dae15686603824d087d366cfb03434307d4cd3c7 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d237fb95989c0bcfa912ba542e2182c75013a26b --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b84fe6fbac3c0fc9769303ad3b5a9ceb051a0987746adf9c27529b1cae3dec +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..370017cefef373b92691186e414b0fca8201a7f8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fd17961c1fefa34e34972211a3b122a7a7ec8ea342a16270a1601f763f3ab6a +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..946f0b7cc62c21770a5c301d2b4d29052e0d1d35 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:969199c3fbf754fec216bbd170861254d629f5797d20964b9b424cdfaeaf270b +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..dcb2a77d4e7aa74f6e22153831c4bdeecc6ad18a --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/trainer_state.json @@ -0,0 +1,853 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 8.0, + "eval_steps": 10, + "global_step": 1088, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + }, + { + "epoch": 3.014705882352941, + "grad_norm": 0.6254619359970093, + "learning_rate": 0.0002, + "loss": 1.4063, + "step": 410 + }, + { + "epoch": 3.088235294117647, + "grad_norm": 0.7344406843185425, + "learning_rate": 0.0002, + "loss": 1.2863, + "step": 420 + }, + { + "epoch": 3.161764705882353, + "grad_norm": 0.7327449321746826, + "learning_rate": 0.0002, + "loss": 1.3088, + "step": 430 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.7766542434692383, + "learning_rate": 0.0002, + "loss": 1.2746, + "step": 440 + }, + { + "epoch": 3.3088235294117645, + "grad_norm": 0.7025649547576904, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 450 + }, + { + "epoch": 3.3823529411764706, + "grad_norm": 0.7508461475372314, + "learning_rate": 0.0002, + "loss": 1.2526, + "step": 460 + }, + { + "epoch": 3.4558823529411766, + "grad_norm": 0.8148072361946106, + "learning_rate": 0.0002, + "loss": 1.2834, + "step": 470 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.7245928645133972, + "learning_rate": 0.0002, + "loss": 1.2115, + "step": 480 + }, + { + "epoch": 3.6029411764705883, + "grad_norm": 0.9170019626617432, + "learning_rate": 0.0002, + "loss": 1.2885, + "step": 490 + }, + { + "epoch": 3.6764705882352944, + "grad_norm": 0.9033855199813843, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 500 + }, + { + "epoch": 3.75, + "grad_norm": 0.7575234174728394, + "learning_rate": 0.0002, + "loss": 1.3065, + "step": 510 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.7426044344902039, + "learning_rate": 0.0002, + "loss": 1.3299, + "step": 520 + }, + { + "epoch": 3.8970588235294117, + "grad_norm": 0.6892959475517273, + "learning_rate": 0.0002, + "loss": 1.26, + "step": 530 + }, + { + "epoch": 3.9705882352941178, + "grad_norm": 0.7498812675476074, + "learning_rate": 0.0002, + "loss": 1.3376, + "step": 540 + }, + { + "epoch": 4.0, + "eval_loss": 1.4219430685043335, + "eval_runtime": 7.7457, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.678, + "step": 544 + }, + { + "epoch": 4.044117647058823, + "grad_norm": 0.8324301838874817, + "learning_rate": 0.0002, + "loss": 1.1944, + "step": 550 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.8911291360855103, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 560 + }, + { + "epoch": 4.1911764705882355, + "grad_norm": 0.856676459312439, + "learning_rate": 0.0002, + "loss": 1.1778, + "step": 570 + }, + { + "epoch": 4.264705882352941, + "grad_norm": 1.074108600616455, + "learning_rate": 0.0002, + "loss": 1.2294, + "step": 580 + }, + { + "epoch": 4.338235294117647, + "grad_norm": 0.8867416977882385, + "learning_rate": 0.0002, + "loss": 1.1478, + "step": 590 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.7843716740608215, + "learning_rate": 0.0002, + "loss": 1.1816, + "step": 600 + }, + { + "epoch": 4.485294117647059, + "grad_norm": 0.8869543075561523, + "learning_rate": 0.0002, + "loss": 1.1885, + "step": 610 + }, + { + "epoch": 4.5588235294117645, + "grad_norm": 0.7744895815849304, + "learning_rate": 0.0002, + "loss": 1.2468, + "step": 620 + }, + { + "epoch": 4.632352941176471, + "grad_norm": 0.7312784790992737, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 630 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.8561248779296875, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 640 + }, + { + "epoch": 4.779411764705882, + "grad_norm": 0.888317346572876, + "learning_rate": 0.0002, + "loss": 1.15, + "step": 650 + }, + { + "epoch": 4.852941176470588, + "grad_norm": 0.8369079828262329, + "learning_rate": 0.0002, + "loss": 1.2659, + "step": 660 + }, + { + "epoch": 4.926470588235294, + "grad_norm": 0.7990967631340027, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 670 + }, + { + "epoch": 5.0, + "grad_norm": 0.8745001554489136, + "learning_rate": 0.0002, + "loss": 1.2121, + "step": 680 + }, + { + "epoch": 5.0, + "eval_loss": 1.426682949066162, + "eval_runtime": 7.7664, + "eval_samples_per_second": 12.618, + "eval_steps_per_second": 1.674, + "step": 680 + }, + { + "epoch": 5.073529411764706, + "grad_norm": 0.8637261986732483, + "learning_rate": 0.0002, + "loss": 1.0755, + "step": 690 + }, + { + "epoch": 5.147058823529412, + "grad_norm": 0.8743941187858582, + "learning_rate": 0.0002, + "loss": 1.0615, + "step": 700 + }, + { + "epoch": 5.220588235294118, + "grad_norm": 0.8632293939590454, + "learning_rate": 0.0002, + "loss": 1.0753, + "step": 710 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.1503057479858398, + "learning_rate": 0.0002, + "loss": 1.1341, + "step": 720 + }, + { + "epoch": 5.367647058823529, + "grad_norm": 0.9048053026199341, + "learning_rate": 0.0002, + "loss": 1.1174, + "step": 730 + }, + { + "epoch": 5.4411764705882355, + "grad_norm": 0.8516059517860413, + "learning_rate": 0.0002, + "loss": 1.0939, + "step": 740 + }, + { + "epoch": 5.514705882352941, + "grad_norm": 0.9515685439109802, + "learning_rate": 0.0002, + "loss": 1.1518, + "step": 750 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.8125670552253723, + "learning_rate": 0.0002, + "loss": 1.0982, + "step": 760 + }, + { + "epoch": 5.661764705882353, + "grad_norm": 1.0451067686080933, + "learning_rate": 0.0002, + "loss": 1.1298, + "step": 770 + }, + { + "epoch": 5.735294117647059, + "grad_norm": 0.8425356149673462, + "learning_rate": 0.0002, + "loss": 1.0582, + "step": 780 + }, + { + "epoch": 5.8088235294117645, + "grad_norm": 0.8448241353034973, + "learning_rate": 0.0002, + "loss": 1.1046, + "step": 790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 0.9654536843299866, + "learning_rate": 0.0002, + "loss": 1.1626, + "step": 800 + }, + { + "epoch": 5.955882352941177, + "grad_norm": 1.099204659461975, + "learning_rate": 0.0002, + "loss": 1.1457, + "step": 810 + }, + { + "epoch": 6.0, + "eval_loss": 1.453696370124817, + "eval_runtime": 7.8117, + "eval_samples_per_second": 12.545, + "eval_steps_per_second": 1.664, + "step": 816 + }, + { + "epoch": 6.029411764705882, + "grad_norm": 0.7710627913475037, + "learning_rate": 0.0002, + "loss": 1.0491, + "step": 820 + }, + { + "epoch": 6.102941176470588, + "grad_norm": 1.0457934141159058, + "learning_rate": 0.0002, + "loss": 0.9758, + "step": 830 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 0.9009696245193481, + "learning_rate": 0.0002, + "loss": 0.9917, + "step": 840 + }, + { + "epoch": 6.25, + "grad_norm": 0.9443604946136475, + "learning_rate": 0.0002, + "loss": 0.9978, + "step": 850 + }, + { + "epoch": 6.323529411764706, + "grad_norm": 1.017409086227417, + "learning_rate": 0.0002, + "loss": 1.0012, + "step": 860 + }, + { + "epoch": 6.397058823529412, + "grad_norm": 1.0726631879806519, + "learning_rate": 0.0002, + "loss": 1.0073, + "step": 870 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 1.0754258632659912, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 880 + }, + { + "epoch": 6.544117647058823, + "grad_norm": 0.9952278733253479, + "learning_rate": 0.0002, + "loss": 1.042, + "step": 890 + }, + { + "epoch": 6.617647058823529, + "grad_norm": 1.0648400783538818, + "learning_rate": 0.0002, + "loss": 1.0573, + "step": 900 + }, + { + "epoch": 6.6911764705882355, + "grad_norm": 1.102169394493103, + "learning_rate": 0.0002, + "loss": 0.9765, + "step": 910 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 1.022658348083496, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 920 + }, + { + "epoch": 6.838235294117647, + "grad_norm": 0.9385603666305542, + "learning_rate": 0.0002, + "loss": 1.0819, + "step": 930 + }, + { + "epoch": 6.911764705882353, + "grad_norm": 0.9402251839637756, + "learning_rate": 0.0002, + "loss": 1.0395, + "step": 940 + }, + { + "epoch": 6.985294117647059, + "grad_norm": 1.3918722867965698, + "learning_rate": 0.0002, + "loss": 1.014, + "step": 950 + }, + { + "epoch": 7.0, + "eval_loss": 1.4942296743392944, + "eval_runtime": 7.7264, + "eval_samples_per_second": 12.684, + "eval_steps_per_second": 1.683, + "step": 952 + }, + { + "epoch": 7.0588235294117645, + "grad_norm": 0.9380860328674316, + "learning_rate": 0.0002, + "loss": 0.8846, + "step": 960 + }, + { + "epoch": 7.132352941176471, + "grad_norm": 1.0754766464233398, + "learning_rate": 0.0002, + "loss": 0.9289, + "step": 970 + }, + { + "epoch": 7.205882352941177, + "grad_norm": 1.2220656871795654, + "learning_rate": 0.0002, + "loss": 0.942, + "step": 980 + }, + { + "epoch": 7.279411764705882, + "grad_norm": 1.0372205972671509, + "learning_rate": 0.0002, + "loss": 0.8638, + "step": 990 + }, + { + "epoch": 7.352941176470588, + "grad_norm": 1.1364140510559082, + "learning_rate": 0.0002, + "loss": 0.9062, + "step": 1000 + }, + { + "epoch": 7.426470588235294, + "grad_norm": 0.9808094501495361, + "learning_rate": 0.0002, + "loss": 0.9327, + "step": 1010 + }, + { + "epoch": 7.5, + "grad_norm": 1.245301365852356, + "learning_rate": 0.0002, + "loss": 0.9982, + "step": 1020 + }, + { + "epoch": 7.573529411764706, + "grad_norm": 1.1632885932922363, + "learning_rate": 0.0002, + "loss": 0.9838, + "step": 1030 + }, + { + "epoch": 7.647058823529412, + "grad_norm": 1.3757420778274536, + "learning_rate": 0.0002, + "loss": 0.9876, + "step": 1040 + }, + { + "epoch": 7.720588235294118, + "grad_norm": 1.4189417362213135, + "learning_rate": 0.0002, + "loss": 0.9419, + "step": 1050 + }, + { + "epoch": 7.794117647058823, + "grad_norm": 1.1543806791305542, + "learning_rate": 0.0002, + "loss": 0.9511, + "step": 1060 + }, + { + "epoch": 7.867647058823529, + "grad_norm": 1.1373614072799683, + "learning_rate": 0.0002, + "loss": 0.926, + "step": 1070 + }, + { + "epoch": 7.9411764705882355, + "grad_norm": 1.0185565948486328, + "learning_rate": 0.0002, + "loss": 0.9216, + "step": 1080 + }, + { + "epoch": 8.0, + "eval_loss": 1.5546869039535522, + "eval_runtime": 9.6426, + "eval_samples_per_second": 10.163, + "eval_steps_per_second": 1.348, + "step": 1088 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.3297532012593152e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-1088/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..21904089004fc4c680ec4d74a0776c075ed6bf03 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5b65e51d19593607a42bd92329dbb4d2266dd0dd38392c6ca6dd88d5b5f5927 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cc2612dc15568d3648702711f703ba102269d229 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfa3529ffa8be24df426b8c3783608100e568d625824c4ac60a53924dd5bcc94 +size 15064250 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..86611a844c674563bbf49a5346d7ab15ab2b64c2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f9d88aa947e080c4616593e64eede4b43491f22483e88a1debbc0c6882948fc +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e31379e1823e0fd5b4b362e35c7c8bbdaaa49bb --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2458f7bb4b9befba259e3bcae8f869b0197348e531edc729085efeaa1716cf89 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..92a5b389962329258b30dc2ab331f08e1395fc91 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/trainer_state.json @@ -0,0 +1,132 @@ +{ + "best_metric": 1.534969449043274, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136", + "epoch": 1.0, + "eval_steps": 10, + "global_step": 136, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1662191501574144.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a4ccc7cb881225fe7d87f569c2833dc8ee75605 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e79de1ae74fa645fa6149fe6fd4acf6e4b9f9149ae6631bf7c1b82ce3c9f4be +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..46dd4b4b3619f031885f81f53d59b0cb187bf4b8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea1aaad3f6124ddd9626d45c861b885e3fe200f62521ca6753f0486356787336 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..de8d25d1addbf2d4d169a9827b284924e5dfb580 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0328a7bcb3c9afcdaea56b5e0aec7e2884db21ccfc9fa4b6e9350bbb2111e4f +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1215758471ad7a5a83d75c9e3f4c1e264b00f99a --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fad2cc56521eb5e30b649af2b53281df895c7d3e88c53792bba6b60e4f7c796f +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..947837024644c811aa478a1c62302c0ce1c38e96 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/trainer_state.json @@ -0,0 +1,238 @@ +{ + "best_metric": 1.4521459341049194, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 272, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3324383003148288.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..480d8e3096d0f71968d6de54b96b9d84949b4873 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc67ec890142281c5617484415bf724c30bf7041fe46db7fa4689fe6e0343eb +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..9312e4ac6966b724bde8770e08b11e1f596d82bf --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ed8c528c9a6e783f52dbb649f9b5e81d400448c31a05c01b4d7c3eeba5e1329 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2862690df36321297699d867da05c79c1bdd700b --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a036943bfb58afef86f1a2829ca2f5d5d65083afa705615640f7e7c4e9843920 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f3c1a8735b5f32f3bce6018fe8b94c197cb6af8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90e6a951da8a1cccd92bce056a221d3cd4c19301d0af43e638bf585e699c7106 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c16863cfee3eb805761cebc87fe533422c0f86db --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/trainer_state.json @@ -0,0 +1,337 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 408, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4986574504722432.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f43be30c6cbd7e54ec28e2b394e84ca24bd24d50 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aab8c48d31dece66db1b87ba733aec2baf583b2ebf66a0e8832302c13484e5c6 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cce8bcc6c42dd71ade86269bb2af17aaecc563f1 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1ac5d1aabe0e3344242b4bb359cb8a2bada37533945ef6fd8a08c2ec822ac9 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf0162d7fdc39c3d5090060da064df2bf486fe39 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:521b9899ee18247ea48f19dfced3082aed15f9db6294fa280948d84209bb65d2 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0be0017abba1452304e0dd0a724c3a7f4768a9f2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ff4e50d408e078ce1bfe894abaf9dcca4933dc659f69a24bc400ad149997dd2 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..280d02dd3aa56a390e3a7378a2e8069563050858 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/trainer_state.json @@ -0,0 +1,443 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 544, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + }, + { + "epoch": 3.014705882352941, + "grad_norm": 0.6254619359970093, + "learning_rate": 0.0002, + "loss": 1.4063, + "step": 410 + }, + { + "epoch": 3.088235294117647, + "grad_norm": 0.7344406843185425, + "learning_rate": 0.0002, + "loss": 1.2863, + "step": 420 + }, + { + "epoch": 3.161764705882353, + "grad_norm": 0.7327449321746826, + "learning_rate": 0.0002, + "loss": 1.3088, + "step": 430 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.7766542434692383, + "learning_rate": 0.0002, + "loss": 1.2746, + "step": 440 + }, + { + "epoch": 3.3088235294117645, + "grad_norm": 0.7025649547576904, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 450 + }, + { + "epoch": 3.3823529411764706, + "grad_norm": 0.7508461475372314, + "learning_rate": 0.0002, + "loss": 1.2526, + "step": 460 + }, + { + "epoch": 3.4558823529411766, + "grad_norm": 0.8148072361946106, + "learning_rate": 0.0002, + "loss": 1.2834, + "step": 470 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.7245928645133972, + "learning_rate": 0.0002, + "loss": 1.2115, + "step": 480 + }, + { + "epoch": 3.6029411764705883, + "grad_norm": 0.9170019626617432, + "learning_rate": 0.0002, + "loss": 1.2885, + "step": 490 + }, + { + "epoch": 3.6764705882352944, + "grad_norm": 0.9033855199813843, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 500 + }, + { + "epoch": 3.75, + "grad_norm": 0.7575234174728394, + "learning_rate": 0.0002, + "loss": 1.3065, + "step": 510 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.7426044344902039, + "learning_rate": 0.0002, + "loss": 1.3299, + "step": 520 + }, + { + "epoch": 3.8970588235294117, + "grad_norm": 0.6892959475517273, + "learning_rate": 0.0002, + "loss": 1.26, + "step": 530 + }, + { + "epoch": 3.9705882352941178, + "grad_norm": 0.7498812675476074, + "learning_rate": 0.0002, + "loss": 1.3376, + "step": 540 + }, + { + "epoch": 4.0, + "eval_loss": 1.4219430685043335, + "eval_runtime": 7.7457, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.678, + "step": 544 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6648766006296576.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-544/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ed77231e93b61c485a48eab6041555ecab0fdc00 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c8caf7552bd4c1b9de1243e34e24afd55c7ca551782f0127a4511deacb8fa61 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc32968988d82b34cad7c8db2317f21f897d70fe --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8fd79878a9897491a13e725381546f18ffad24dc15965ae57ddf41e0256507 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1e9ddd082ba5db42b2214f173a9d09c7c6445806 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5027368d66770541a994ff5b87c19a93f812ff69fff1688b14abb95eb9cb39e8 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..108374b581901310664c287a16344956e975ddac --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d3e75366ce88ab9b8a8ed3e4086a8ae241a689cf6f46222b151bdb70fe30bd3 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..251510781fa7cc68bb2298b725fb9f7f1c5b4d92 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/trainer_state.json @@ -0,0 +1,549 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 5.0, + "eval_steps": 10, + "global_step": 680, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + }, + { + "epoch": 3.014705882352941, + "grad_norm": 0.6254619359970093, + "learning_rate": 0.0002, + "loss": 1.4063, + "step": 410 + }, + { + "epoch": 3.088235294117647, + "grad_norm": 0.7344406843185425, + "learning_rate": 0.0002, + "loss": 1.2863, + "step": 420 + }, + { + "epoch": 3.161764705882353, + "grad_norm": 0.7327449321746826, + "learning_rate": 0.0002, + "loss": 1.3088, + "step": 430 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.7766542434692383, + "learning_rate": 0.0002, + "loss": 1.2746, + "step": 440 + }, + { + "epoch": 3.3088235294117645, + "grad_norm": 0.7025649547576904, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 450 + }, + { + "epoch": 3.3823529411764706, + "grad_norm": 0.7508461475372314, + "learning_rate": 0.0002, + "loss": 1.2526, + "step": 460 + }, + { + "epoch": 3.4558823529411766, + "grad_norm": 0.8148072361946106, + "learning_rate": 0.0002, + "loss": 1.2834, + "step": 470 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.7245928645133972, + "learning_rate": 0.0002, + "loss": 1.2115, + "step": 480 + }, + { + "epoch": 3.6029411764705883, + "grad_norm": 0.9170019626617432, + "learning_rate": 0.0002, + "loss": 1.2885, + "step": 490 + }, + { + "epoch": 3.6764705882352944, + "grad_norm": 0.9033855199813843, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 500 + }, + { + "epoch": 3.75, + "grad_norm": 0.7575234174728394, + "learning_rate": 0.0002, + "loss": 1.3065, + "step": 510 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.7426044344902039, + "learning_rate": 0.0002, + "loss": 1.3299, + "step": 520 + }, + { + "epoch": 3.8970588235294117, + "grad_norm": 0.6892959475517273, + "learning_rate": 0.0002, + "loss": 1.26, + "step": 530 + }, + { + "epoch": 3.9705882352941178, + "grad_norm": 0.7498812675476074, + "learning_rate": 0.0002, + "loss": 1.3376, + "step": 540 + }, + { + "epoch": 4.0, + "eval_loss": 1.4219430685043335, + "eval_runtime": 7.7457, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.678, + "step": 544 + }, + { + "epoch": 4.044117647058823, + "grad_norm": 0.8324301838874817, + "learning_rate": 0.0002, + "loss": 1.1944, + "step": 550 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.8911291360855103, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 560 + }, + { + "epoch": 4.1911764705882355, + "grad_norm": 0.856676459312439, + "learning_rate": 0.0002, + "loss": 1.1778, + "step": 570 + }, + { + "epoch": 4.264705882352941, + "grad_norm": 1.074108600616455, + "learning_rate": 0.0002, + "loss": 1.2294, + "step": 580 + }, + { + "epoch": 4.338235294117647, + "grad_norm": 0.8867416977882385, + "learning_rate": 0.0002, + "loss": 1.1478, + "step": 590 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.7843716740608215, + "learning_rate": 0.0002, + "loss": 1.1816, + "step": 600 + }, + { + "epoch": 4.485294117647059, + "grad_norm": 0.8869543075561523, + "learning_rate": 0.0002, + "loss": 1.1885, + "step": 610 + }, + { + "epoch": 4.5588235294117645, + "grad_norm": 0.7744895815849304, + "learning_rate": 0.0002, + "loss": 1.2468, + "step": 620 + }, + { + "epoch": 4.632352941176471, + "grad_norm": 0.7312784790992737, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 630 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.8561248779296875, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 640 + }, + { + "epoch": 4.779411764705882, + "grad_norm": 0.888317346572876, + "learning_rate": 0.0002, + "loss": 1.15, + "step": 650 + }, + { + "epoch": 4.852941176470588, + "grad_norm": 0.8369079828262329, + "learning_rate": 0.0002, + "loss": 1.2659, + "step": 660 + }, + { + "epoch": 4.926470588235294, + "grad_norm": 0.7990967631340027, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 670 + }, + { + "epoch": 5.0, + "grad_norm": 0.8745001554489136, + "learning_rate": 0.0002, + "loss": 1.2121, + "step": 680 + }, + { + "epoch": 5.0, + "eval_loss": 1.426682949066162, + "eval_runtime": 7.7664, + "eval_samples_per_second": 12.618, + "eval_steps_per_second": 1.674, + "step": 680 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8310957507870720.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-680/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d7fbb5b4488ae6749b60c0f7c615e760cf3903ce --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10bad023e0fa3d040aacb79e9c63f7cba222e322676f84552679facd8ad6f916 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e4f945ad77f990ad7b79a74d6f6ca9953d6c5e7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:085975e8ea15bf9fbc43cb6636d59a0684810bc4f4a3a41f98ad6b0188253014 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7951172891742b1e583d9013e33641b3db329eda --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dfd79525ed98589c0f5ee0d79fcecb1ea7a5718da7264e98451e106c2ee87b8 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..655cb2aab0a4f04c4d052fb1259942b65b3c69eb --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a578d4d11a0a30c9b24f807555c74079f41c84c3f5879b91829538d18295ca46 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c73c23310856b0ed1482bf1d84a45068f3951c4b --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/trainer_state.json @@ -0,0 +1,648 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 816, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + }, + { + "epoch": 3.014705882352941, + "grad_norm": 0.6254619359970093, + "learning_rate": 0.0002, + "loss": 1.4063, + "step": 410 + }, + { + "epoch": 3.088235294117647, + "grad_norm": 0.7344406843185425, + "learning_rate": 0.0002, + "loss": 1.2863, + "step": 420 + }, + { + "epoch": 3.161764705882353, + "grad_norm": 0.7327449321746826, + "learning_rate": 0.0002, + "loss": 1.3088, + "step": 430 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.7766542434692383, + "learning_rate": 0.0002, + "loss": 1.2746, + "step": 440 + }, + { + "epoch": 3.3088235294117645, + "grad_norm": 0.7025649547576904, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 450 + }, + { + "epoch": 3.3823529411764706, + "grad_norm": 0.7508461475372314, + "learning_rate": 0.0002, + "loss": 1.2526, + "step": 460 + }, + { + "epoch": 3.4558823529411766, + "grad_norm": 0.8148072361946106, + "learning_rate": 0.0002, + "loss": 1.2834, + "step": 470 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.7245928645133972, + "learning_rate": 0.0002, + "loss": 1.2115, + "step": 480 + }, + { + "epoch": 3.6029411764705883, + "grad_norm": 0.9170019626617432, + "learning_rate": 0.0002, + "loss": 1.2885, + "step": 490 + }, + { + "epoch": 3.6764705882352944, + "grad_norm": 0.9033855199813843, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 500 + }, + { + "epoch": 3.75, + "grad_norm": 0.7575234174728394, + "learning_rate": 0.0002, + "loss": 1.3065, + "step": 510 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.7426044344902039, + "learning_rate": 0.0002, + "loss": 1.3299, + "step": 520 + }, + { + "epoch": 3.8970588235294117, + "grad_norm": 0.6892959475517273, + "learning_rate": 0.0002, + "loss": 1.26, + "step": 530 + }, + { + "epoch": 3.9705882352941178, + "grad_norm": 0.7498812675476074, + "learning_rate": 0.0002, + "loss": 1.3376, + "step": 540 + }, + { + "epoch": 4.0, + "eval_loss": 1.4219430685043335, + "eval_runtime": 7.7457, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.678, + "step": 544 + }, + { + "epoch": 4.044117647058823, + "grad_norm": 0.8324301838874817, + "learning_rate": 0.0002, + "loss": 1.1944, + "step": 550 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.8911291360855103, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 560 + }, + { + "epoch": 4.1911764705882355, + "grad_norm": 0.856676459312439, + "learning_rate": 0.0002, + "loss": 1.1778, + "step": 570 + }, + { + "epoch": 4.264705882352941, + "grad_norm": 1.074108600616455, + "learning_rate": 0.0002, + "loss": 1.2294, + "step": 580 + }, + { + "epoch": 4.338235294117647, + "grad_norm": 0.8867416977882385, + "learning_rate": 0.0002, + "loss": 1.1478, + "step": 590 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.7843716740608215, + "learning_rate": 0.0002, + "loss": 1.1816, + "step": 600 + }, + { + "epoch": 4.485294117647059, + "grad_norm": 0.8869543075561523, + "learning_rate": 0.0002, + "loss": 1.1885, + "step": 610 + }, + { + "epoch": 4.5588235294117645, + "grad_norm": 0.7744895815849304, + "learning_rate": 0.0002, + "loss": 1.2468, + "step": 620 + }, + { + "epoch": 4.632352941176471, + "grad_norm": 0.7312784790992737, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 630 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.8561248779296875, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 640 + }, + { + "epoch": 4.779411764705882, + "grad_norm": 0.888317346572876, + "learning_rate": 0.0002, + "loss": 1.15, + "step": 650 + }, + { + "epoch": 4.852941176470588, + "grad_norm": 0.8369079828262329, + "learning_rate": 0.0002, + "loss": 1.2659, + "step": 660 + }, + { + "epoch": 4.926470588235294, + "grad_norm": 0.7990967631340027, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 670 + }, + { + "epoch": 5.0, + "grad_norm": 0.8745001554489136, + "learning_rate": 0.0002, + "loss": 1.2121, + "step": 680 + }, + { + "epoch": 5.0, + "eval_loss": 1.426682949066162, + "eval_runtime": 7.7664, + "eval_samples_per_second": 12.618, + "eval_steps_per_second": 1.674, + "step": 680 + }, + { + "epoch": 5.073529411764706, + "grad_norm": 0.8637261986732483, + "learning_rate": 0.0002, + "loss": 1.0755, + "step": 690 + }, + { + "epoch": 5.147058823529412, + "grad_norm": 0.8743941187858582, + "learning_rate": 0.0002, + "loss": 1.0615, + "step": 700 + }, + { + "epoch": 5.220588235294118, + "grad_norm": 0.8632293939590454, + "learning_rate": 0.0002, + "loss": 1.0753, + "step": 710 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.1503057479858398, + "learning_rate": 0.0002, + "loss": 1.1341, + "step": 720 + }, + { + "epoch": 5.367647058823529, + "grad_norm": 0.9048053026199341, + "learning_rate": 0.0002, + "loss": 1.1174, + "step": 730 + }, + { + "epoch": 5.4411764705882355, + "grad_norm": 0.8516059517860413, + "learning_rate": 0.0002, + "loss": 1.0939, + "step": 740 + }, + { + "epoch": 5.514705882352941, + "grad_norm": 0.9515685439109802, + "learning_rate": 0.0002, + "loss": 1.1518, + "step": 750 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.8125670552253723, + "learning_rate": 0.0002, + "loss": 1.0982, + "step": 760 + }, + { + "epoch": 5.661764705882353, + "grad_norm": 1.0451067686080933, + "learning_rate": 0.0002, + "loss": 1.1298, + "step": 770 + }, + { + "epoch": 5.735294117647059, + "grad_norm": 0.8425356149673462, + "learning_rate": 0.0002, + "loss": 1.0582, + "step": 780 + }, + { + "epoch": 5.8088235294117645, + "grad_norm": 0.8448241353034973, + "learning_rate": 0.0002, + "loss": 1.1046, + "step": 790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 0.9654536843299866, + "learning_rate": 0.0002, + "loss": 1.1626, + "step": 800 + }, + { + "epoch": 5.955882352941177, + "grad_norm": 1.099204659461975, + "learning_rate": 0.0002, + "loss": 1.1457, + "step": 810 + }, + { + "epoch": 6.0, + "eval_loss": 1.453696370124817, + "eval_runtime": 7.8117, + "eval_samples_per_second": 12.545, + "eval_steps_per_second": 1.664, + "step": 816 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 9973149009444864.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-816/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..adaf1d1f040a8db185f0f8274152ed40fc0b4eb6 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b940617b1a263e4f708736de685d91f32e7d7d7d12211c1d559a4ad327d575 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..36436ceaedfc85f47ea75298400c57dac80f00e5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e708f4ae70f3dd4ca75e09ae5274ad6b479a2b2e12882c300259702c1efdf9a9 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..45e146b006474bb0a54f36097f3bc1a01b3cd8e6 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84360f00f07c3d982d67367e723d69d7e60d2f4f709bebf0d9b8152500813bb7 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d738a553235cbb297ad1c736de1c83cd4f98d76 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5a72023196c8ba04831271c5d47b3780925ccf76a485519fac3a4b1615424a9 +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e3537c105d1d50088edfe53fb33b8aa019dff6fe --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/trainer_state.json @@ -0,0 +1,754 @@ +{ + "best_metric": 1.4206745624542236, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", + "epoch": 7.0, + "eval_steps": 10, + "global_step": 952, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.07352941176470588, + "grad_norm": 1.515879511833191, + "learning_rate": 0.0002, + "loss": 3.6278, + "step": 10 + }, + { + "epoch": 0.14705882352941177, + "grad_norm": 6.156805992126465, + "learning_rate": 0.0002, + "loss": 2.4224, + "step": 20 + }, + { + "epoch": 0.22058823529411764, + "grad_norm": 1.264341950416565, + "learning_rate": 0.0002, + "loss": 2.107, + "step": 30 + }, + { + "epoch": 0.29411764705882354, + "grad_norm": 0.8591746091842651, + "learning_rate": 0.0002, + "loss": 1.9838, + "step": 40 + }, + { + "epoch": 0.36764705882352944, + "grad_norm": 1.4083963632583618, + "learning_rate": 0.0002, + "loss": 1.916, + "step": 50 + }, + { + "epoch": 0.4411764705882353, + "grad_norm": 3.570463180541992, + "learning_rate": 0.0002, + "loss": 1.7336, + "step": 60 + }, + { + "epoch": 0.5147058823529411, + "grad_norm": 0.6044552326202393, + "learning_rate": 0.0002, + "loss": 1.7316, + "step": 70 + }, + { + "epoch": 0.5882352941176471, + "grad_norm": 1.1396939754486084, + "learning_rate": 0.0002, + "loss": 1.7078, + "step": 80 + }, + { + "epoch": 0.6617647058823529, + "grad_norm": 0.7549962997436523, + "learning_rate": 0.0002, + "loss": 1.6052, + "step": 90 + }, + { + "epoch": 0.7352941176470589, + "grad_norm": 1.2676323652267456, + "learning_rate": 0.0002, + "loss": 1.7247, + "step": 100 + }, + { + "epoch": 0.8088235294117647, + "grad_norm": 1.223105788230896, + "learning_rate": 0.0002, + "loss": 1.6836, + "step": 110 + }, + { + "epoch": 0.8823529411764706, + "grad_norm": 1.0946531295776367, + "learning_rate": 0.0002, + "loss": 1.631, + "step": 120 + }, + { + "epoch": 0.9558823529411765, + "grad_norm": 0.8674123883247375, + "learning_rate": 0.0002, + "loss": 1.6089, + "step": 130 + }, + { + "epoch": 1.0, + "eval_loss": 1.534969449043274, + "eval_runtime": 7.804, + "eval_samples_per_second": 12.558, + "eval_steps_per_second": 1.666, + "step": 136 + }, + { + "epoch": 1.0294117647058822, + "grad_norm": 0.6233109831809998, + "learning_rate": 0.0002, + "loss": 1.4976, + "step": 140 + }, + { + "epoch": 1.1029411764705883, + "grad_norm": 0.85622239112854, + "learning_rate": 0.0002, + "loss": 1.4431, + "step": 150 + }, + { + "epoch": 1.1764705882352942, + "grad_norm": 0.7703060507774353, + "learning_rate": 0.0002, + "loss": 1.4246, + "step": 160 + }, + { + "epoch": 1.25, + "grad_norm": 0.8302682638168335, + "learning_rate": 0.0002, + "loss": 1.5783, + "step": 170 + }, + { + "epoch": 1.3235294117647058, + "grad_norm": 0.7850589156150818, + "learning_rate": 0.0002, + "loss": 1.4883, + "step": 180 + }, + { + "epoch": 1.3970588235294117, + "grad_norm": 0.6718934774398804, + "learning_rate": 0.0002, + "loss": 1.5075, + "step": 190 + }, + { + "epoch": 1.4705882352941178, + "grad_norm": 0.7245007753372192, + "learning_rate": 0.0002, + "loss": 1.4136, + "step": 200 + }, + { + "epoch": 1.5441176470588234, + "grad_norm": 0.7120554447174072, + "learning_rate": 0.0002, + "loss": 1.4666, + "step": 210 + }, + { + "epoch": 1.6176470588235294, + "grad_norm": 0.6771036982536316, + "learning_rate": 0.0002, + "loss": 1.4619, + "step": 220 + }, + { + "epoch": 1.6911764705882353, + "grad_norm": 0.618951141834259, + "learning_rate": 0.0002, + "loss": 1.4465, + "step": 230 + }, + { + "epoch": 1.7647058823529411, + "grad_norm": 0.6583403944969177, + "learning_rate": 0.0002, + "loss": 1.4896, + "step": 240 + }, + { + "epoch": 1.8382352941176472, + "grad_norm": 0.6750185489654541, + "learning_rate": 0.0002, + "loss": 1.4613, + "step": 250 + }, + { + "epoch": 1.9117647058823528, + "grad_norm": 0.6572791337966919, + "learning_rate": 0.0002, + "loss": 1.4614, + "step": 260 + }, + { + "epoch": 1.9852941176470589, + "grad_norm": 0.6265441179275513, + "learning_rate": 0.0002, + "loss": 1.4242, + "step": 270 + }, + { + "epoch": 2.0, + "eval_loss": 1.4521459341049194, + "eval_runtime": 7.6483, + "eval_samples_per_second": 12.813, + "eval_steps_per_second": 1.7, + "step": 272 + }, + { + "epoch": 2.0588235294117645, + "grad_norm": 0.7698636054992676, + "learning_rate": 0.0002, + "loss": 1.4108, + "step": 280 + }, + { + "epoch": 2.1323529411764706, + "grad_norm": 0.7958650588989258, + "learning_rate": 0.0002, + "loss": 1.4143, + "step": 290 + }, + { + "epoch": 2.2058823529411766, + "grad_norm": 0.7007899284362793, + "learning_rate": 0.0002, + "loss": 1.3826, + "step": 300 + }, + { + "epoch": 2.2794117647058822, + "grad_norm": 0.6673262715339661, + "learning_rate": 0.0002, + "loss": 1.3624, + "step": 310 + }, + { + "epoch": 2.3529411764705883, + "grad_norm": 0.6731301546096802, + "learning_rate": 0.0002, + "loss": 1.3212, + "step": 320 + }, + { + "epoch": 2.426470588235294, + "grad_norm": 0.7587279081344604, + "learning_rate": 0.0002, + "loss": 1.3626, + "step": 330 + }, + { + "epoch": 2.5, + "grad_norm": 0.804149329662323, + "learning_rate": 0.0002, + "loss": 1.3401, + "step": 340 + }, + { + "epoch": 2.5735294117647056, + "grad_norm": 0.7690186500549316, + "learning_rate": 0.0002, + "loss": 1.4204, + "step": 350 + }, + { + "epoch": 2.6470588235294117, + "grad_norm": 0.9660338163375854, + "learning_rate": 0.0002, + "loss": 1.3885, + "step": 360 + }, + { + "epoch": 2.7205882352941178, + "grad_norm": 0.6990594267845154, + "learning_rate": 0.0002, + "loss": 1.36, + "step": 370 + }, + { + "epoch": 2.7941176470588234, + "grad_norm": 0.7933360934257507, + "learning_rate": 0.0002, + "loss": 1.3354, + "step": 380 + }, + { + "epoch": 2.8676470588235294, + "grad_norm": 0.8198168277740479, + "learning_rate": 0.0002, + "loss": 1.2904, + "step": 390 + }, + { + "epoch": 2.9411764705882355, + "grad_norm": 0.6719775199890137, + "learning_rate": 0.0002, + "loss": 1.291, + "step": 400 + }, + { + "epoch": 3.0, + "eval_loss": 1.4206745624542236, + "eval_runtime": 7.0429, + "eval_samples_per_second": 13.915, + "eval_steps_per_second": 1.846, + "step": 408 + }, + { + "epoch": 3.014705882352941, + "grad_norm": 0.6254619359970093, + "learning_rate": 0.0002, + "loss": 1.4063, + "step": 410 + }, + { + "epoch": 3.088235294117647, + "grad_norm": 0.7344406843185425, + "learning_rate": 0.0002, + "loss": 1.2863, + "step": 420 + }, + { + "epoch": 3.161764705882353, + "grad_norm": 0.7327449321746826, + "learning_rate": 0.0002, + "loss": 1.3088, + "step": 430 + }, + { + "epoch": 3.235294117647059, + "grad_norm": 0.7766542434692383, + "learning_rate": 0.0002, + "loss": 1.2746, + "step": 440 + }, + { + "epoch": 3.3088235294117645, + "grad_norm": 0.7025649547576904, + "learning_rate": 0.0002, + "loss": 1.1912, + "step": 450 + }, + { + "epoch": 3.3823529411764706, + "grad_norm": 0.7508461475372314, + "learning_rate": 0.0002, + "loss": 1.2526, + "step": 460 + }, + { + "epoch": 3.4558823529411766, + "grad_norm": 0.8148072361946106, + "learning_rate": 0.0002, + "loss": 1.2834, + "step": 470 + }, + { + "epoch": 3.5294117647058822, + "grad_norm": 0.7245928645133972, + "learning_rate": 0.0002, + "loss": 1.2115, + "step": 480 + }, + { + "epoch": 3.6029411764705883, + "grad_norm": 0.9170019626617432, + "learning_rate": 0.0002, + "loss": 1.2885, + "step": 490 + }, + { + "epoch": 3.6764705882352944, + "grad_norm": 0.9033855199813843, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 500 + }, + { + "epoch": 3.75, + "grad_norm": 0.7575234174728394, + "learning_rate": 0.0002, + "loss": 1.3065, + "step": 510 + }, + { + "epoch": 3.8235294117647056, + "grad_norm": 0.7426044344902039, + "learning_rate": 0.0002, + "loss": 1.3299, + "step": 520 + }, + { + "epoch": 3.8970588235294117, + "grad_norm": 0.6892959475517273, + "learning_rate": 0.0002, + "loss": 1.26, + "step": 530 + }, + { + "epoch": 3.9705882352941178, + "grad_norm": 0.7498812675476074, + "learning_rate": 0.0002, + "loss": 1.3376, + "step": 540 + }, + { + "epoch": 4.0, + "eval_loss": 1.4219430685043335, + "eval_runtime": 7.7457, + "eval_samples_per_second": 12.652, + "eval_steps_per_second": 1.678, + "step": 544 + }, + { + "epoch": 4.044117647058823, + "grad_norm": 0.8324301838874817, + "learning_rate": 0.0002, + "loss": 1.1944, + "step": 550 + }, + { + "epoch": 4.117647058823529, + "grad_norm": 0.8911291360855103, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 560 + }, + { + "epoch": 4.1911764705882355, + "grad_norm": 0.856676459312439, + "learning_rate": 0.0002, + "loss": 1.1778, + "step": 570 + }, + { + "epoch": 4.264705882352941, + "grad_norm": 1.074108600616455, + "learning_rate": 0.0002, + "loss": 1.2294, + "step": 580 + }, + { + "epoch": 4.338235294117647, + "grad_norm": 0.8867416977882385, + "learning_rate": 0.0002, + "loss": 1.1478, + "step": 590 + }, + { + "epoch": 4.411764705882353, + "grad_norm": 0.7843716740608215, + "learning_rate": 0.0002, + "loss": 1.1816, + "step": 600 + }, + { + "epoch": 4.485294117647059, + "grad_norm": 0.8869543075561523, + "learning_rate": 0.0002, + "loss": 1.1885, + "step": 610 + }, + { + "epoch": 4.5588235294117645, + "grad_norm": 0.7744895815849304, + "learning_rate": 0.0002, + "loss": 1.2468, + "step": 620 + }, + { + "epoch": 4.632352941176471, + "grad_norm": 0.7312784790992737, + "learning_rate": 0.0002, + "loss": 1.1613, + "step": 630 + }, + { + "epoch": 4.705882352941177, + "grad_norm": 0.8561248779296875, + "learning_rate": 0.0002, + "loss": 1.1359, + "step": 640 + }, + { + "epoch": 4.779411764705882, + "grad_norm": 0.888317346572876, + "learning_rate": 0.0002, + "loss": 1.15, + "step": 650 + }, + { + "epoch": 4.852941176470588, + "grad_norm": 0.8369079828262329, + "learning_rate": 0.0002, + "loss": 1.2659, + "step": 660 + }, + { + "epoch": 4.926470588235294, + "grad_norm": 0.7990967631340027, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 670 + }, + { + "epoch": 5.0, + "grad_norm": 0.8745001554489136, + "learning_rate": 0.0002, + "loss": 1.2121, + "step": 680 + }, + { + "epoch": 5.0, + "eval_loss": 1.426682949066162, + "eval_runtime": 7.7664, + "eval_samples_per_second": 12.618, + "eval_steps_per_second": 1.674, + "step": 680 + }, + { + "epoch": 5.073529411764706, + "grad_norm": 0.8637261986732483, + "learning_rate": 0.0002, + "loss": 1.0755, + "step": 690 + }, + { + "epoch": 5.147058823529412, + "grad_norm": 0.8743941187858582, + "learning_rate": 0.0002, + "loss": 1.0615, + "step": 700 + }, + { + "epoch": 5.220588235294118, + "grad_norm": 0.8632293939590454, + "learning_rate": 0.0002, + "loss": 1.0753, + "step": 710 + }, + { + "epoch": 5.294117647058823, + "grad_norm": 1.1503057479858398, + "learning_rate": 0.0002, + "loss": 1.1341, + "step": 720 + }, + { + "epoch": 5.367647058823529, + "grad_norm": 0.9048053026199341, + "learning_rate": 0.0002, + "loss": 1.1174, + "step": 730 + }, + { + "epoch": 5.4411764705882355, + "grad_norm": 0.8516059517860413, + "learning_rate": 0.0002, + "loss": 1.0939, + "step": 740 + }, + { + "epoch": 5.514705882352941, + "grad_norm": 0.9515685439109802, + "learning_rate": 0.0002, + "loss": 1.1518, + "step": 750 + }, + { + "epoch": 5.588235294117647, + "grad_norm": 0.8125670552253723, + "learning_rate": 0.0002, + "loss": 1.0982, + "step": 760 + }, + { + "epoch": 5.661764705882353, + "grad_norm": 1.0451067686080933, + "learning_rate": 0.0002, + "loss": 1.1298, + "step": 770 + }, + { + "epoch": 5.735294117647059, + "grad_norm": 0.8425356149673462, + "learning_rate": 0.0002, + "loss": 1.0582, + "step": 780 + }, + { + "epoch": 5.8088235294117645, + "grad_norm": 0.8448241353034973, + "learning_rate": 0.0002, + "loss": 1.1046, + "step": 790 + }, + { + "epoch": 5.882352941176471, + "grad_norm": 0.9654536843299866, + "learning_rate": 0.0002, + "loss": 1.1626, + "step": 800 + }, + { + "epoch": 5.955882352941177, + "grad_norm": 1.099204659461975, + "learning_rate": 0.0002, + "loss": 1.1457, + "step": 810 + }, + { + "epoch": 6.0, + "eval_loss": 1.453696370124817, + "eval_runtime": 7.8117, + "eval_samples_per_second": 12.545, + "eval_steps_per_second": 1.664, + "step": 816 + }, + { + "epoch": 6.029411764705882, + "grad_norm": 0.7710627913475037, + "learning_rate": 0.0002, + "loss": 1.0491, + "step": 820 + }, + { + "epoch": 6.102941176470588, + "grad_norm": 1.0457934141159058, + "learning_rate": 0.0002, + "loss": 0.9758, + "step": 830 + }, + { + "epoch": 6.176470588235294, + "grad_norm": 0.9009696245193481, + "learning_rate": 0.0002, + "loss": 0.9917, + "step": 840 + }, + { + "epoch": 6.25, + "grad_norm": 0.9443604946136475, + "learning_rate": 0.0002, + "loss": 0.9978, + "step": 850 + }, + { + "epoch": 6.323529411764706, + "grad_norm": 1.017409086227417, + "learning_rate": 0.0002, + "loss": 1.0012, + "step": 860 + }, + { + "epoch": 6.397058823529412, + "grad_norm": 1.0726631879806519, + "learning_rate": 0.0002, + "loss": 1.0073, + "step": 870 + }, + { + "epoch": 6.470588235294118, + "grad_norm": 1.0754258632659912, + "learning_rate": 0.0002, + "loss": 1.0144, + "step": 880 + }, + { + "epoch": 6.544117647058823, + "grad_norm": 0.9952278733253479, + "learning_rate": 0.0002, + "loss": 1.042, + "step": 890 + }, + { + "epoch": 6.617647058823529, + "grad_norm": 1.0648400783538818, + "learning_rate": 0.0002, + "loss": 1.0573, + "step": 900 + }, + { + "epoch": 6.6911764705882355, + "grad_norm": 1.102169394493103, + "learning_rate": 0.0002, + "loss": 0.9765, + "step": 910 + }, + { + "epoch": 6.764705882352941, + "grad_norm": 1.022658348083496, + "learning_rate": 0.0002, + "loss": 1.0358, + "step": 920 + }, + { + "epoch": 6.838235294117647, + "grad_norm": 0.9385603666305542, + "learning_rate": 0.0002, + "loss": 1.0819, + "step": 930 + }, + { + "epoch": 6.911764705882353, + "grad_norm": 0.9402251839637756, + "learning_rate": 0.0002, + "loss": 1.0395, + "step": 940 + }, + { + "epoch": 6.985294117647059, + "grad_norm": 1.3918722867965698, + "learning_rate": 0.0002, + "loss": 1.014, + "step": 950 + }, + { + "epoch": 7.0, + "eval_loss": 1.4942296743392944, + "eval_runtime": 7.7264, + "eval_samples_per_second": 12.684, + "eval_steps_per_second": 1.683, + "step": 952 + } + ], + "logging_steps": 10, + "max_steps": 1088, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1635340511019008e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-952/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..7fe1a14f6cb5131ff1804a27066009ed40560ae4 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd75b9e004c485c0e30ba43806c8cf7ca805ddb83c107b87f20798de6e2c8c14 +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_log.jsonl b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..471ee1639f5f2e7c9dc5010d0726bb867628d430 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 1.0, "step": 136, "epoch_duration": 239.59363651275635, "total_accumulated_duration": 239.59363651275635, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}]} +{"epoch": 2.0, "step": 272, "epoch_duration": 171.7964813709259, "total_accumulated_duration": 411.39011788368225, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-136", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}]} +{"epoch": 3.0, "step": 408, "epoch_duration": 164.16229391098022, "total_accumulated_duration": 575.5524117946625, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-272", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}]} +{"epoch": 4.0, "step": 544, "epoch_duration": 125.2698106765747, "total_accumulated_duration": 700.8222224712372, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}, {"eval_loss": 1.4206745624542236, "eval_runtime": 7.0429, "eval_samples_per_second": 13.915, "eval_steps_per_second": 1.846, "epoch": 3.0, "step": 408}, {"loss": 1.4063, "grad_norm": 0.6254619359970093, "learning_rate": 0.0002, "epoch": 3.014705882352941, "step": 410}, {"loss": 1.2863, "grad_norm": 0.7344406843185425, "learning_rate": 0.0002, "epoch": 3.088235294117647, "step": 420}, {"loss": 1.3088, "grad_norm": 0.7327449321746826, "learning_rate": 0.0002, "epoch": 3.161764705882353, "step": 430}, {"loss": 1.2746, "grad_norm": 0.7766542434692383, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 440}, {"loss": 1.1912, "grad_norm": 0.7025649547576904, "learning_rate": 0.0002, "epoch": 3.3088235294117645, "step": 450}, {"loss": 1.2526, "grad_norm": 0.7508461475372314, "learning_rate": 0.0002, "epoch": 3.3823529411764706, "step": 460}, {"loss": 1.2834, "grad_norm": 0.8148072361946106, "learning_rate": 0.0002, "epoch": 3.4558823529411766, "step": 470}, {"loss": 1.2115, "grad_norm": 0.7245928645133972, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 480}, {"loss": 1.2885, "grad_norm": 0.9170019626617432, "learning_rate": 0.0002, "epoch": 3.6029411764705883, "step": 490}, {"loss": 1.2879, "grad_norm": 0.9033855199813843, "learning_rate": 0.0002, "epoch": 3.6764705882352944, "step": 500}, {"loss": 1.3065, "grad_norm": 0.7575234174728394, "learning_rate": 0.0002, "epoch": 3.75, "step": 510}, {"loss": 1.3299, "grad_norm": 0.7426044344902039, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 520}, {"loss": 1.26, "grad_norm": 0.6892959475517273, "learning_rate": 0.0002, "epoch": 3.8970588235294117, "step": 530}, {"loss": 1.3376, "grad_norm": 0.7498812675476074, "learning_rate": 0.0002, "epoch": 3.9705882352941178, "step": 540}]} +{"epoch": 5.0, "step": 680, "epoch_duration": 159.85904383659363, "total_accumulated_duration": 860.6812663078308, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}, {"eval_loss": 1.4206745624542236, "eval_runtime": 7.0429, "eval_samples_per_second": 13.915, "eval_steps_per_second": 1.846, "epoch": 3.0, "step": 408}, {"loss": 1.4063, "grad_norm": 0.6254619359970093, "learning_rate": 0.0002, "epoch": 3.014705882352941, "step": 410}, {"loss": 1.2863, "grad_norm": 0.7344406843185425, "learning_rate": 0.0002, "epoch": 3.088235294117647, "step": 420}, {"loss": 1.3088, "grad_norm": 0.7327449321746826, "learning_rate": 0.0002, "epoch": 3.161764705882353, "step": 430}, {"loss": 1.2746, "grad_norm": 0.7766542434692383, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 440}, {"loss": 1.1912, "grad_norm": 0.7025649547576904, "learning_rate": 0.0002, "epoch": 3.3088235294117645, "step": 450}, {"loss": 1.2526, "grad_norm": 0.7508461475372314, "learning_rate": 0.0002, "epoch": 3.3823529411764706, "step": 460}, {"loss": 1.2834, "grad_norm": 0.8148072361946106, "learning_rate": 0.0002, "epoch": 3.4558823529411766, "step": 470}, {"loss": 1.2115, "grad_norm": 0.7245928645133972, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 480}, {"loss": 1.2885, "grad_norm": 0.9170019626617432, "learning_rate": 0.0002, "epoch": 3.6029411764705883, "step": 490}, {"loss": 1.2879, "grad_norm": 0.9033855199813843, "learning_rate": 0.0002, "epoch": 3.6764705882352944, "step": 500}, {"loss": 1.3065, "grad_norm": 0.7575234174728394, "learning_rate": 0.0002, "epoch": 3.75, "step": 510}, {"loss": 1.3299, "grad_norm": 0.7426044344902039, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 520}, {"loss": 1.26, "grad_norm": 0.6892959475517273, "learning_rate": 0.0002, "epoch": 3.8970588235294117, "step": 530}, {"loss": 1.3376, "grad_norm": 0.7498812675476074, "learning_rate": 0.0002, "epoch": 3.9705882352941178, "step": 540}, {"eval_loss": 1.4219430685043335, "eval_runtime": 7.7457, "eval_samples_per_second": 12.652, "eval_steps_per_second": 1.678, "epoch": 4.0, "step": 544}, {"loss": 1.1944, "grad_norm": 0.8324301838874817, "learning_rate": 0.0002, "epoch": 4.044117647058823, "step": 550}, {"loss": 1.2263, "grad_norm": 0.8911291360855103, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 560}, {"loss": 1.1778, "grad_norm": 0.856676459312439, "learning_rate": 0.0002, "epoch": 4.1911764705882355, "step": 570}, {"loss": 1.2294, "grad_norm": 1.074108600616455, "learning_rate": 0.0002, "epoch": 4.264705882352941, "step": 580}, {"loss": 1.1478, "grad_norm": 0.8867416977882385, "learning_rate": 0.0002, "epoch": 4.338235294117647, "step": 590}, {"loss": 1.1816, "grad_norm": 0.7843716740608215, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 600}, {"loss": 1.1885, "grad_norm": 0.8869543075561523, "learning_rate": 0.0002, "epoch": 4.485294117647059, "step": 610}, {"loss": 1.2468, "grad_norm": 0.7744895815849304, "learning_rate": 0.0002, "epoch": 4.5588235294117645, "step": 620}, {"loss": 1.1613, "grad_norm": 0.7312784790992737, "learning_rate": 0.0002, "epoch": 4.632352941176471, "step": 630}, {"loss": 1.1359, "grad_norm": 0.8561248779296875, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 640}, {"loss": 1.15, "grad_norm": 0.888317346572876, "learning_rate": 0.0002, "epoch": 4.779411764705882, "step": 650}, {"loss": 1.2659, "grad_norm": 0.8369079828262329, "learning_rate": 0.0002, "epoch": 4.852941176470588, "step": 660}, {"loss": 1.185, "grad_norm": 0.7990967631340027, "learning_rate": 0.0002, "epoch": 4.926470588235294, "step": 670}, {"loss": 1.2121, "grad_norm": 0.8745001554489136, "learning_rate": 0.0002, "epoch": 5.0, "step": 680}]} +{"epoch": 6.0, "step": 816, "epoch_duration": 171.44942498207092, "total_accumulated_duration": 1032.1306912899017, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}, {"eval_loss": 1.4206745624542236, "eval_runtime": 7.0429, "eval_samples_per_second": 13.915, "eval_steps_per_second": 1.846, "epoch": 3.0, "step": 408}, {"loss": 1.4063, "grad_norm": 0.6254619359970093, "learning_rate": 0.0002, "epoch": 3.014705882352941, "step": 410}, {"loss": 1.2863, "grad_norm": 0.7344406843185425, "learning_rate": 0.0002, "epoch": 3.088235294117647, "step": 420}, {"loss": 1.3088, "grad_norm": 0.7327449321746826, "learning_rate": 0.0002, "epoch": 3.161764705882353, "step": 430}, {"loss": 1.2746, "grad_norm": 0.7766542434692383, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 440}, {"loss": 1.1912, "grad_norm": 0.7025649547576904, "learning_rate": 0.0002, "epoch": 3.3088235294117645, "step": 450}, {"loss": 1.2526, "grad_norm": 0.7508461475372314, "learning_rate": 0.0002, "epoch": 3.3823529411764706, "step": 460}, {"loss": 1.2834, "grad_norm": 0.8148072361946106, "learning_rate": 0.0002, "epoch": 3.4558823529411766, "step": 470}, {"loss": 1.2115, "grad_norm": 0.7245928645133972, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 480}, {"loss": 1.2885, "grad_norm": 0.9170019626617432, "learning_rate": 0.0002, "epoch": 3.6029411764705883, "step": 490}, {"loss": 1.2879, "grad_norm": 0.9033855199813843, "learning_rate": 0.0002, "epoch": 3.6764705882352944, "step": 500}, {"loss": 1.3065, "grad_norm": 0.7575234174728394, "learning_rate": 0.0002, "epoch": 3.75, "step": 510}, {"loss": 1.3299, "grad_norm": 0.7426044344902039, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 520}, {"loss": 1.26, "grad_norm": 0.6892959475517273, "learning_rate": 0.0002, "epoch": 3.8970588235294117, "step": 530}, {"loss": 1.3376, "grad_norm": 0.7498812675476074, "learning_rate": 0.0002, "epoch": 3.9705882352941178, "step": 540}, {"eval_loss": 1.4219430685043335, "eval_runtime": 7.7457, "eval_samples_per_second": 12.652, "eval_steps_per_second": 1.678, "epoch": 4.0, "step": 544}, {"loss": 1.1944, "grad_norm": 0.8324301838874817, "learning_rate": 0.0002, "epoch": 4.044117647058823, "step": 550}, {"loss": 1.2263, "grad_norm": 0.8911291360855103, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 560}, {"loss": 1.1778, "grad_norm": 0.856676459312439, "learning_rate": 0.0002, "epoch": 4.1911764705882355, "step": 570}, {"loss": 1.2294, "grad_norm": 1.074108600616455, "learning_rate": 0.0002, "epoch": 4.264705882352941, "step": 580}, {"loss": 1.1478, "grad_norm": 0.8867416977882385, "learning_rate": 0.0002, "epoch": 4.338235294117647, "step": 590}, {"loss": 1.1816, "grad_norm": 0.7843716740608215, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 600}, {"loss": 1.1885, "grad_norm": 0.8869543075561523, "learning_rate": 0.0002, "epoch": 4.485294117647059, "step": 610}, {"loss": 1.2468, "grad_norm": 0.7744895815849304, "learning_rate": 0.0002, "epoch": 4.5588235294117645, "step": 620}, {"loss": 1.1613, "grad_norm": 0.7312784790992737, "learning_rate": 0.0002, "epoch": 4.632352941176471, "step": 630}, {"loss": 1.1359, "grad_norm": 0.8561248779296875, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 640}, {"loss": 1.15, "grad_norm": 0.888317346572876, "learning_rate": 0.0002, "epoch": 4.779411764705882, "step": 650}, {"loss": 1.2659, "grad_norm": 0.8369079828262329, "learning_rate": 0.0002, "epoch": 4.852941176470588, "step": 660}, {"loss": 1.185, "grad_norm": 0.7990967631340027, "learning_rate": 0.0002, "epoch": 4.926470588235294, "step": 670}, {"loss": 1.2121, "grad_norm": 0.8745001554489136, "learning_rate": 0.0002, "epoch": 5.0, "step": 680}, {"eval_loss": 1.426682949066162, "eval_runtime": 7.7664, "eval_samples_per_second": 12.618, "eval_steps_per_second": 1.674, "epoch": 5.0, "step": 680}, {"loss": 1.0755, "grad_norm": 0.8637261986732483, "learning_rate": 0.0002, "epoch": 5.073529411764706, "step": 690}, {"loss": 1.0615, "grad_norm": 0.8743941187858582, "learning_rate": 0.0002, "epoch": 5.147058823529412, "step": 700}, {"loss": 1.0753, "grad_norm": 0.8632293939590454, "learning_rate": 0.0002, "epoch": 5.220588235294118, "step": 710}, {"loss": 1.1341, "grad_norm": 1.1503057479858398, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 720}, {"loss": 1.1174, "grad_norm": 0.9048053026199341, "learning_rate": 0.0002, "epoch": 5.367647058823529, "step": 730}, {"loss": 1.0939, "grad_norm": 0.8516059517860413, "learning_rate": 0.0002, "epoch": 5.4411764705882355, "step": 740}, {"loss": 1.1518, "grad_norm": 0.9515685439109802, "learning_rate": 0.0002, "epoch": 5.514705882352941, "step": 750}, {"loss": 1.0982, "grad_norm": 0.8125670552253723, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 760}, {"loss": 1.1298, "grad_norm": 1.0451067686080933, "learning_rate": 0.0002, "epoch": 5.661764705882353, "step": 770}, {"loss": 1.0582, "grad_norm": 0.8425356149673462, "learning_rate": 0.0002, "epoch": 5.735294117647059, "step": 780}, {"loss": 1.1046, "grad_norm": 0.8448241353034973, "learning_rate": 0.0002, "epoch": 5.8088235294117645, "step": 790}, {"loss": 1.1626, "grad_norm": 0.9654536843299866, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 800}, {"loss": 1.1457, "grad_norm": 1.099204659461975, "learning_rate": 0.0002, "epoch": 5.955882352941177, "step": 810}]} +{"epoch": 7.0, "step": 952, "epoch_duration": 184.0236291885376, "total_accumulated_duration": 1216.1543204784393, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}, {"eval_loss": 1.4206745624542236, "eval_runtime": 7.0429, "eval_samples_per_second": 13.915, "eval_steps_per_second": 1.846, "epoch": 3.0, "step": 408}, {"loss": 1.4063, "grad_norm": 0.6254619359970093, "learning_rate": 0.0002, "epoch": 3.014705882352941, "step": 410}, {"loss": 1.2863, "grad_norm": 0.7344406843185425, "learning_rate": 0.0002, "epoch": 3.088235294117647, "step": 420}, {"loss": 1.3088, "grad_norm": 0.7327449321746826, "learning_rate": 0.0002, "epoch": 3.161764705882353, "step": 430}, {"loss": 1.2746, "grad_norm": 0.7766542434692383, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 440}, {"loss": 1.1912, "grad_norm": 0.7025649547576904, "learning_rate": 0.0002, "epoch": 3.3088235294117645, "step": 450}, {"loss": 1.2526, "grad_norm": 0.7508461475372314, "learning_rate": 0.0002, "epoch": 3.3823529411764706, "step": 460}, {"loss": 1.2834, "grad_norm": 0.8148072361946106, "learning_rate": 0.0002, "epoch": 3.4558823529411766, "step": 470}, {"loss": 1.2115, "grad_norm": 0.7245928645133972, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 480}, {"loss": 1.2885, "grad_norm": 0.9170019626617432, "learning_rate": 0.0002, "epoch": 3.6029411764705883, "step": 490}, {"loss": 1.2879, "grad_norm": 0.9033855199813843, "learning_rate": 0.0002, "epoch": 3.6764705882352944, "step": 500}, {"loss": 1.3065, "grad_norm": 0.7575234174728394, "learning_rate": 0.0002, "epoch": 3.75, "step": 510}, {"loss": 1.3299, "grad_norm": 0.7426044344902039, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 520}, {"loss": 1.26, "grad_norm": 0.6892959475517273, "learning_rate": 0.0002, "epoch": 3.8970588235294117, "step": 530}, {"loss": 1.3376, "grad_norm": 0.7498812675476074, "learning_rate": 0.0002, "epoch": 3.9705882352941178, "step": 540}, {"eval_loss": 1.4219430685043335, "eval_runtime": 7.7457, "eval_samples_per_second": 12.652, "eval_steps_per_second": 1.678, "epoch": 4.0, "step": 544}, {"loss": 1.1944, "grad_norm": 0.8324301838874817, "learning_rate": 0.0002, "epoch": 4.044117647058823, "step": 550}, {"loss": 1.2263, "grad_norm": 0.8911291360855103, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 560}, {"loss": 1.1778, "grad_norm": 0.856676459312439, "learning_rate": 0.0002, "epoch": 4.1911764705882355, "step": 570}, {"loss": 1.2294, "grad_norm": 1.074108600616455, "learning_rate": 0.0002, "epoch": 4.264705882352941, "step": 580}, {"loss": 1.1478, "grad_norm": 0.8867416977882385, "learning_rate": 0.0002, "epoch": 4.338235294117647, "step": 590}, {"loss": 1.1816, "grad_norm": 0.7843716740608215, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 600}, {"loss": 1.1885, "grad_norm": 0.8869543075561523, "learning_rate": 0.0002, "epoch": 4.485294117647059, "step": 610}, {"loss": 1.2468, "grad_norm": 0.7744895815849304, "learning_rate": 0.0002, "epoch": 4.5588235294117645, "step": 620}, {"loss": 1.1613, "grad_norm": 0.7312784790992737, "learning_rate": 0.0002, "epoch": 4.632352941176471, "step": 630}, {"loss": 1.1359, "grad_norm": 0.8561248779296875, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 640}, {"loss": 1.15, "grad_norm": 0.888317346572876, "learning_rate": 0.0002, "epoch": 4.779411764705882, "step": 650}, {"loss": 1.2659, "grad_norm": 0.8369079828262329, "learning_rate": 0.0002, "epoch": 4.852941176470588, "step": 660}, {"loss": 1.185, "grad_norm": 0.7990967631340027, "learning_rate": 0.0002, "epoch": 4.926470588235294, "step": 670}, {"loss": 1.2121, "grad_norm": 0.8745001554489136, "learning_rate": 0.0002, "epoch": 5.0, "step": 680}, {"eval_loss": 1.426682949066162, "eval_runtime": 7.7664, "eval_samples_per_second": 12.618, "eval_steps_per_second": 1.674, "epoch": 5.0, "step": 680}, {"loss": 1.0755, "grad_norm": 0.8637261986732483, "learning_rate": 0.0002, "epoch": 5.073529411764706, "step": 690}, {"loss": 1.0615, "grad_norm": 0.8743941187858582, "learning_rate": 0.0002, "epoch": 5.147058823529412, "step": 700}, {"loss": 1.0753, "grad_norm": 0.8632293939590454, "learning_rate": 0.0002, "epoch": 5.220588235294118, "step": 710}, {"loss": 1.1341, "grad_norm": 1.1503057479858398, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 720}, {"loss": 1.1174, "grad_norm": 0.9048053026199341, "learning_rate": 0.0002, "epoch": 5.367647058823529, "step": 730}, {"loss": 1.0939, "grad_norm": 0.8516059517860413, "learning_rate": 0.0002, "epoch": 5.4411764705882355, "step": 740}, {"loss": 1.1518, "grad_norm": 0.9515685439109802, "learning_rate": 0.0002, "epoch": 5.514705882352941, "step": 750}, {"loss": 1.0982, "grad_norm": 0.8125670552253723, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 760}, {"loss": 1.1298, "grad_norm": 1.0451067686080933, "learning_rate": 0.0002, "epoch": 5.661764705882353, "step": 770}, {"loss": 1.0582, "grad_norm": 0.8425356149673462, "learning_rate": 0.0002, "epoch": 5.735294117647059, "step": 780}, {"loss": 1.1046, "grad_norm": 0.8448241353034973, "learning_rate": 0.0002, "epoch": 5.8088235294117645, "step": 790}, {"loss": 1.1626, "grad_norm": 0.9654536843299866, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 800}, {"loss": 1.1457, "grad_norm": 1.099204659461975, "learning_rate": 0.0002, "epoch": 5.955882352941177, "step": 810}, {"eval_loss": 1.453696370124817, "eval_runtime": 7.8117, "eval_samples_per_second": 12.545, "eval_steps_per_second": 1.664, "epoch": 6.0, "step": 816}, {"loss": 1.0491, "grad_norm": 0.7710627913475037, "learning_rate": 0.0002, "epoch": 6.029411764705882, "step": 820}, {"loss": 0.9758, "grad_norm": 1.0457934141159058, "learning_rate": 0.0002, "epoch": 6.102941176470588, "step": 830}, {"loss": 0.9917, "grad_norm": 0.9009696245193481, "learning_rate": 0.0002, "epoch": 6.176470588235294, "step": 840}, {"loss": 0.9978, "grad_norm": 0.9443604946136475, "learning_rate": 0.0002, "epoch": 6.25, "step": 850}, {"loss": 1.0012, "grad_norm": 1.017409086227417, "learning_rate": 0.0002, "epoch": 6.323529411764706, "step": 860}, {"loss": 1.0073, "grad_norm": 1.0726631879806519, "learning_rate": 0.0002, "epoch": 6.397058823529412, "step": 870}, {"loss": 1.0144, "grad_norm": 1.0754258632659912, "learning_rate": 0.0002, "epoch": 6.470588235294118, "step": 880}, {"loss": 1.042, "grad_norm": 0.9952278733253479, "learning_rate": 0.0002, "epoch": 6.544117647058823, "step": 890}, {"loss": 1.0573, "grad_norm": 1.0648400783538818, "learning_rate": 0.0002, "epoch": 6.617647058823529, "step": 900}, {"loss": 0.9765, "grad_norm": 1.102169394493103, "learning_rate": 0.0002, "epoch": 6.6911764705882355, "step": 910}, {"loss": 1.0358, "grad_norm": 1.022658348083496, "learning_rate": 0.0002, "epoch": 6.764705882352941, "step": 920}, {"loss": 1.0819, "grad_norm": 0.9385603666305542, "learning_rate": 0.0002, "epoch": 6.838235294117647, "step": 930}, {"loss": 1.0395, "grad_norm": 0.9402251839637756, "learning_rate": 0.0002, "epoch": 6.911764705882353, "step": 940}, {"loss": 1.014, "grad_norm": 1.3918722867965698, "learning_rate": 0.0002, "epoch": 6.985294117647059, "step": 950}]} +{"epoch": 8.0, "step": 1088, "epoch_duration": 196.87625670433044, "total_accumulated_duration": 1413.0305771827698, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17333984375}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.7-num-2170-sd-42/checkpoint-408", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.6278, "grad_norm": 1.515879511833191, "learning_rate": 0.0002, "epoch": 0.07352941176470588, "step": 10}, {"loss": 2.4224, "grad_norm": 6.156805992126465, "learning_rate": 0.0002, "epoch": 0.14705882352941177, "step": 20}, {"loss": 2.107, "grad_norm": 1.264341950416565, "learning_rate": 0.0002, "epoch": 0.22058823529411764, "step": 30}, {"loss": 1.9838, "grad_norm": 0.8591746091842651, "learning_rate": 0.0002, "epoch": 0.29411764705882354, "step": 40}, {"loss": 1.916, "grad_norm": 1.4083963632583618, "learning_rate": 0.0002, "epoch": 0.36764705882352944, "step": 50}, {"loss": 1.7336, "grad_norm": 3.570463180541992, "learning_rate": 0.0002, "epoch": 0.4411764705882353, "step": 60}, {"loss": 1.7316, "grad_norm": 0.6044552326202393, "learning_rate": 0.0002, "epoch": 0.5147058823529411, "step": 70}, {"loss": 1.7078, "grad_norm": 1.1396939754486084, "learning_rate": 0.0002, "epoch": 0.5882352941176471, "step": 80}, {"loss": 1.6052, "grad_norm": 0.7549962997436523, "learning_rate": 0.0002, "epoch": 0.6617647058823529, "step": 90}, {"loss": 1.7247, "grad_norm": 1.2676323652267456, "learning_rate": 0.0002, "epoch": 0.7352941176470589, "step": 100}, {"loss": 1.6836, "grad_norm": 1.223105788230896, "learning_rate": 0.0002, "epoch": 0.8088235294117647, "step": 110}, {"loss": 1.631, "grad_norm": 1.0946531295776367, "learning_rate": 0.0002, "epoch": 0.8823529411764706, "step": 120}, {"loss": 1.6089, "grad_norm": 0.8674123883247375, "learning_rate": 0.0002, "epoch": 0.9558823529411765, "step": 130}, {"eval_loss": 1.534969449043274, "eval_runtime": 7.804, "eval_samples_per_second": 12.558, "eval_steps_per_second": 1.666, "epoch": 1.0, "step": 136}, {"loss": 1.4976, "grad_norm": 0.6233109831809998, "learning_rate": 0.0002, "epoch": 1.0294117647058822, "step": 140}, {"loss": 1.4431, "grad_norm": 0.85622239112854, "learning_rate": 0.0002, "epoch": 1.1029411764705883, "step": 150}, {"loss": 1.4246, "grad_norm": 0.7703060507774353, "learning_rate": 0.0002, "epoch": 1.1764705882352942, "step": 160}, {"loss": 1.5783, "grad_norm": 0.8302682638168335, "learning_rate": 0.0002, "epoch": 1.25, "step": 170}, {"loss": 1.4883, "grad_norm": 0.7850589156150818, "learning_rate": 0.0002, "epoch": 1.3235294117647058, "step": 180}, {"loss": 1.5075, "grad_norm": 0.6718934774398804, "learning_rate": 0.0002, "epoch": 1.3970588235294117, "step": 190}, {"loss": 1.4136, "grad_norm": 0.7245007753372192, "learning_rate": 0.0002, "epoch": 1.4705882352941178, "step": 200}, {"loss": 1.4666, "grad_norm": 0.7120554447174072, "learning_rate": 0.0002, "epoch": 1.5441176470588234, "step": 210}, {"loss": 1.4619, "grad_norm": 0.6771036982536316, "learning_rate": 0.0002, "epoch": 1.6176470588235294, "step": 220}, {"loss": 1.4465, "grad_norm": 0.618951141834259, "learning_rate": 0.0002, "epoch": 1.6911764705882353, "step": 230}, {"loss": 1.4896, "grad_norm": 0.6583403944969177, "learning_rate": 0.0002, "epoch": 1.7647058823529411, "step": 240}, {"loss": 1.4613, "grad_norm": 0.6750185489654541, "learning_rate": 0.0002, "epoch": 1.8382352941176472, "step": 250}, {"loss": 1.4614, "grad_norm": 0.6572791337966919, "learning_rate": 0.0002, "epoch": 1.9117647058823528, "step": 260}, {"loss": 1.4242, "grad_norm": 0.6265441179275513, "learning_rate": 0.0002, "epoch": 1.9852941176470589, "step": 270}, {"eval_loss": 1.4521459341049194, "eval_runtime": 7.6483, "eval_samples_per_second": 12.813, "eval_steps_per_second": 1.7, "epoch": 2.0, "step": 272}, {"loss": 1.4108, "grad_norm": 0.7698636054992676, "learning_rate": 0.0002, "epoch": 2.0588235294117645, "step": 280}, {"loss": 1.4143, "grad_norm": 0.7958650588989258, "learning_rate": 0.0002, "epoch": 2.1323529411764706, "step": 290}, {"loss": 1.3826, "grad_norm": 0.7007899284362793, "learning_rate": 0.0002, "epoch": 2.2058823529411766, "step": 300}, {"loss": 1.3624, "grad_norm": 0.6673262715339661, "learning_rate": 0.0002, "epoch": 2.2794117647058822, "step": 310}, {"loss": 1.3212, "grad_norm": 0.6731301546096802, "learning_rate": 0.0002, "epoch": 2.3529411764705883, "step": 320}, {"loss": 1.3626, "grad_norm": 0.7587279081344604, "learning_rate": 0.0002, "epoch": 2.426470588235294, "step": 330}, {"loss": 1.3401, "grad_norm": 0.804149329662323, "learning_rate": 0.0002, "epoch": 2.5, "step": 340}, {"loss": 1.4204, "grad_norm": 0.7690186500549316, "learning_rate": 0.0002, "epoch": 2.5735294117647056, "step": 350}, {"loss": 1.3885, "grad_norm": 0.9660338163375854, "learning_rate": 0.0002, "epoch": 2.6470588235294117, "step": 360}, {"loss": 1.36, "grad_norm": 0.6990594267845154, "learning_rate": 0.0002, "epoch": 2.7205882352941178, "step": 370}, {"loss": 1.3354, "grad_norm": 0.7933360934257507, "learning_rate": 0.0002, "epoch": 2.7941176470588234, "step": 380}, {"loss": 1.2904, "grad_norm": 0.8198168277740479, "learning_rate": 0.0002, "epoch": 2.8676470588235294, "step": 390}, {"loss": 1.291, "grad_norm": 0.6719775199890137, "learning_rate": 0.0002, "epoch": 2.9411764705882355, "step": 400}, {"eval_loss": 1.4206745624542236, "eval_runtime": 7.0429, "eval_samples_per_second": 13.915, "eval_steps_per_second": 1.846, "epoch": 3.0, "step": 408}, {"loss": 1.4063, "grad_norm": 0.6254619359970093, "learning_rate": 0.0002, "epoch": 3.014705882352941, "step": 410}, {"loss": 1.2863, "grad_norm": 0.7344406843185425, "learning_rate": 0.0002, "epoch": 3.088235294117647, "step": 420}, {"loss": 1.3088, "grad_norm": 0.7327449321746826, "learning_rate": 0.0002, "epoch": 3.161764705882353, "step": 430}, {"loss": 1.2746, "grad_norm": 0.7766542434692383, "learning_rate": 0.0002, "epoch": 3.235294117647059, "step": 440}, {"loss": 1.1912, "grad_norm": 0.7025649547576904, "learning_rate": 0.0002, "epoch": 3.3088235294117645, "step": 450}, {"loss": 1.2526, "grad_norm": 0.7508461475372314, "learning_rate": 0.0002, "epoch": 3.3823529411764706, "step": 460}, {"loss": 1.2834, "grad_norm": 0.8148072361946106, "learning_rate": 0.0002, "epoch": 3.4558823529411766, "step": 470}, {"loss": 1.2115, "grad_norm": 0.7245928645133972, "learning_rate": 0.0002, "epoch": 3.5294117647058822, "step": 480}, {"loss": 1.2885, "grad_norm": 0.9170019626617432, "learning_rate": 0.0002, "epoch": 3.6029411764705883, "step": 490}, {"loss": 1.2879, "grad_norm": 0.9033855199813843, "learning_rate": 0.0002, "epoch": 3.6764705882352944, "step": 500}, {"loss": 1.3065, "grad_norm": 0.7575234174728394, "learning_rate": 0.0002, "epoch": 3.75, "step": 510}, {"loss": 1.3299, "grad_norm": 0.7426044344902039, "learning_rate": 0.0002, "epoch": 3.8235294117647056, "step": 520}, {"loss": 1.26, "grad_norm": 0.6892959475517273, "learning_rate": 0.0002, "epoch": 3.8970588235294117, "step": 530}, {"loss": 1.3376, "grad_norm": 0.7498812675476074, "learning_rate": 0.0002, "epoch": 3.9705882352941178, "step": 540}, {"eval_loss": 1.4219430685043335, "eval_runtime": 7.7457, "eval_samples_per_second": 12.652, "eval_steps_per_second": 1.678, "epoch": 4.0, "step": 544}, {"loss": 1.1944, "grad_norm": 0.8324301838874817, "learning_rate": 0.0002, "epoch": 4.044117647058823, "step": 550}, {"loss": 1.2263, "grad_norm": 0.8911291360855103, "learning_rate": 0.0002, "epoch": 4.117647058823529, "step": 560}, {"loss": 1.1778, "grad_norm": 0.856676459312439, "learning_rate": 0.0002, "epoch": 4.1911764705882355, "step": 570}, {"loss": 1.2294, "grad_norm": 1.074108600616455, "learning_rate": 0.0002, "epoch": 4.264705882352941, "step": 580}, {"loss": 1.1478, "grad_norm": 0.8867416977882385, "learning_rate": 0.0002, "epoch": 4.338235294117647, "step": 590}, {"loss": 1.1816, "grad_norm": 0.7843716740608215, "learning_rate": 0.0002, "epoch": 4.411764705882353, "step": 600}, {"loss": 1.1885, "grad_norm": 0.8869543075561523, "learning_rate": 0.0002, "epoch": 4.485294117647059, "step": 610}, {"loss": 1.2468, "grad_norm": 0.7744895815849304, "learning_rate": 0.0002, "epoch": 4.5588235294117645, "step": 620}, {"loss": 1.1613, "grad_norm": 0.7312784790992737, "learning_rate": 0.0002, "epoch": 4.632352941176471, "step": 630}, {"loss": 1.1359, "grad_norm": 0.8561248779296875, "learning_rate": 0.0002, "epoch": 4.705882352941177, "step": 640}, {"loss": 1.15, "grad_norm": 0.888317346572876, "learning_rate": 0.0002, "epoch": 4.779411764705882, "step": 650}, {"loss": 1.2659, "grad_norm": 0.8369079828262329, "learning_rate": 0.0002, "epoch": 4.852941176470588, "step": 660}, {"loss": 1.185, "grad_norm": 0.7990967631340027, "learning_rate": 0.0002, "epoch": 4.926470588235294, "step": 670}, {"loss": 1.2121, "grad_norm": 0.8745001554489136, "learning_rate": 0.0002, "epoch": 5.0, "step": 680}, {"eval_loss": 1.426682949066162, "eval_runtime": 7.7664, "eval_samples_per_second": 12.618, "eval_steps_per_second": 1.674, "epoch": 5.0, "step": 680}, {"loss": 1.0755, "grad_norm": 0.8637261986732483, "learning_rate": 0.0002, "epoch": 5.073529411764706, "step": 690}, {"loss": 1.0615, "grad_norm": 0.8743941187858582, "learning_rate": 0.0002, "epoch": 5.147058823529412, "step": 700}, {"loss": 1.0753, "grad_norm": 0.8632293939590454, "learning_rate": 0.0002, "epoch": 5.220588235294118, "step": 710}, {"loss": 1.1341, "grad_norm": 1.1503057479858398, "learning_rate": 0.0002, "epoch": 5.294117647058823, "step": 720}, {"loss": 1.1174, "grad_norm": 0.9048053026199341, "learning_rate": 0.0002, "epoch": 5.367647058823529, "step": 730}, {"loss": 1.0939, "grad_norm": 0.8516059517860413, "learning_rate": 0.0002, "epoch": 5.4411764705882355, "step": 740}, {"loss": 1.1518, "grad_norm": 0.9515685439109802, "learning_rate": 0.0002, "epoch": 5.514705882352941, "step": 750}, {"loss": 1.0982, "grad_norm": 0.8125670552253723, "learning_rate": 0.0002, "epoch": 5.588235294117647, "step": 760}, {"loss": 1.1298, "grad_norm": 1.0451067686080933, "learning_rate": 0.0002, "epoch": 5.661764705882353, "step": 770}, {"loss": 1.0582, "grad_norm": 0.8425356149673462, "learning_rate": 0.0002, "epoch": 5.735294117647059, "step": 780}, {"loss": 1.1046, "grad_norm": 0.8448241353034973, "learning_rate": 0.0002, "epoch": 5.8088235294117645, "step": 790}, {"loss": 1.1626, "grad_norm": 0.9654536843299866, "learning_rate": 0.0002, "epoch": 5.882352941176471, "step": 800}, {"loss": 1.1457, "grad_norm": 1.099204659461975, "learning_rate": 0.0002, "epoch": 5.955882352941177, "step": 810}, {"eval_loss": 1.453696370124817, "eval_runtime": 7.8117, "eval_samples_per_second": 12.545, "eval_steps_per_second": 1.664, "epoch": 6.0, "step": 816}, {"loss": 1.0491, "grad_norm": 0.7710627913475037, "learning_rate": 0.0002, "epoch": 6.029411764705882, "step": 820}, {"loss": 0.9758, "grad_norm": 1.0457934141159058, "learning_rate": 0.0002, "epoch": 6.102941176470588, "step": 830}, {"loss": 0.9917, "grad_norm": 0.9009696245193481, "learning_rate": 0.0002, "epoch": 6.176470588235294, "step": 840}, {"loss": 0.9978, "grad_norm": 0.9443604946136475, "learning_rate": 0.0002, "epoch": 6.25, "step": 850}, {"loss": 1.0012, "grad_norm": 1.017409086227417, "learning_rate": 0.0002, "epoch": 6.323529411764706, "step": 860}, {"loss": 1.0073, "grad_norm": 1.0726631879806519, "learning_rate": 0.0002, "epoch": 6.397058823529412, "step": 870}, {"loss": 1.0144, "grad_norm": 1.0754258632659912, "learning_rate": 0.0002, "epoch": 6.470588235294118, "step": 880}, {"loss": 1.042, "grad_norm": 0.9952278733253479, "learning_rate": 0.0002, "epoch": 6.544117647058823, "step": 890}, {"loss": 1.0573, "grad_norm": 1.0648400783538818, "learning_rate": 0.0002, "epoch": 6.617647058823529, "step": 900}, {"loss": 0.9765, "grad_norm": 1.102169394493103, "learning_rate": 0.0002, "epoch": 6.6911764705882355, "step": 910}, {"loss": 1.0358, "grad_norm": 1.022658348083496, "learning_rate": 0.0002, "epoch": 6.764705882352941, "step": 920}, {"loss": 1.0819, "grad_norm": 0.9385603666305542, "learning_rate": 0.0002, "epoch": 6.838235294117647, "step": 930}, {"loss": 1.0395, "grad_norm": 0.9402251839637756, "learning_rate": 0.0002, "epoch": 6.911764705882353, "step": 940}, {"loss": 1.014, "grad_norm": 1.3918722867965698, "learning_rate": 0.0002, "epoch": 6.985294117647059, "step": 950}, {"eval_loss": 1.4942296743392944, "eval_runtime": 7.7264, "eval_samples_per_second": 12.684, "eval_steps_per_second": 1.683, "epoch": 7.0, "step": 952}, {"loss": 0.8846, "grad_norm": 0.9380860328674316, "learning_rate": 0.0002, "epoch": 7.0588235294117645, "step": 960}, {"loss": 0.9289, "grad_norm": 1.0754766464233398, "learning_rate": 0.0002, "epoch": 7.132352941176471, "step": 970}, {"loss": 0.942, "grad_norm": 1.2220656871795654, "learning_rate": 0.0002, "epoch": 7.205882352941177, "step": 980}, {"loss": 0.8638, "grad_norm": 1.0372205972671509, "learning_rate": 0.0002, "epoch": 7.279411764705882, "step": 990}, {"loss": 0.9062, "grad_norm": 1.1364140510559082, "learning_rate": 0.0002, "epoch": 7.352941176470588, "step": 1000}, {"loss": 0.9327, "grad_norm": 0.9808094501495361, "learning_rate": 0.0002, "epoch": 7.426470588235294, "step": 1010}, {"loss": 0.9982, "grad_norm": 1.245301365852356, "learning_rate": 0.0002, "epoch": 7.5, "step": 1020}, {"loss": 0.9838, "grad_norm": 1.1632885932922363, "learning_rate": 0.0002, "epoch": 7.573529411764706, "step": 1030}, {"loss": 0.9876, "grad_norm": 1.3757420778274536, "learning_rate": 0.0002, "epoch": 7.647058823529412, "step": 1040}, {"loss": 0.9419, "grad_norm": 1.4189417362213135, "learning_rate": 0.0002, "epoch": 7.720588235294118, "step": 1050}, {"loss": 0.9511, "grad_norm": 1.1543806791305542, "learning_rate": 0.0002, "epoch": 7.794117647058823, "step": 1060}, {"loss": 0.926, "grad_norm": 1.1373614072799683, "learning_rate": 0.0002, "epoch": 7.867647058823529, "step": 1070}, {"loss": 0.9216, "grad_norm": 1.0185565948486328, "learning_rate": 0.0002, "epoch": 7.9411764705882355, "step": 1080}]}