diff --git a/.gitattributes b/.gitattributes index 714b38b6c29461e77c2b5ca8e3ac7e6422e0c458..8e9018e1b737543e2b31a810421f8f9dfdbe97c2 100644 --- a/.gitattributes +++ b/.gitattributes @@ -488,3 +488,12 @@ gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_g gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-310-sd-10000/checkpoint-76/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-310-sd-10000/checkpoint-95/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.1-num-310-sd-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80d9d474ec5f736ff6f891f0bc050cac2310e785 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546d9a50ec7778987fc8866c16760b7c1a04c7a6905e3f70f3124f1290485262 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a553d8d5bd48654e78c99dde9845367a20e8684b --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f32b0ce88853c6a2901f885b5787f8bb1e9c7893c80060b860a6c18322d6481e +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5c6f5c8cd9c0819f65d4db8c14c771b603a0af0d --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58f5a36986eeaa8880fb1846e49e2d0db34827d7c540cf8368c0cc74bc30439c +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e7e06f1207c51a011b1cc675862b027dc0050ba0 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63caed2314332795e1765ad356b005fd0aafff8aa84adc9b69d1976e6145414 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f90f4683931460ef0a2748d22e850e81c371186 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:132d0da63c27ed01e5000b359996ee7c08d48a2e6d697c47283001f811dc2e1a +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..63922fa50d2a5a4587810bdbd0cb0c5570f54e48 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/trainer_state.json @@ -0,0 +1,816 @@ +{ + "best_metric": 1.3988444805145264, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 1053, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + }, + { + "epoch": 3.0199430199430197, + "grad_norm": 0.7109280228614807, + "learning_rate": 0.0002, + "loss": 1.3656, + "step": 530 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.6746246814727783, + "learning_rate": 0.0002, + "loss": 1.2571, + "step": 540 + }, + { + "epoch": 3.133903133903134, + "grad_norm": 0.7202523350715637, + "learning_rate": 0.0002, + "loss": 1.2685, + "step": 550 + }, + { + "epoch": 3.190883190883191, + "grad_norm": 0.697090208530426, + "learning_rate": 0.0002, + "loss": 1.1808, + "step": 560 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 0.7157464623451233, + "learning_rate": 0.0002, + "loss": 1.2479, + "step": 570 + }, + { + "epoch": 3.304843304843305, + "grad_norm": 0.8729232549667358, + "learning_rate": 0.0002, + "loss": 1.2426, + "step": 580 + }, + { + "epoch": 3.3618233618233617, + "grad_norm": 0.7119743227958679, + "learning_rate": 0.0002, + "loss": 1.2957, + "step": 590 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 0.7417448163032532, + "learning_rate": 0.0002, + "loss": 1.2787, + "step": 600 + }, + { + "epoch": 3.4757834757834756, + "grad_norm": 0.8174124956130981, + "learning_rate": 0.0002, + "loss": 1.2317, + "step": 610 + }, + { + "epoch": 3.532763532763533, + "grad_norm": 0.7199270129203796, + "learning_rate": 0.0002, + "loss": 1.2916, + "step": 620 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 0.989138662815094, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 630 + }, + { + "epoch": 3.646723646723647, + "grad_norm": 0.75921630859375, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 640 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.7844401001930237, + "learning_rate": 0.0002, + "loss": 1.2319, + "step": 650 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 0.9127110242843628, + "learning_rate": 0.0002, + "loss": 1.2851, + "step": 660 + }, + { + "epoch": 3.8176638176638176, + "grad_norm": 0.7972270846366882, + "learning_rate": 0.0002, + "loss": 1.2835, + "step": 670 + }, + { + "epoch": 3.8746438746438745, + "grad_norm": 0.7458992004394531, + "learning_rate": 0.0002, + "loss": 1.3105, + "step": 680 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 0.854924738407135, + "learning_rate": 0.0002, + "loss": 1.3017, + "step": 690 + }, + { + "epoch": 3.9886039886039883, + "grad_norm": 0.7763816118240356, + "learning_rate": 0.0002, + "loss": 1.2455, + "step": 700 + }, + { + "epoch": 4.0, + "eval_loss": 1.3988444805145264, + "eval_runtime": 2.8697, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.53, + "step": 702 + }, + { + "epoch": 4.045584045584046, + "grad_norm": 0.877430260181427, + "learning_rate": 0.0002, + "loss": 1.178, + "step": 710 + }, + { + "epoch": 4.102564102564102, + "grad_norm": 0.8365248441696167, + "learning_rate": 0.0002, + "loss": 1.1635, + "step": 720 + }, + { + "epoch": 4.15954415954416, + "grad_norm": 0.7748925089836121, + "learning_rate": 0.0002, + "loss": 1.2286, + "step": 730 + }, + { + "epoch": 4.216524216524217, + "grad_norm": 0.7695241570472717, + "learning_rate": 0.0002, + "loss": 1.1836, + "step": 740 + }, + { + "epoch": 4.273504273504273, + "grad_norm": 0.7229928374290466, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 750 + }, + { + "epoch": 4.330484330484331, + "grad_norm": 0.7035910487174988, + "learning_rate": 0.0002, + "loss": 1.117, + "step": 760 + }, + { + "epoch": 4.387464387464387, + "grad_norm": 0.9075796008110046, + "learning_rate": 0.0002, + "loss": 1.189, + "step": 770 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.7957494854927063, + "learning_rate": 0.0002, + "loss": 1.1693, + "step": 780 + }, + { + "epoch": 4.501424501424501, + "grad_norm": 0.8733780384063721, + "learning_rate": 0.0002, + "loss": 1.1945, + "step": 790 + }, + { + "epoch": 4.5584045584045585, + "grad_norm": 0.8786619901657104, + "learning_rate": 0.0002, + "loss": 1.1867, + "step": 800 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 0.7101715803146362, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 810 + }, + { + "epoch": 4.672364672364672, + "grad_norm": 0.7451328039169312, + "learning_rate": 0.0002, + "loss": 1.2063, + "step": 820 + }, + { + "epoch": 4.72934472934473, + "grad_norm": 0.7830713987350464, + "learning_rate": 0.0002, + "loss": 1.1939, + "step": 830 + }, + { + "epoch": 4.786324786324786, + "grad_norm": 0.7804535031318665, + "learning_rate": 0.0002, + "loss": 1.1251, + "step": 840 + }, + { + "epoch": 4.843304843304844, + "grad_norm": 0.8121811747550964, + "learning_rate": 0.0002, + "loss": 1.2278, + "step": 850 + }, + { + "epoch": 4.9002849002849, + "grad_norm": 0.774864137172699, + "learning_rate": 0.0002, + "loss": 1.142, + "step": 860 + }, + { + "epoch": 4.957264957264957, + "grad_norm": 0.7517814040184021, + "learning_rate": 0.0002, + "loss": 1.1736, + "step": 870 + }, + { + "epoch": 4.997150997150997, + "eval_loss": 1.4074795246124268, + "eval_runtime": 2.8707, + "eval_samples_per_second": 34.138, + "eval_steps_per_second": 4.529, + "step": 877 + }, + { + "epoch": 5.014245014245014, + "grad_norm": 0.7974972128868103, + "learning_rate": 0.0002, + "loss": 1.151, + "step": 880 + }, + { + "epoch": 5.071225071225071, + "grad_norm": 1.1127357482910156, + "learning_rate": 0.0002, + "loss": 1.0637, + "step": 890 + }, + { + "epoch": 5.128205128205128, + "grad_norm": 0.8995195031166077, + "learning_rate": 0.0002, + "loss": 1.0497, + "step": 900 + }, + { + "epoch": 5.185185185185185, + "grad_norm": 0.8325890898704529, + "learning_rate": 0.0002, + "loss": 1.1101, + "step": 910 + }, + { + "epoch": 5.2421652421652425, + "grad_norm": 0.8830686807632446, + "learning_rate": 0.0002, + "loss": 1.0567, + "step": 920 + }, + { + "epoch": 5.299145299145299, + "grad_norm": 0.8856923580169678, + "learning_rate": 0.0002, + "loss": 1.1094, + "step": 930 + }, + { + "epoch": 5.356125356125356, + "grad_norm": 0.814587414264679, + "learning_rate": 0.0002, + "loss": 1.0328, + "step": 940 + }, + { + "epoch": 5.413105413105413, + "grad_norm": 0.9119254946708679, + "learning_rate": 0.0002, + "loss": 1.1379, + "step": 950 + }, + { + "epoch": 5.47008547008547, + "grad_norm": 0.8547661304473877, + "learning_rate": 0.0002, + "loss": 1.0993, + "step": 960 + }, + { + "epoch": 5.527065527065528, + "grad_norm": 0.943742036819458, + "learning_rate": 0.0002, + "loss": 1.1137, + "step": 970 + }, + { + "epoch": 5.584045584045584, + "grad_norm": 1.1333340406417847, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 980 + }, + { + "epoch": 5.641025641025641, + "grad_norm": 1.0290982723236084, + "learning_rate": 0.0002, + "loss": 1.1161, + "step": 990 + }, + { + "epoch": 5.698005698005698, + "grad_norm": 1.0613716840744019, + "learning_rate": 0.0002, + "loss": 1.1477, + "step": 1000 + }, + { + "epoch": 5.754985754985755, + "grad_norm": 0.925118088722229, + "learning_rate": 0.0002, + "loss": 1.0878, + "step": 1010 + }, + { + "epoch": 5.811965811965812, + "grad_norm": 0.828220546245575, + "learning_rate": 0.0002, + "loss": 1.0658, + "step": 1020 + }, + { + "epoch": 5.868945868945869, + "grad_norm": 0.7466493248939514, + "learning_rate": 0.0002, + "loss": 1.1179, + "step": 1030 + }, + { + "epoch": 5.925925925925926, + "grad_norm": 0.9189135432243347, + "learning_rate": 0.0002, + "loss": 1.1064, + "step": 1040 + }, + { + "epoch": 5.982905982905983, + "grad_norm": 0.9117513298988342, + "learning_rate": 0.0002, + "loss": 1.1114, + "step": 1050 + }, + { + "epoch": 6.0, + "eval_loss": 1.4333235025405884, + "eval_runtime": 2.9267, + "eval_samples_per_second": 33.485, + "eval_steps_per_second": 4.442, + "step": 1053 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.2869762140864512e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1053/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdf6e48cb33977258f81418f17a4249bbed05123 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bb93bfb7a7e2f88be91722fe77b2e58951c3e881b76abab08aecfebca2193d2 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4f9be8f07aaff73ba4bd24f27ca279a8f3f5fc21 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c60cf797b395b8eea0213d2e932b1f07e9411bfa289d2649cd33a40be3e956d +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c451b7cfcfeea74502a26928ecf9f7cee9d3aa49 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d24bcd1d38956feab07c380c0ddc0fe1495b1c01d06a77ff9f3a7ca576b5bbb6 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..617f0a8bdca03af1b48c2480aaac26a98874683e --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2510c124cf0764eadc4a791603e5beb4058d0b0e44bb1cbd79e4e0b8fb50e28d +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e097bf8fc4c12e1d4f403eb89e3e06c844c13990 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/trainer_state.json @@ -0,0 +1,943 @@ +{ + "best_metric": 1.3988444805145264, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", + "epoch": 6.997150997150997, + "eval_steps": 10, + "global_step": 1228, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + }, + { + "epoch": 3.0199430199430197, + "grad_norm": 0.7109280228614807, + "learning_rate": 0.0002, + "loss": 1.3656, + "step": 530 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.6746246814727783, + "learning_rate": 0.0002, + "loss": 1.2571, + "step": 540 + }, + { + "epoch": 3.133903133903134, + "grad_norm": 0.7202523350715637, + "learning_rate": 0.0002, + "loss": 1.2685, + "step": 550 + }, + { + "epoch": 3.190883190883191, + "grad_norm": 0.697090208530426, + "learning_rate": 0.0002, + "loss": 1.1808, + "step": 560 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 0.7157464623451233, + "learning_rate": 0.0002, + "loss": 1.2479, + "step": 570 + }, + { + "epoch": 3.304843304843305, + "grad_norm": 0.8729232549667358, + "learning_rate": 0.0002, + "loss": 1.2426, + "step": 580 + }, + { + "epoch": 3.3618233618233617, + "grad_norm": 0.7119743227958679, + "learning_rate": 0.0002, + "loss": 1.2957, + "step": 590 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 0.7417448163032532, + "learning_rate": 0.0002, + "loss": 1.2787, + "step": 600 + }, + { + "epoch": 3.4757834757834756, + "grad_norm": 0.8174124956130981, + "learning_rate": 0.0002, + "loss": 1.2317, + "step": 610 + }, + { + "epoch": 3.532763532763533, + "grad_norm": 0.7199270129203796, + "learning_rate": 0.0002, + "loss": 1.2916, + "step": 620 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 0.989138662815094, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 630 + }, + { + "epoch": 3.646723646723647, + "grad_norm": 0.75921630859375, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 640 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.7844401001930237, + "learning_rate": 0.0002, + "loss": 1.2319, + "step": 650 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 0.9127110242843628, + "learning_rate": 0.0002, + "loss": 1.2851, + "step": 660 + }, + { + "epoch": 3.8176638176638176, + "grad_norm": 0.7972270846366882, + "learning_rate": 0.0002, + "loss": 1.2835, + "step": 670 + }, + { + "epoch": 3.8746438746438745, + "grad_norm": 0.7458992004394531, + "learning_rate": 0.0002, + "loss": 1.3105, + "step": 680 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 0.854924738407135, + "learning_rate": 0.0002, + "loss": 1.3017, + "step": 690 + }, + { + "epoch": 3.9886039886039883, + "grad_norm": 0.7763816118240356, + "learning_rate": 0.0002, + "loss": 1.2455, + "step": 700 + }, + { + "epoch": 4.0, + "eval_loss": 1.3988444805145264, + "eval_runtime": 2.8697, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.53, + "step": 702 + }, + { + "epoch": 4.045584045584046, + "grad_norm": 0.877430260181427, + "learning_rate": 0.0002, + "loss": 1.178, + "step": 710 + }, + { + "epoch": 4.102564102564102, + "grad_norm": 0.8365248441696167, + "learning_rate": 0.0002, + "loss": 1.1635, + "step": 720 + }, + { + "epoch": 4.15954415954416, + "grad_norm": 0.7748925089836121, + "learning_rate": 0.0002, + "loss": 1.2286, + "step": 730 + }, + { + "epoch": 4.216524216524217, + "grad_norm": 0.7695241570472717, + "learning_rate": 0.0002, + "loss": 1.1836, + "step": 740 + }, + { + "epoch": 4.273504273504273, + "grad_norm": 0.7229928374290466, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 750 + }, + { + "epoch": 4.330484330484331, + "grad_norm": 0.7035910487174988, + "learning_rate": 0.0002, + "loss": 1.117, + "step": 760 + }, + { + "epoch": 4.387464387464387, + "grad_norm": 0.9075796008110046, + "learning_rate": 0.0002, + "loss": 1.189, + "step": 770 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.7957494854927063, + "learning_rate": 0.0002, + "loss": 1.1693, + "step": 780 + }, + { + "epoch": 4.501424501424501, + "grad_norm": 0.8733780384063721, + "learning_rate": 0.0002, + "loss": 1.1945, + "step": 790 + }, + { + "epoch": 4.5584045584045585, + "grad_norm": 0.8786619901657104, + "learning_rate": 0.0002, + "loss": 1.1867, + "step": 800 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 0.7101715803146362, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 810 + }, + { + "epoch": 4.672364672364672, + "grad_norm": 0.7451328039169312, + "learning_rate": 0.0002, + "loss": 1.2063, + "step": 820 + }, + { + "epoch": 4.72934472934473, + "grad_norm": 0.7830713987350464, + "learning_rate": 0.0002, + "loss": 1.1939, + "step": 830 + }, + { + "epoch": 4.786324786324786, + "grad_norm": 0.7804535031318665, + "learning_rate": 0.0002, + "loss": 1.1251, + "step": 840 + }, + { + "epoch": 4.843304843304844, + "grad_norm": 0.8121811747550964, + "learning_rate": 0.0002, + "loss": 1.2278, + "step": 850 + }, + { + "epoch": 4.9002849002849, + "grad_norm": 0.774864137172699, + "learning_rate": 0.0002, + "loss": 1.142, + "step": 860 + }, + { + "epoch": 4.957264957264957, + "grad_norm": 0.7517814040184021, + "learning_rate": 0.0002, + "loss": 1.1736, + "step": 870 + }, + { + "epoch": 4.997150997150997, + "eval_loss": 1.4074795246124268, + "eval_runtime": 2.8707, + "eval_samples_per_second": 34.138, + "eval_steps_per_second": 4.529, + "step": 877 + }, + { + "epoch": 5.014245014245014, + "grad_norm": 0.7974972128868103, + "learning_rate": 0.0002, + "loss": 1.151, + "step": 880 + }, + { + "epoch": 5.071225071225071, + "grad_norm": 1.1127357482910156, + "learning_rate": 0.0002, + "loss": 1.0637, + "step": 890 + }, + { + "epoch": 5.128205128205128, + "grad_norm": 0.8995195031166077, + "learning_rate": 0.0002, + "loss": 1.0497, + "step": 900 + }, + { + "epoch": 5.185185185185185, + "grad_norm": 0.8325890898704529, + "learning_rate": 0.0002, + "loss": 1.1101, + "step": 910 + }, + { + "epoch": 5.2421652421652425, + "grad_norm": 0.8830686807632446, + "learning_rate": 0.0002, + "loss": 1.0567, + "step": 920 + }, + { + "epoch": 5.299145299145299, + "grad_norm": 0.8856923580169678, + "learning_rate": 0.0002, + "loss": 1.1094, + "step": 930 + }, + { + "epoch": 5.356125356125356, + "grad_norm": 0.814587414264679, + "learning_rate": 0.0002, + "loss": 1.0328, + "step": 940 + }, + { + "epoch": 5.413105413105413, + "grad_norm": 0.9119254946708679, + "learning_rate": 0.0002, + "loss": 1.1379, + "step": 950 + }, + { + "epoch": 5.47008547008547, + "grad_norm": 0.8547661304473877, + "learning_rate": 0.0002, + "loss": 1.0993, + "step": 960 + }, + { + "epoch": 5.527065527065528, + "grad_norm": 0.943742036819458, + "learning_rate": 0.0002, + "loss": 1.1137, + "step": 970 + }, + { + "epoch": 5.584045584045584, + "grad_norm": 1.1333340406417847, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 980 + }, + { + "epoch": 5.641025641025641, + "grad_norm": 1.0290982723236084, + "learning_rate": 0.0002, + "loss": 1.1161, + "step": 990 + }, + { + "epoch": 5.698005698005698, + "grad_norm": 1.0613716840744019, + "learning_rate": 0.0002, + "loss": 1.1477, + "step": 1000 + }, + { + "epoch": 5.754985754985755, + "grad_norm": 0.925118088722229, + "learning_rate": 0.0002, + "loss": 1.0878, + "step": 1010 + }, + { + "epoch": 5.811965811965812, + "grad_norm": 0.828220546245575, + "learning_rate": 0.0002, + "loss": 1.0658, + "step": 1020 + }, + { + "epoch": 5.868945868945869, + "grad_norm": 0.7466493248939514, + "learning_rate": 0.0002, + "loss": 1.1179, + "step": 1030 + }, + { + "epoch": 5.925925925925926, + "grad_norm": 0.9189135432243347, + "learning_rate": 0.0002, + "loss": 1.1064, + "step": 1040 + }, + { + "epoch": 5.982905982905983, + "grad_norm": 0.9117513298988342, + "learning_rate": 0.0002, + "loss": 1.1114, + "step": 1050 + }, + { + "epoch": 6.0, + "eval_loss": 1.4333235025405884, + "eval_runtime": 2.9267, + "eval_samples_per_second": 33.485, + "eval_steps_per_second": 4.442, + "step": 1053 + }, + { + "epoch": 6.0398860398860394, + "grad_norm": 0.9506599307060242, + "learning_rate": 0.0002, + "loss": 1.0368, + "step": 1060 + }, + { + "epoch": 6.096866096866097, + "grad_norm": 0.9809837937355042, + "learning_rate": 0.0002, + "loss": 1.0376, + "step": 1070 + }, + { + "epoch": 6.153846153846154, + "grad_norm": 0.852557361125946, + "learning_rate": 0.0002, + "loss": 0.9849, + "step": 1080 + }, + { + "epoch": 6.210826210826211, + "grad_norm": 1.135279893875122, + "learning_rate": 0.0002, + "loss": 0.9782, + "step": 1090 + }, + { + "epoch": 6.267806267806268, + "grad_norm": 1.0243879556655884, + "learning_rate": 0.0002, + "loss": 1.0238, + "step": 1100 + }, + { + "epoch": 6.3247863247863245, + "grad_norm": 0.9213914275169373, + "learning_rate": 0.0002, + "loss": 0.9815, + "step": 1110 + }, + { + "epoch": 6.381766381766382, + "grad_norm": 1.0042028427124023, + "learning_rate": 0.0002, + "loss": 0.9899, + "step": 1120 + }, + { + "epoch": 6.438746438746438, + "grad_norm": 1.1024253368377686, + "learning_rate": 0.0002, + "loss": 1.0249, + "step": 1130 + }, + { + "epoch": 6.495726495726496, + "grad_norm": 0.9245727062225342, + "learning_rate": 0.0002, + "loss": 1.0305, + "step": 1140 + }, + { + "epoch": 6.552706552706553, + "grad_norm": 0.8309272527694702, + "learning_rate": 0.0002, + "loss": 0.9766, + "step": 1150 + }, + { + "epoch": 6.60968660968661, + "grad_norm": 1.029746651649475, + "learning_rate": 0.0002, + "loss": 0.9945, + "step": 1160 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.9932991862297058, + "learning_rate": 0.0002, + "loss": 0.9955, + "step": 1170 + }, + { + "epoch": 6.7236467236467234, + "grad_norm": 1.1597973108291626, + "learning_rate": 0.0002, + "loss": 1.0808, + "step": 1180 + }, + { + "epoch": 6.780626780626781, + "grad_norm": 1.1373951435089111, + "learning_rate": 0.0002, + "loss": 1.0407, + "step": 1190 + }, + { + "epoch": 6.837606837606837, + "grad_norm": 1.0308297872543335, + "learning_rate": 0.0002, + "loss": 0.9513, + "step": 1200 + }, + { + "epoch": 6.894586894586895, + "grad_norm": 1.1555122137069702, + "learning_rate": 0.0002, + "loss": 1.0437, + "step": 1210 + }, + { + "epoch": 6.951566951566951, + "grad_norm": 0.9829897284507751, + "learning_rate": 0.0002, + "loss": 1.0164, + "step": 1220 + }, + { + "epoch": 6.997150997150997, + "eval_loss": 1.4769021272659302, + "eval_runtime": 2.8681, + "eval_samples_per_second": 34.168, + "eval_steps_per_second": 4.533, + "step": 1228 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.5014722497675264e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1228/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d121c52ad94233611614d0e57c905e1dcbf94a8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:996ae93d058e24c4d884efc5bbe8a0ebf03d32679613e69751e8b35f5a890c82 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..427c8263cb86cb5b6ca641e1fbca7bf43c637f76 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b76e00ddd6519cf27006007fae304b2bc714f8d0062b99791ac01edb5b5373 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..df5d144b66ebf146c588b886982df2447a715376 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06598934139540514c1f935625f6d713bef5ba215b0be53186439cee2ecabd11 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9fe6273f6b6b7c1a6b30e659cc87ff6db7446315 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2651dbbb234a1169de9db4c1691e20ebcc2a6f2cad7a0b6f3fb47aa10c248f +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e1fdc734dc91d81029a310d2f14cdc232237e841 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/trainer_state.json @@ -0,0 +1,1077 @@ +{ + "best_metric": 1.3988444805145264, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", + "epoch": 7.977207977207978, + "eval_steps": 10, + "global_step": 1400, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + }, + { + "epoch": 3.0199430199430197, + "grad_norm": 0.7109280228614807, + "learning_rate": 0.0002, + "loss": 1.3656, + "step": 530 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.6746246814727783, + "learning_rate": 0.0002, + "loss": 1.2571, + "step": 540 + }, + { + "epoch": 3.133903133903134, + "grad_norm": 0.7202523350715637, + "learning_rate": 0.0002, + "loss": 1.2685, + "step": 550 + }, + { + "epoch": 3.190883190883191, + "grad_norm": 0.697090208530426, + "learning_rate": 0.0002, + "loss": 1.1808, + "step": 560 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 0.7157464623451233, + "learning_rate": 0.0002, + "loss": 1.2479, + "step": 570 + }, + { + "epoch": 3.304843304843305, + "grad_norm": 0.8729232549667358, + "learning_rate": 0.0002, + "loss": 1.2426, + "step": 580 + }, + { + "epoch": 3.3618233618233617, + "grad_norm": 0.7119743227958679, + "learning_rate": 0.0002, + "loss": 1.2957, + "step": 590 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 0.7417448163032532, + "learning_rate": 0.0002, + "loss": 1.2787, + "step": 600 + }, + { + "epoch": 3.4757834757834756, + "grad_norm": 0.8174124956130981, + "learning_rate": 0.0002, + "loss": 1.2317, + "step": 610 + }, + { + "epoch": 3.532763532763533, + "grad_norm": 0.7199270129203796, + "learning_rate": 0.0002, + "loss": 1.2916, + "step": 620 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 0.989138662815094, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 630 + }, + { + "epoch": 3.646723646723647, + "grad_norm": 0.75921630859375, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 640 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.7844401001930237, + "learning_rate": 0.0002, + "loss": 1.2319, + "step": 650 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 0.9127110242843628, + "learning_rate": 0.0002, + "loss": 1.2851, + "step": 660 + }, + { + "epoch": 3.8176638176638176, + "grad_norm": 0.7972270846366882, + "learning_rate": 0.0002, + "loss": 1.2835, + "step": 670 + }, + { + "epoch": 3.8746438746438745, + "grad_norm": 0.7458992004394531, + "learning_rate": 0.0002, + "loss": 1.3105, + "step": 680 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 0.854924738407135, + "learning_rate": 0.0002, + "loss": 1.3017, + "step": 690 + }, + { + "epoch": 3.9886039886039883, + "grad_norm": 0.7763816118240356, + "learning_rate": 0.0002, + "loss": 1.2455, + "step": 700 + }, + { + "epoch": 4.0, + "eval_loss": 1.3988444805145264, + "eval_runtime": 2.8697, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.53, + "step": 702 + }, + { + "epoch": 4.045584045584046, + "grad_norm": 0.877430260181427, + "learning_rate": 0.0002, + "loss": 1.178, + "step": 710 + }, + { + "epoch": 4.102564102564102, + "grad_norm": 0.8365248441696167, + "learning_rate": 0.0002, + "loss": 1.1635, + "step": 720 + }, + { + "epoch": 4.15954415954416, + "grad_norm": 0.7748925089836121, + "learning_rate": 0.0002, + "loss": 1.2286, + "step": 730 + }, + { + "epoch": 4.216524216524217, + "grad_norm": 0.7695241570472717, + "learning_rate": 0.0002, + "loss": 1.1836, + "step": 740 + }, + { + "epoch": 4.273504273504273, + "grad_norm": 0.7229928374290466, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 750 + }, + { + "epoch": 4.330484330484331, + "grad_norm": 0.7035910487174988, + "learning_rate": 0.0002, + "loss": 1.117, + "step": 760 + }, + { + "epoch": 4.387464387464387, + "grad_norm": 0.9075796008110046, + "learning_rate": 0.0002, + "loss": 1.189, + "step": 770 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.7957494854927063, + "learning_rate": 0.0002, + "loss": 1.1693, + "step": 780 + }, + { + "epoch": 4.501424501424501, + "grad_norm": 0.8733780384063721, + "learning_rate": 0.0002, + "loss": 1.1945, + "step": 790 + }, + { + "epoch": 4.5584045584045585, + "grad_norm": 0.8786619901657104, + "learning_rate": 0.0002, + "loss": 1.1867, + "step": 800 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 0.7101715803146362, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 810 + }, + { + "epoch": 4.672364672364672, + "grad_norm": 0.7451328039169312, + "learning_rate": 0.0002, + "loss": 1.2063, + "step": 820 + }, + { + "epoch": 4.72934472934473, + "grad_norm": 0.7830713987350464, + "learning_rate": 0.0002, + "loss": 1.1939, + "step": 830 + }, + { + "epoch": 4.786324786324786, + "grad_norm": 0.7804535031318665, + "learning_rate": 0.0002, + "loss": 1.1251, + "step": 840 + }, + { + "epoch": 4.843304843304844, + "grad_norm": 0.8121811747550964, + "learning_rate": 0.0002, + "loss": 1.2278, + "step": 850 + }, + { + "epoch": 4.9002849002849, + "grad_norm": 0.774864137172699, + "learning_rate": 0.0002, + "loss": 1.142, + "step": 860 + }, + { + "epoch": 4.957264957264957, + "grad_norm": 0.7517814040184021, + "learning_rate": 0.0002, + "loss": 1.1736, + "step": 870 + }, + { + "epoch": 4.997150997150997, + "eval_loss": 1.4074795246124268, + "eval_runtime": 2.8707, + "eval_samples_per_second": 34.138, + "eval_steps_per_second": 4.529, + "step": 877 + }, + { + "epoch": 5.014245014245014, + "grad_norm": 0.7974972128868103, + "learning_rate": 0.0002, + "loss": 1.151, + "step": 880 + }, + { + "epoch": 5.071225071225071, + "grad_norm": 1.1127357482910156, + "learning_rate": 0.0002, + "loss": 1.0637, + "step": 890 + }, + { + "epoch": 5.128205128205128, + "grad_norm": 0.8995195031166077, + "learning_rate": 0.0002, + "loss": 1.0497, + "step": 900 + }, + { + "epoch": 5.185185185185185, + "grad_norm": 0.8325890898704529, + "learning_rate": 0.0002, + "loss": 1.1101, + "step": 910 + }, + { + "epoch": 5.2421652421652425, + "grad_norm": 0.8830686807632446, + "learning_rate": 0.0002, + "loss": 1.0567, + "step": 920 + }, + { + "epoch": 5.299145299145299, + "grad_norm": 0.8856923580169678, + "learning_rate": 0.0002, + "loss": 1.1094, + "step": 930 + }, + { + "epoch": 5.356125356125356, + "grad_norm": 0.814587414264679, + "learning_rate": 0.0002, + "loss": 1.0328, + "step": 940 + }, + { + "epoch": 5.413105413105413, + "grad_norm": 0.9119254946708679, + "learning_rate": 0.0002, + "loss": 1.1379, + "step": 950 + }, + { + "epoch": 5.47008547008547, + "grad_norm": 0.8547661304473877, + "learning_rate": 0.0002, + "loss": 1.0993, + "step": 960 + }, + { + "epoch": 5.527065527065528, + "grad_norm": 0.943742036819458, + "learning_rate": 0.0002, + "loss": 1.1137, + "step": 970 + }, + { + "epoch": 5.584045584045584, + "grad_norm": 1.1333340406417847, + "learning_rate": 0.0002, + "loss": 1.0815, + "step": 980 + }, + { + "epoch": 5.641025641025641, + "grad_norm": 1.0290982723236084, + "learning_rate": 0.0002, + "loss": 1.1161, + "step": 990 + }, + { + "epoch": 5.698005698005698, + "grad_norm": 1.0613716840744019, + "learning_rate": 0.0002, + "loss": 1.1477, + "step": 1000 + }, + { + "epoch": 5.754985754985755, + "grad_norm": 0.925118088722229, + "learning_rate": 0.0002, + "loss": 1.0878, + "step": 1010 + }, + { + "epoch": 5.811965811965812, + "grad_norm": 0.828220546245575, + "learning_rate": 0.0002, + "loss": 1.0658, + "step": 1020 + }, + { + "epoch": 5.868945868945869, + "grad_norm": 0.7466493248939514, + "learning_rate": 0.0002, + "loss": 1.1179, + "step": 1030 + }, + { + "epoch": 5.925925925925926, + "grad_norm": 0.9189135432243347, + "learning_rate": 0.0002, + "loss": 1.1064, + "step": 1040 + }, + { + "epoch": 5.982905982905983, + "grad_norm": 0.9117513298988342, + "learning_rate": 0.0002, + "loss": 1.1114, + "step": 1050 + }, + { + "epoch": 6.0, + "eval_loss": 1.4333235025405884, + "eval_runtime": 2.9267, + "eval_samples_per_second": 33.485, + "eval_steps_per_second": 4.442, + "step": 1053 + }, + { + "epoch": 6.0398860398860394, + "grad_norm": 0.9506599307060242, + "learning_rate": 0.0002, + "loss": 1.0368, + "step": 1060 + }, + { + "epoch": 6.096866096866097, + "grad_norm": 0.9809837937355042, + "learning_rate": 0.0002, + "loss": 1.0376, + "step": 1070 + }, + { + "epoch": 6.153846153846154, + "grad_norm": 0.852557361125946, + "learning_rate": 0.0002, + "loss": 0.9849, + "step": 1080 + }, + { + "epoch": 6.210826210826211, + "grad_norm": 1.135279893875122, + "learning_rate": 0.0002, + "loss": 0.9782, + "step": 1090 + }, + { + "epoch": 6.267806267806268, + "grad_norm": 1.0243879556655884, + "learning_rate": 0.0002, + "loss": 1.0238, + "step": 1100 + }, + { + "epoch": 6.3247863247863245, + "grad_norm": 0.9213914275169373, + "learning_rate": 0.0002, + "loss": 0.9815, + "step": 1110 + }, + { + "epoch": 6.381766381766382, + "grad_norm": 1.0042028427124023, + "learning_rate": 0.0002, + "loss": 0.9899, + "step": 1120 + }, + { + "epoch": 6.438746438746438, + "grad_norm": 1.1024253368377686, + "learning_rate": 0.0002, + "loss": 1.0249, + "step": 1130 + }, + { + "epoch": 6.495726495726496, + "grad_norm": 0.9245727062225342, + "learning_rate": 0.0002, + "loss": 1.0305, + "step": 1140 + }, + { + "epoch": 6.552706552706553, + "grad_norm": 0.8309272527694702, + "learning_rate": 0.0002, + "loss": 0.9766, + "step": 1150 + }, + { + "epoch": 6.60968660968661, + "grad_norm": 1.029746651649475, + "learning_rate": 0.0002, + "loss": 0.9945, + "step": 1160 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.9932991862297058, + "learning_rate": 0.0002, + "loss": 0.9955, + "step": 1170 + }, + { + "epoch": 6.7236467236467234, + "grad_norm": 1.1597973108291626, + "learning_rate": 0.0002, + "loss": 1.0808, + "step": 1180 + }, + { + "epoch": 6.780626780626781, + "grad_norm": 1.1373951435089111, + "learning_rate": 0.0002, + "loss": 1.0407, + "step": 1190 + }, + { + "epoch": 6.837606837606837, + "grad_norm": 1.0308297872543335, + "learning_rate": 0.0002, + "loss": 0.9513, + "step": 1200 + }, + { + "epoch": 6.894586894586895, + "grad_norm": 1.1555122137069702, + "learning_rate": 0.0002, + "loss": 1.0437, + "step": 1210 + }, + { + "epoch": 6.951566951566951, + "grad_norm": 0.9829897284507751, + "learning_rate": 0.0002, + "loss": 1.0164, + "step": 1220 + }, + { + "epoch": 6.997150997150997, + "eval_loss": 1.4769021272659302, + "eval_runtime": 2.8681, + "eval_samples_per_second": 34.168, + "eval_steps_per_second": 4.533, + "step": 1228 + }, + { + "epoch": 7.0085470085470085, + "grad_norm": 1.1083747148513794, + "learning_rate": 0.0002, + "loss": 1.0111, + "step": 1230 + }, + { + "epoch": 7.065527065527066, + "grad_norm": 1.1454236507415771, + "learning_rate": 0.0002, + "loss": 0.9179, + "step": 1240 + }, + { + "epoch": 7.122507122507122, + "grad_norm": 0.9501869082450867, + "learning_rate": 0.0002, + "loss": 0.9271, + "step": 1250 + }, + { + "epoch": 7.17948717948718, + "grad_norm": 1.2393906116485596, + "learning_rate": 0.0002, + "loss": 0.9121, + "step": 1260 + }, + { + "epoch": 7.236467236467236, + "grad_norm": 1.0671173334121704, + "learning_rate": 0.0002, + "loss": 0.9396, + "step": 1270 + }, + { + "epoch": 7.293447293447294, + "grad_norm": 1.1026686429977417, + "learning_rate": 0.0002, + "loss": 0.9023, + "step": 1280 + }, + { + "epoch": 7.35042735042735, + "grad_norm": 1.2422044277191162, + "learning_rate": 0.0002, + "loss": 0.9267, + "step": 1290 + }, + { + "epoch": 7.407407407407407, + "grad_norm": 1.0772203207015991, + "learning_rate": 0.0002, + "loss": 0.8394, + "step": 1300 + }, + { + "epoch": 7.464387464387464, + "grad_norm": 1.1025265455245972, + "learning_rate": 0.0002, + "loss": 0.9361, + "step": 1310 + }, + { + "epoch": 7.521367521367521, + "grad_norm": 1.0232294797897339, + "learning_rate": 0.0002, + "loss": 0.9124, + "step": 1320 + }, + { + "epoch": 7.578347578347579, + "grad_norm": 1.2126119136810303, + "learning_rate": 0.0002, + "loss": 0.9412, + "step": 1330 + }, + { + "epoch": 7.635327635327635, + "grad_norm": 1.2359609603881836, + "learning_rate": 0.0002, + "loss": 0.9405, + "step": 1340 + }, + { + "epoch": 7.6923076923076925, + "grad_norm": 0.9109336733818054, + "learning_rate": 0.0002, + "loss": 0.9096, + "step": 1350 + }, + { + "epoch": 7.749287749287749, + "grad_norm": 1.0734586715698242, + "learning_rate": 0.0002, + "loss": 0.9038, + "step": 1360 + }, + { + "epoch": 7.806267806267806, + "grad_norm": 1.1725562810897827, + "learning_rate": 0.0002, + "loss": 0.9674, + "step": 1370 + }, + { + "epoch": 7.863247863247864, + "grad_norm": 1.0173115730285645, + "learning_rate": 0.0002, + "loss": 0.9211, + "step": 1380 + }, + { + "epoch": 7.92022792022792, + "grad_norm": 1.0145931243896484, + "learning_rate": 0.0002, + "loss": 0.9764, + "step": 1390 + }, + { + "epoch": 7.977207977207978, + "grad_norm": 1.1547255516052246, + "learning_rate": 0.0002, + "loss": 0.9465, + "step": 1400 + }, + { + "epoch": 7.977207977207978, + "eval_loss": 1.5259032249450684, + "eval_runtime": 2.8665, + "eval_samples_per_second": 34.189, + "eval_steps_per_second": 4.535, + "step": 1400 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.71107948691456e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-1400/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc174897662e5a692779f0f708e0cda5d77e39e3 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ec3539bb63b004a8819781d7ee42f59196664959c3f96f1ab42d86c713a717 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b4175c1b99e36b87d9e80060927404f66320bf19 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce19ebf7d1e6646f94c988c8db0232e74aaaea6babb0872e5423307daca571b8 +size 15064250 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1c0ed90489248e7ac384a04e150a4db06c9c8f7a --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f090d858199d2cc9c083d07c2e4899a2a9a85efb05bcd4868287516cefc9c8b +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3cb9ccfbedab0c2b791dd5b35ef0ae8496e5e6d --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c17c000dd3ad474c8365253b4464489310fed0d13c745556138280174b380deb +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b6ab2774f5c2b910b64868dbdbdf808ad933e676 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/trainer_state.json @@ -0,0 +1,160 @@ +{ + "best_metric": 1.4962449073791504, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175", + "epoch": 0.9971509971509972, + "eval_steps": 10, + "global_step": 175, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2144960356810752.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2b6bf68533babf434280fb052413bcdef9eb90e --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06d249baadd7651f7480915719016860aef236570aeedec487dc11b8d432d039 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e4237567d52573fb5f9d5e2764c7eb5b41f1ac80 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af5bd17bd9391440d9e8c57167a588644b70c0688e8ae291d9b27d230ebf3581 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..63e8fc1ec9b451877e8036d4d0e6f04ed3819843 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e825d2d493326d56e7a45471324ee3b2edc3a2c29ab8a7f6c8152c7611cda758 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..178095c7b1952d590ca5225131b0dae21e2643c8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2b853507ab2cbae6ba6a48164491c149815ca5c0f4c12284692bd9b767467a +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ccd5a13abbf1c9df28220a38d8953453cc5c38ed --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/trainer_state.json @@ -0,0 +1,294 @@ +{ + "best_metric": 1.424173355102539, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 351, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 4289920713621504.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3917978d9c857cc851f6bc3ee07c32b090a8c17d --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce2c14d1164da88ac3058a635806ea1e8a6fd34d0a1740a1484b459000951abd +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c452658d157bcb4cfe70f9a5c4e1b2b566e94303 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c078d52f77a571c3b9c0243421b8658607190f2293f80966b16c91d32b266b62 +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9b4223e7259dc680cefcf73fd7abff7e2a4aa560 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:307b37840034606c696e089402057d30a6c66307ce18f592ce232bf34835d6e5 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..82dfb9562a61df80e93d3b55101d8d010c7c3121 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0990c2b37525fb7ec7cdb410f81507a201ecde29c679ebc72b310be6b98d228a +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..fa66fe41a3f9b3220b922f2bccfebc4e9ccc38cf --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/trainer_state.json @@ -0,0 +1,421 @@ +{ + "best_metric": 1.401209831237793, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526", + "epoch": 2.9971509971509973, + "eval_steps": 10, + "global_step": 526, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 6434881070432256.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80d9d474ec5f736ff6f891f0bc050cac2310e785 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:546d9a50ec7778987fc8866c16760b7c1a04c7a6905e3f70f3124f1290485262 +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..54ac8e4bfa4a70523fc4e4f75bb07d902aa949ce --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:848e06c0d4fe2ed0b179b537b7e7766309fcd5fce160dfe7e269d7de3bcd0e4c +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..136189d5d7d1c22526ef4c427bde12e0b26030f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ca35e8cac13f2c7c0d021c4d01e2143654afb68b4c8ed46fe69f129cd919b1bb +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bd8417267b16219ee5a126567055b12f32886f6f --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2e995849b9c6ab36aa64065dd7fc5a77b3083bec77091ba5e6b0df9e813f35c +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..43fed2a40667e3025d84e3201e3e33a097f907d8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/trainer_state.json @@ -0,0 +1,555 @@ +{ + "best_metric": 1.3988444805145264, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 702, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + }, + { + "epoch": 3.0199430199430197, + "grad_norm": 0.7109280228614807, + "learning_rate": 0.0002, + "loss": 1.3656, + "step": 530 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.6746246814727783, + "learning_rate": 0.0002, + "loss": 1.2571, + "step": 540 + }, + { + "epoch": 3.133903133903134, + "grad_norm": 0.7202523350715637, + "learning_rate": 0.0002, + "loss": 1.2685, + "step": 550 + }, + { + "epoch": 3.190883190883191, + "grad_norm": 0.697090208530426, + "learning_rate": 0.0002, + "loss": 1.1808, + "step": 560 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 0.7157464623451233, + "learning_rate": 0.0002, + "loss": 1.2479, + "step": 570 + }, + { + "epoch": 3.304843304843305, + "grad_norm": 0.8729232549667358, + "learning_rate": 0.0002, + "loss": 1.2426, + "step": 580 + }, + { + "epoch": 3.3618233618233617, + "grad_norm": 0.7119743227958679, + "learning_rate": 0.0002, + "loss": 1.2957, + "step": 590 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 0.7417448163032532, + "learning_rate": 0.0002, + "loss": 1.2787, + "step": 600 + }, + { + "epoch": 3.4757834757834756, + "grad_norm": 0.8174124956130981, + "learning_rate": 0.0002, + "loss": 1.2317, + "step": 610 + }, + { + "epoch": 3.532763532763533, + "grad_norm": 0.7199270129203796, + "learning_rate": 0.0002, + "loss": 1.2916, + "step": 620 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 0.989138662815094, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 630 + }, + { + "epoch": 3.646723646723647, + "grad_norm": 0.75921630859375, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 640 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.7844401001930237, + "learning_rate": 0.0002, + "loss": 1.2319, + "step": 650 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 0.9127110242843628, + "learning_rate": 0.0002, + "loss": 1.2851, + "step": 660 + }, + { + "epoch": 3.8176638176638176, + "grad_norm": 0.7972270846366882, + "learning_rate": 0.0002, + "loss": 1.2835, + "step": 670 + }, + { + "epoch": 3.8746438746438745, + "grad_norm": 0.7458992004394531, + "learning_rate": 0.0002, + "loss": 1.3105, + "step": 680 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 0.854924738407135, + "learning_rate": 0.0002, + "loss": 1.3017, + "step": 690 + }, + { + "epoch": 3.9886039886039883, + "grad_norm": 0.7763816118240356, + "learning_rate": 0.0002, + "loss": 1.2455, + "step": 700 + }, + { + "epoch": 4.0, + "eval_loss": 1.3988444805145264, + "eval_runtime": 2.8697, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.53, + "step": 702 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 8579841427243008.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/README.md b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_model.safetensors b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b24065ea732fb5439cb5c8e8aae5b6bf9d48248 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5b54df0064fd4f7487437063888bd353dca3025c8b5b6025f1fc6094b7e0fc +size 29500848 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/optimizer.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2256972aca723ac83bc09610aaa23a9b635d0e72 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8d934bfca6f8c7a03c6f843ec96dd8afa5c13d8385e15c15e8883e6bbe5f65a +size 15064314 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/rng_state.pth b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a5e21f47c1f351d24254ecb09fb4697626a31c79 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f47813bb1b8f9cea460a57067389c6476843c238ca8e482b1f48ee269de4cbc8 +size 14244 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/scheduler.pt b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..04bcb6bcf40f85183359e1b7fc68c5f525d8046b --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46bbe98cd388d0aef05bda7399971fbd5ee2093cb9bfaaef8acc8327f92a107a +size 1064 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/trainer_state.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8007591b8ad6ea51b5020b018351e2cbd3118045 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/trainer_state.json @@ -0,0 +1,682 @@ +{ + "best_metric": 1.3988444805145264, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", + "epoch": 4.997150997150997, + "eval_steps": 10, + "global_step": 877, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.05698005698005698, + "grad_norm": 1.0227872133255005, + "learning_rate": 0.0002, + "loss": 3.5896, + "step": 10 + }, + { + "epoch": 0.11396011396011396, + "grad_norm": 3.86788010597229, + "learning_rate": 0.0002, + "loss": 2.5124, + "step": 20 + }, + { + "epoch": 0.17094017094017094, + "grad_norm": 1.3474394083023071, + "learning_rate": 0.0002, + "loss": 2.1576, + "step": 30 + }, + { + "epoch": 0.22792022792022792, + "grad_norm": 1.1816296577453613, + "learning_rate": 0.0002, + "loss": 2.0115, + "step": 40 + }, + { + "epoch": 0.2849002849002849, + "grad_norm": 1.0907047986984253, + "learning_rate": 0.0002, + "loss": 1.875, + "step": 50 + }, + { + "epoch": 0.3418803418803419, + "grad_norm": 0.9163471460342407, + "learning_rate": 0.0002, + "loss": 1.8608, + "step": 60 + }, + { + "epoch": 0.39886039886039887, + "grad_norm": 1.0441275835037231, + "learning_rate": 0.0002, + "loss": 1.7334, + "step": 70 + }, + { + "epoch": 0.45584045584045585, + "grad_norm": 1.0836364030838013, + "learning_rate": 0.0002, + "loss": 1.6496, + "step": 80 + }, + { + "epoch": 0.5128205128205128, + "grad_norm": 0.5817112922668457, + "learning_rate": 0.0002, + "loss": 1.5814, + "step": 90 + }, + { + "epoch": 0.5698005698005698, + "grad_norm": 0.8991169929504395, + "learning_rate": 0.0002, + "loss": 1.6697, + "step": 100 + }, + { + "epoch": 0.6267806267806267, + "grad_norm": 1.1820793151855469, + "learning_rate": 0.0002, + "loss": 1.621, + "step": 110 + }, + { + "epoch": 0.6837606837606838, + "grad_norm": 0.8205533623695374, + "learning_rate": 0.0002, + "loss": 1.6376, + "step": 120 + }, + { + "epoch": 0.7407407407407407, + "grad_norm": 0.8154979348182678, + "learning_rate": 0.0002, + "loss": 1.5902, + "step": 130 + }, + { + "epoch": 0.7977207977207977, + "grad_norm": 0.7292681336402893, + "learning_rate": 0.0002, + "loss": 1.6139, + "step": 140 + }, + { + "epoch": 0.8547008547008547, + "grad_norm": 0.7737869024276733, + "learning_rate": 0.0002, + "loss": 1.6554, + "step": 150 + }, + { + "epoch": 0.9116809116809117, + "grad_norm": 0.7786843180656433, + "learning_rate": 0.0002, + "loss": 1.4696, + "step": 160 + }, + { + "epoch": 0.9686609686609686, + "grad_norm": 0.6918405294418335, + "learning_rate": 0.0002, + "loss": 1.5062, + "step": 170 + }, + { + "epoch": 0.9971509971509972, + "eval_loss": 1.4962449073791504, + "eval_runtime": 2.869, + "eval_samples_per_second": 34.158, + "eval_steps_per_second": 4.531, + "step": 175 + }, + { + "epoch": 1.0256410256410255, + "grad_norm": 0.6754891872406006, + "learning_rate": 0.0002, + "loss": 1.5305, + "step": 180 + }, + { + "epoch": 1.0826210826210827, + "grad_norm": 0.6875350475311279, + "learning_rate": 0.0002, + "loss": 1.4709, + "step": 190 + }, + { + "epoch": 1.1396011396011396, + "grad_norm": 0.7870411276817322, + "learning_rate": 0.0002, + "loss": 1.4744, + "step": 200 + }, + { + "epoch": 1.1965811965811965, + "grad_norm": 0.6934282779693604, + "learning_rate": 0.0002, + "loss": 1.5414, + "step": 210 + }, + { + "epoch": 1.2535612535612537, + "grad_norm": 0.6980162858963013, + "learning_rate": 0.0002, + "loss": 1.5129, + "step": 220 + }, + { + "epoch": 1.3105413105413106, + "grad_norm": 0.6163203120231628, + "learning_rate": 0.0002, + "loss": 1.385, + "step": 230 + }, + { + "epoch": 1.3675213675213675, + "grad_norm": 0.5967347025871277, + "learning_rate": 0.0002, + "loss": 1.4028, + "step": 240 + }, + { + "epoch": 1.4245014245014245, + "grad_norm": 0.7622564435005188, + "learning_rate": 0.0002, + "loss": 1.4945, + "step": 250 + }, + { + "epoch": 1.4814814814814814, + "grad_norm": 0.6667674779891968, + "learning_rate": 0.0002, + "loss": 1.4426, + "step": 260 + }, + { + "epoch": 1.5384615384615383, + "grad_norm": 0.9225417971611023, + "learning_rate": 0.0002, + "loss": 1.4227, + "step": 270 + }, + { + "epoch": 1.5954415954415955, + "grad_norm": 0.6473053097724915, + "learning_rate": 0.0002, + "loss": 1.3687, + "step": 280 + }, + { + "epoch": 1.6524216524216524, + "grad_norm": 0.8250042796134949, + "learning_rate": 0.0002, + "loss": 1.5086, + "step": 290 + }, + { + "epoch": 1.7094017094017095, + "grad_norm": 0.6660609841346741, + "learning_rate": 0.0002, + "loss": 1.4259, + "step": 300 + }, + { + "epoch": 1.7663817663817665, + "grad_norm": 0.7542873620986938, + "learning_rate": 0.0002, + "loss": 1.373, + "step": 310 + }, + { + "epoch": 1.8233618233618234, + "grad_norm": 0.5261648297309875, + "learning_rate": 0.0002, + "loss": 1.3823, + "step": 320 + }, + { + "epoch": 1.8803418803418803, + "grad_norm": 0.6519118547439575, + "learning_rate": 0.0002, + "loss": 1.4251, + "step": 330 + }, + { + "epoch": 1.9373219373219372, + "grad_norm": 0.7584664821624756, + "learning_rate": 0.0002, + "loss": 1.3613, + "step": 340 + }, + { + "epoch": 1.9943019943019942, + "grad_norm": 0.6466017365455627, + "learning_rate": 0.0002, + "loss": 1.4797, + "step": 350 + }, + { + "epoch": 2.0, + "eval_loss": 1.424173355102539, + "eval_runtime": 2.8659, + "eval_samples_per_second": 34.195, + "eval_steps_per_second": 4.536, + "step": 351 + }, + { + "epoch": 2.051282051282051, + "grad_norm": 0.7457601428031921, + "learning_rate": 0.0002, + "loss": 1.3555, + "step": 360 + }, + { + "epoch": 2.1082621082621085, + "grad_norm": 0.6645848751068115, + "learning_rate": 0.0002, + "loss": 1.387, + "step": 370 + }, + { + "epoch": 2.1652421652421654, + "grad_norm": 0.6545299887657166, + "learning_rate": 0.0002, + "loss": 1.3244, + "step": 380 + }, + { + "epoch": 2.2222222222222223, + "grad_norm": 0.7429937124252319, + "learning_rate": 0.0002, + "loss": 1.4025, + "step": 390 + }, + { + "epoch": 2.2792022792022792, + "grad_norm": 0.6929682493209839, + "learning_rate": 0.0002, + "loss": 1.3995, + "step": 400 + }, + { + "epoch": 2.336182336182336, + "grad_norm": 0.6999889016151428, + "learning_rate": 0.0002, + "loss": 1.3073, + "step": 410 + }, + { + "epoch": 2.393162393162393, + "grad_norm": 0.7174718379974365, + "learning_rate": 0.0002, + "loss": 1.3573, + "step": 420 + }, + { + "epoch": 2.45014245014245, + "grad_norm": 0.667317807674408, + "learning_rate": 0.0002, + "loss": 1.3169, + "step": 430 + }, + { + "epoch": 2.5071225071225074, + "grad_norm": 0.8981409072875977, + "learning_rate": 0.0002, + "loss": 1.3877, + "step": 440 + }, + { + "epoch": 2.564102564102564, + "grad_norm": 0.7560263872146606, + "learning_rate": 0.0002, + "loss": 1.3085, + "step": 450 + }, + { + "epoch": 2.6210826210826212, + "grad_norm": 0.699364185333252, + "learning_rate": 0.0002, + "loss": 1.278, + "step": 460 + }, + { + "epoch": 2.678062678062678, + "grad_norm": 0.666292667388916, + "learning_rate": 0.0002, + "loss": 1.2962, + "step": 470 + }, + { + "epoch": 2.735042735042735, + "grad_norm": 0.7564692497253418, + "learning_rate": 0.0002, + "loss": 1.3471, + "step": 480 + }, + { + "epoch": 2.792022792022792, + "grad_norm": 0.7561964392662048, + "learning_rate": 0.0002, + "loss": 1.3489, + "step": 490 + }, + { + "epoch": 2.849002849002849, + "grad_norm": 0.6506860852241516, + "learning_rate": 0.0002, + "loss": 1.3357, + "step": 500 + }, + { + "epoch": 2.905982905982906, + "grad_norm": 0.6425383687019348, + "learning_rate": 0.0002, + "loss": 1.311, + "step": 510 + }, + { + "epoch": 2.962962962962963, + "grad_norm": 0.7424822449684143, + "learning_rate": 0.0002, + "loss": 1.2879, + "step": 520 + }, + { + "epoch": 2.9971509971509973, + "eval_loss": 1.401209831237793, + "eval_runtime": 2.8721, + "eval_samples_per_second": 34.121, + "eval_steps_per_second": 4.526, + "step": 526 + }, + { + "epoch": 3.0199430199430197, + "grad_norm": 0.7109280228614807, + "learning_rate": 0.0002, + "loss": 1.3656, + "step": 530 + }, + { + "epoch": 3.076923076923077, + "grad_norm": 0.6746246814727783, + "learning_rate": 0.0002, + "loss": 1.2571, + "step": 540 + }, + { + "epoch": 3.133903133903134, + "grad_norm": 0.7202523350715637, + "learning_rate": 0.0002, + "loss": 1.2685, + "step": 550 + }, + { + "epoch": 3.190883190883191, + "grad_norm": 0.697090208530426, + "learning_rate": 0.0002, + "loss": 1.1808, + "step": 560 + }, + { + "epoch": 3.247863247863248, + "grad_norm": 0.7157464623451233, + "learning_rate": 0.0002, + "loss": 1.2479, + "step": 570 + }, + { + "epoch": 3.304843304843305, + "grad_norm": 0.8729232549667358, + "learning_rate": 0.0002, + "loss": 1.2426, + "step": 580 + }, + { + "epoch": 3.3618233618233617, + "grad_norm": 0.7119743227958679, + "learning_rate": 0.0002, + "loss": 1.2957, + "step": 590 + }, + { + "epoch": 3.4188034188034186, + "grad_norm": 0.7417448163032532, + "learning_rate": 0.0002, + "loss": 1.2787, + "step": 600 + }, + { + "epoch": 3.4757834757834756, + "grad_norm": 0.8174124956130981, + "learning_rate": 0.0002, + "loss": 1.2317, + "step": 610 + }, + { + "epoch": 3.532763532763533, + "grad_norm": 0.7199270129203796, + "learning_rate": 0.0002, + "loss": 1.2916, + "step": 620 + }, + { + "epoch": 3.58974358974359, + "grad_norm": 0.989138662815094, + "learning_rate": 0.0002, + "loss": 1.2074, + "step": 630 + }, + { + "epoch": 3.646723646723647, + "grad_norm": 0.75921630859375, + "learning_rate": 0.0002, + "loss": 1.2263, + "step": 640 + }, + { + "epoch": 3.7037037037037037, + "grad_norm": 0.7844401001930237, + "learning_rate": 0.0002, + "loss": 1.2319, + "step": 650 + }, + { + "epoch": 3.7606837606837606, + "grad_norm": 0.9127110242843628, + "learning_rate": 0.0002, + "loss": 1.2851, + "step": 660 + }, + { + "epoch": 3.8176638176638176, + "grad_norm": 0.7972270846366882, + "learning_rate": 0.0002, + "loss": 1.2835, + "step": 670 + }, + { + "epoch": 3.8746438746438745, + "grad_norm": 0.7458992004394531, + "learning_rate": 0.0002, + "loss": 1.3105, + "step": 680 + }, + { + "epoch": 3.931623931623932, + "grad_norm": 0.854924738407135, + "learning_rate": 0.0002, + "loss": 1.3017, + "step": 690 + }, + { + "epoch": 3.9886039886039883, + "grad_norm": 0.7763816118240356, + "learning_rate": 0.0002, + "loss": 1.2455, + "step": 700 + }, + { + "epoch": 4.0, + "eval_loss": 1.3988444805145264, + "eval_runtime": 2.8697, + "eval_samples_per_second": 34.15, + "eval_steps_per_second": 4.53, + "step": 702 + }, + { + "epoch": 4.045584045584046, + "grad_norm": 0.877430260181427, + "learning_rate": 0.0002, + "loss": 1.178, + "step": 710 + }, + { + "epoch": 4.102564102564102, + "grad_norm": 0.8365248441696167, + "learning_rate": 0.0002, + "loss": 1.1635, + "step": 720 + }, + { + "epoch": 4.15954415954416, + "grad_norm": 0.7748925089836121, + "learning_rate": 0.0002, + "loss": 1.2286, + "step": 730 + }, + { + "epoch": 4.216524216524217, + "grad_norm": 0.7695241570472717, + "learning_rate": 0.0002, + "loss": 1.1836, + "step": 740 + }, + { + "epoch": 4.273504273504273, + "grad_norm": 0.7229928374290466, + "learning_rate": 0.0002, + "loss": 1.1685, + "step": 750 + }, + { + "epoch": 4.330484330484331, + "grad_norm": 0.7035910487174988, + "learning_rate": 0.0002, + "loss": 1.117, + "step": 760 + }, + { + "epoch": 4.387464387464387, + "grad_norm": 0.9075796008110046, + "learning_rate": 0.0002, + "loss": 1.189, + "step": 770 + }, + { + "epoch": 4.444444444444445, + "grad_norm": 0.7957494854927063, + "learning_rate": 0.0002, + "loss": 1.1693, + "step": 780 + }, + { + "epoch": 4.501424501424501, + "grad_norm": 0.8733780384063721, + "learning_rate": 0.0002, + "loss": 1.1945, + "step": 790 + }, + { + "epoch": 4.5584045584045585, + "grad_norm": 0.8786619901657104, + "learning_rate": 0.0002, + "loss": 1.1867, + "step": 800 + }, + { + "epoch": 4.615384615384615, + "grad_norm": 0.7101715803146362, + "learning_rate": 0.0002, + "loss": 1.185, + "step": 810 + }, + { + "epoch": 4.672364672364672, + "grad_norm": 0.7451328039169312, + "learning_rate": 0.0002, + "loss": 1.2063, + "step": 820 + }, + { + "epoch": 4.72934472934473, + "grad_norm": 0.7830713987350464, + "learning_rate": 0.0002, + "loss": 1.1939, + "step": 830 + }, + { + "epoch": 4.786324786324786, + "grad_norm": 0.7804535031318665, + "learning_rate": 0.0002, + "loss": 1.1251, + "step": 840 + }, + { + "epoch": 4.843304843304844, + "grad_norm": 0.8121811747550964, + "learning_rate": 0.0002, + "loss": 1.2278, + "step": 850 + }, + { + "epoch": 4.9002849002849, + "grad_norm": 0.774864137172699, + "learning_rate": 0.0002, + "loss": 1.142, + "step": 860 + }, + { + "epoch": 4.957264957264957, + "grad_norm": 0.7517814040184021, + "learning_rate": 0.0002, + "loss": 1.1736, + "step": 870 + }, + { + "epoch": 4.997150997150997, + "eval_loss": 1.4074795246124268, + "eval_runtime": 2.8707, + "eval_samples_per_second": 34.138, + "eval_steps_per_second": 4.529, + "step": 877 + } + ], + "logging_steps": 10, + "max_steps": 1400, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.072480178405376e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-877/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/special_tokens_map.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.model b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer_config.json b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_args.bin b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..1d5855b60b1cd520479d619dcdb51a31eb8d4e70 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c8913d560553d015fae1642e2e67f5097556e650cbd6fe1331d7f7e47624aa +size 5560 diff --git a/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_log.jsonl b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..a969a67d2351636ad02ffdd9a9c8b5b4efc07881 --- /dev/null +++ b/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/training_log.jsonl @@ -0,0 +1,23 @@ +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 226.64873790740967, "total_accumulated_duration": 226.64873790740967, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5884, "grad_norm": 1.0246449708938599, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5087, "grad_norm": 4.416985034942627, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1383, "grad_norm": 1.339896321296692, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0126, "grad_norm": 1.1955891847610474, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8748, "grad_norm": 1.5392284393310547, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.864, "grad_norm": 2.2324352264404297, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7324, "grad_norm": 0.883982241153717, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6497, "grad_norm": 0.8040387630462646, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.581, "grad_norm": 0.6630058288574219, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.8256624341011047, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6244, "grad_norm": 1.7843570709228516, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6385, "grad_norm": 0.7624598741531372, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5932, "grad_norm": 0.8000739216804504, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6153, "grad_norm": 0.746695876121521, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6538, "grad_norm": 0.7385406494140625, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4707, "grad_norm": 0.7716621160507202, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5073, "grad_norm": 0.6918502449989319, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 228.30274987220764, "total_accumulated_duration": 228.30274987220764, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5887, "grad_norm": 1.0285680294036865, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5088, "grad_norm": 4.263883113861084, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1316, "grad_norm": 1.319510579109192, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0138, "grad_norm": 1.172463297843933, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8757, "grad_norm": 1.2299168109893799, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8628, "grad_norm": 1.8571380376815796, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7331, "grad_norm": 0.8769466876983643, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6501, "grad_norm": 0.9425599575042725, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5829, "grad_norm": 0.6939888000488281, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6745, "grad_norm": 0.80997234582901, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6317, "grad_norm": 0.576340913772583, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6426, "grad_norm": 0.8277338743209839, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5979, "grad_norm": 0.812319278717041, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6206, "grad_norm": 0.744087815284729, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.658, "grad_norm": 0.7455664277076721, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4727, "grad_norm": 0.7555345892906189, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.508, "grad_norm": 0.6886737942695618, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 196.87817358970642, "total_accumulated_duration": 196.87817358970642, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5888, "grad_norm": 1.02470064163208, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5103, "grad_norm": 4.141382217407227, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1431, "grad_norm": 1.3427177667617798, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2450870275497437, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.873, "grad_norm": 0.8380146026611328, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8602, "grad_norm": 1.6974546909332275, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7297, "grad_norm": 0.842096209526062, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6483, "grad_norm": 0.8819207549095154, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5822, "grad_norm": 0.6970117688179016, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6723, "grad_norm": 0.732140839099884, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6293, "grad_norm": 0.6692239046096802, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6398, "grad_norm": 0.8081008195877075, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5955, "grad_norm": 0.8078020215034485, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6184, "grad_norm": 0.752601683139801, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.657, "grad_norm": 0.7533067464828491, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.472, "grad_norm": 0.7555228471755981, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5078, "grad_norm": 0.6797512769699097, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 196.1910490989685, "total_accumulated_duration": 196.1910490989685, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5867, "grad_norm": 1.0275089740753174, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5071, "grad_norm": 5.149324893951416, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1403, "grad_norm": 1.302829384803772, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0171, "grad_norm": 1.2056041955947876, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.877, "grad_norm": 1.6039173603057861, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8654, "grad_norm": 2.2578282356262207, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7336, "grad_norm": 0.891614556312561, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6502, "grad_norm": 0.7235773205757141, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5805, "grad_norm": 0.5928594470024109, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6703, "grad_norm": 0.9899774789810181, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6212, "grad_norm": 0.8848198056221008, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6381, "grad_norm": 0.8585798144340515, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5921, "grad_norm": 0.849170446395874, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6146, "grad_norm": 0.7267282605171204, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6543, "grad_norm": 0.7717547416687012, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4709, "grad_norm": 0.7685355544090271, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5076, "grad_norm": 0.6848856210708618, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 183.86725306510925, "total_accumulated_duration": 183.86725306510925, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5898, "grad_norm": 1.0211373567581177, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5123, "grad_norm": 3.9195690155029297, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1553, "grad_norm": 1.3422091007232666, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0102, "grad_norm": 1.2220985889434814, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8759, "grad_norm": 0.7786314487457275, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8629, "grad_norm": 1.8703639507293701, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.731, "grad_norm": 0.8601687550544739, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6498, "grad_norm": 0.839336633682251, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5831, "grad_norm": 0.6838223338127136, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.671, "grad_norm": 0.736075222492218, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.626, "grad_norm": 1.5018651485443115, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6381, "grad_norm": 0.7694732546806335, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5935, "grad_norm": 0.7978103756904602, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6163, "grad_norm": 0.7607526779174805, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6548, "grad_norm": 0.7390126585960388, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4711, "grad_norm": 0.7577585577964783, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5073, "grad_norm": 0.7028831839561462, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 196.93704557418823, "total_accumulated_duration": 196.93704557418823, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5874, "grad_norm": 1.0230627059936523, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5078, "grad_norm": 4.167836666107178, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1285, "grad_norm": 1.3116984367370605, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0141, "grad_norm": 1.1505714654922485, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8763, "grad_norm": 1.017291784286499, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8617, "grad_norm": 1.3579925298690796, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7338, "grad_norm": 0.9080680012702942, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0459669828414917, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5827, "grad_norm": 0.6726023554801941, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6734, "grad_norm": 0.7067064046859741, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6281, "grad_norm": 1.1829911470413208, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6394, "grad_norm": 0.7684104442596436, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5943, "grad_norm": 0.8014359474182129, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6178, "grad_norm": 0.7519007325172424, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6557, "grad_norm": 0.7420642375946045, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4719, "grad_norm": 0.7634224891662598, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5076, "grad_norm": 0.6824071407318115, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 197.5335237979889, "total_accumulated_duration": 197.5335237979889, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5873, "grad_norm": 1.025920033454895, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5064, "grad_norm": 4.992947578430176, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1384, "grad_norm": 1.3070180416107178, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0151, "grad_norm": 1.200543999671936, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8758, "grad_norm": 1.6544991731643677, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8644, "grad_norm": 2.260460138320923, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7332, "grad_norm": 0.8998034596443176, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6499, "grad_norm": 0.7905030250549316, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5811, "grad_norm": 0.630607545375824, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6711, "grad_norm": 0.9616371393203735, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.6229, "grad_norm": 1.5203739404678345, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6378, "grad_norm": 0.8046877980232239, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5926, "grad_norm": 0.8209801316261292, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6151, "grad_norm": 0.7339947819709778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6543, "grad_norm": 0.7577587962150574, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7635911703109741, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5068, "grad_norm": 0.6860561966896057, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 73.26114892959595, "total_accumulated_duration": 73.26114892959595, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 0.9971509971509972, "step": 175, "epoch_duration": 72.66727685928345, "total_accumulated_duration": 72.66727685928345, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}]} +{"epoch": 2.0, "step": 351, "epoch_duration": 72.60998892784119, "total_accumulated_duration": 145.87113785743713, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}]} +{"epoch": 2.0, "step": 351, "epoch_duration": 74.16440558433533, "total_accumulated_duration": 146.83168244361877, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-175", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}]} +{"epoch": 2.9971509971509973, "step": 526, "epoch_duration": 74.01644945144653, "total_accumulated_duration": 219.88758730888367, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}]} +{"epoch": 2.9971509971509973, "step": 526, "epoch_duration": 73.78220868110657, "total_accumulated_duration": 220.61389112472534, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-351", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}]} +{"epoch": 4.0, "step": 702, "epoch_duration": 71.99642825126648, "total_accumulated_duration": 291.88401556015015, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.4021576642990112, "eval_runtime": 2.8786, "eval_samples_per_second": 34.044, "eval_steps_per_second": 4.516, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3666, "grad_norm": 0.7116469144821167, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2579, "grad_norm": 0.6635934114456177, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2688, "grad_norm": 0.7347352504730225, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1805, "grad_norm": 0.693499743938446, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2463, "grad_norm": 0.7138662934303284, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2412, "grad_norm": 0.8807587027549744, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2961, "grad_norm": 0.7191319465637207, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2778, "grad_norm": 0.7447595596313477, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2302, "grad_norm": 0.7865943312644958, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2871, "grad_norm": 0.713374674320221, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2054, "grad_norm": 0.9345150589942932, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.758057713508606, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.78746497631073, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2881, "grad_norm": 0.9097195267677307, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2845, "grad_norm": 0.7972307205200195, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3123, "grad_norm": 0.7442638278007507, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3008, "grad_norm": 0.836273193359375, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2444, "grad_norm": 0.7712854146957397, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}]} +{"epoch": 4.0, "step": 702, "epoch_duration": 73.55654525756836, "total_accumulated_duration": 294.1704363822937, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-526", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.401209831237793, "eval_runtime": 2.8721, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.526, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3656, "grad_norm": 0.7109280228614807, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2571, "grad_norm": 0.6746246814727783, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2685, "grad_norm": 0.7202523350715637, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1808, "grad_norm": 0.697090208530426, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2479, "grad_norm": 0.7157464623451233, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2426, "grad_norm": 0.8729232549667358, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2957, "grad_norm": 0.7119743227958679, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2787, "grad_norm": 0.7417448163032532, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2317, "grad_norm": 0.8174124956130981, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2916, "grad_norm": 0.7199270129203796, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2074, "grad_norm": 0.989138662815094, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.75921630859375, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.7844401001930237, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2851, "grad_norm": 0.9127110242843628, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2835, "grad_norm": 0.7972270846366882, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3105, "grad_norm": 0.7458992004394531, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3017, "grad_norm": 0.854924738407135, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2455, "grad_norm": 0.7763816118240356, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}]} +{"epoch": 4.997150997150997, "step": 877, "epoch_duration": 72.5728759765625, "total_accumulated_duration": 364.45689153671265, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.4021576642990112, "eval_runtime": 2.8786, "eval_samples_per_second": 34.044, "eval_steps_per_second": 4.516, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3666, "grad_norm": 0.7116469144821167, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2579, "grad_norm": 0.6635934114456177, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2688, "grad_norm": 0.7347352504730225, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1805, "grad_norm": 0.693499743938446, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2463, "grad_norm": 0.7138662934303284, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2412, "grad_norm": 0.8807587027549744, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2961, "grad_norm": 0.7191319465637207, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2778, "grad_norm": 0.7447595596313477, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2302, "grad_norm": 0.7865943312644958, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2871, "grad_norm": 0.713374674320221, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2054, "grad_norm": 0.9345150589942932, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.758057713508606, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.78746497631073, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2881, "grad_norm": 0.9097195267677307, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2845, "grad_norm": 0.7972307205200195, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3123, "grad_norm": 0.7442638278007507, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3008, "grad_norm": 0.836273193359375, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2444, "grad_norm": 0.7712854146957397, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3992847204208374, "eval_runtime": 2.8818, "eval_samples_per_second": 34.007, "eval_steps_per_second": 4.511, "epoch": 4.0, "step": 702}, {"loss": 1.1796, "grad_norm": 0.7713205218315125, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1615, "grad_norm": 0.8424011468887329, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2283, "grad_norm": 0.7956175804138184, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1855, "grad_norm": 0.7468942403793335, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1677, "grad_norm": 0.7284650206565857, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.1162, "grad_norm": 0.6964936256408691, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.1868, "grad_norm": 0.8488320708274841, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1688, "grad_norm": 0.796196699142456, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.192, "grad_norm": 0.8746815919876099, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1857, "grad_norm": 0.9144721627235413, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.1836, "grad_norm": 0.7818491458892822, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.204, "grad_norm": 0.7483993768692017, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.191, "grad_norm": 0.7826094627380371, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1254, "grad_norm": 0.7471262216567993, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2276, "grad_norm": 0.8124629259109497, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.1405, "grad_norm": 0.7713154554367065, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1768, "grad_norm": 0.7297055125236511, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}]} +{"epoch": 6.0, "step": 1053, "epoch_duration": 72.29663968086243, "total_accumulated_duration": 436.7535312175751, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.4021576642990112, "eval_runtime": 2.8786, "eval_samples_per_second": 34.044, "eval_steps_per_second": 4.516, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3666, "grad_norm": 0.7116469144821167, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2579, "grad_norm": 0.6635934114456177, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2688, "grad_norm": 0.7347352504730225, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1805, "grad_norm": 0.693499743938446, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2463, "grad_norm": 0.7138662934303284, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2412, "grad_norm": 0.8807587027549744, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2961, "grad_norm": 0.7191319465637207, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2778, "grad_norm": 0.7447595596313477, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2302, "grad_norm": 0.7865943312644958, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2871, "grad_norm": 0.713374674320221, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2054, "grad_norm": 0.9345150589942932, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.758057713508606, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.78746497631073, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2881, "grad_norm": 0.9097195267677307, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2845, "grad_norm": 0.7972307205200195, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3123, "grad_norm": 0.7442638278007507, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3008, "grad_norm": 0.836273193359375, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2444, "grad_norm": 0.7712854146957397, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3992847204208374, "eval_runtime": 2.8818, "eval_samples_per_second": 34.007, "eval_steps_per_second": 4.511, "epoch": 4.0, "step": 702}, {"loss": 1.1796, "grad_norm": 0.7713205218315125, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1615, "grad_norm": 0.8424011468887329, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2283, "grad_norm": 0.7956175804138184, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1855, "grad_norm": 0.7468942403793335, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1677, "grad_norm": 0.7284650206565857, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.1162, "grad_norm": 0.6964936256408691, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.1868, "grad_norm": 0.8488320708274841, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1688, "grad_norm": 0.796196699142456, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.192, "grad_norm": 0.8746815919876099, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1857, "grad_norm": 0.9144721627235413, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.1836, "grad_norm": 0.7818491458892822, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.204, "grad_norm": 0.7483993768692017, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.191, "grad_norm": 0.7826094627380371, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1254, "grad_norm": 0.7471262216567993, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2276, "grad_norm": 0.8124629259109497, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.1405, "grad_norm": 0.7713154554367065, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1768, "grad_norm": 0.7297055125236511, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4079580307006836, "eval_runtime": 2.8839, "eval_samples_per_second": 33.982, "eval_steps_per_second": 4.508, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.1513, "grad_norm": 0.793989896774292, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.066, "grad_norm": 1.2083884477615356, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.049, "grad_norm": 0.9633278846740723, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.116, "grad_norm": 0.908246636390686, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0497, "grad_norm": 0.9113070964813232, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1089, "grad_norm": 0.8860645294189453, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0345, "grad_norm": 0.8054319024085999, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1365, "grad_norm": 0.9051408171653748, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.1006, "grad_norm": 0.8493460416793823, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1106, "grad_norm": 0.9579766988754272, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0818, "grad_norm": 1.1115326881408691, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1128, "grad_norm": 1.0047303438186646, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1488, "grad_norm": 1.0419378280639648, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0848, "grad_norm": 0.9149153828620911, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0649, "grad_norm": 0.8375725746154785, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1194, "grad_norm": 0.7414003014564514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9575881958007812, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1097, "grad_norm": 0.889681875705719, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}]} +{"epoch": 4.997150997150997, "step": 877, "epoch_duration": 73.76588153839111, "total_accumulated_duration": 367.9363179206848, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.401209831237793, "eval_runtime": 2.8721, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.526, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3656, "grad_norm": 0.7109280228614807, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2571, "grad_norm": 0.6746246814727783, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2685, "grad_norm": 0.7202523350715637, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1808, "grad_norm": 0.697090208530426, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2479, "grad_norm": 0.7157464623451233, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2426, "grad_norm": 0.8729232549667358, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2957, "grad_norm": 0.7119743227958679, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2787, "grad_norm": 0.7417448163032532, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2317, "grad_norm": 0.8174124956130981, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2916, "grad_norm": 0.7199270129203796, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2074, "grad_norm": 0.989138662815094, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.75921630859375, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.7844401001930237, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2851, "grad_norm": 0.9127110242843628, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2835, "grad_norm": 0.7972270846366882, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3105, "grad_norm": 0.7458992004394531, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3017, "grad_norm": 0.854924738407135, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2455, "grad_norm": 0.7763816118240356, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3988444805145264, "eval_runtime": 2.8697, "eval_samples_per_second": 34.15, "eval_steps_per_second": 4.53, "epoch": 4.0, "step": 702}, {"loss": 1.178, "grad_norm": 0.877430260181427, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1635, "grad_norm": 0.8365248441696167, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2286, "grad_norm": 0.7748925089836121, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1836, "grad_norm": 0.7695241570472717, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1685, "grad_norm": 0.7229928374290466, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.117, "grad_norm": 0.7035910487174988, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.189, "grad_norm": 0.9075796008110046, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1693, "grad_norm": 0.7957494854927063, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.1945, "grad_norm": 0.8733780384063721, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1867, "grad_norm": 0.8786619901657104, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.185, "grad_norm": 0.7101715803146362, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.2063, "grad_norm": 0.7451328039169312, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.1939, "grad_norm": 0.7830713987350464, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1251, "grad_norm": 0.7804535031318665, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2278, "grad_norm": 0.8121811747550964, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.142, "grad_norm": 0.774864137172699, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1736, "grad_norm": 0.7517814040184021, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}]} +{"epoch": 6.997150997150997, "step": 1228, "epoch_duration": 72.54125547409058, "total_accumulated_duration": 509.29478669166565, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.4021576642990112, "eval_runtime": 2.8786, "eval_samples_per_second": 34.044, "eval_steps_per_second": 4.516, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3666, "grad_norm": 0.7116469144821167, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2579, "grad_norm": 0.6635934114456177, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2688, "grad_norm": 0.7347352504730225, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1805, "grad_norm": 0.693499743938446, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2463, "grad_norm": 0.7138662934303284, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2412, "grad_norm": 0.8807587027549744, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2961, "grad_norm": 0.7191319465637207, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2778, "grad_norm": 0.7447595596313477, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2302, "grad_norm": 0.7865943312644958, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2871, "grad_norm": 0.713374674320221, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2054, "grad_norm": 0.9345150589942932, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.758057713508606, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.78746497631073, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2881, "grad_norm": 0.9097195267677307, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2845, "grad_norm": 0.7972307205200195, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3123, "grad_norm": 0.7442638278007507, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3008, "grad_norm": 0.836273193359375, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2444, "grad_norm": 0.7712854146957397, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3992847204208374, "eval_runtime": 2.8818, "eval_samples_per_second": 34.007, "eval_steps_per_second": 4.511, "epoch": 4.0, "step": 702}, {"loss": 1.1796, "grad_norm": 0.7713205218315125, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1615, "grad_norm": 0.8424011468887329, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2283, "grad_norm": 0.7956175804138184, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1855, "grad_norm": 0.7468942403793335, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1677, "grad_norm": 0.7284650206565857, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.1162, "grad_norm": 0.6964936256408691, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.1868, "grad_norm": 0.8488320708274841, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1688, "grad_norm": 0.796196699142456, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.192, "grad_norm": 0.8746815919876099, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1857, "grad_norm": 0.9144721627235413, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.1836, "grad_norm": 0.7818491458892822, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.204, "grad_norm": 0.7483993768692017, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.191, "grad_norm": 0.7826094627380371, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1254, "grad_norm": 0.7471262216567993, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2276, "grad_norm": 0.8124629259109497, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.1405, "grad_norm": 0.7713154554367065, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1768, "grad_norm": 0.7297055125236511, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4079580307006836, "eval_runtime": 2.8839, "eval_samples_per_second": 33.982, "eval_steps_per_second": 4.508, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.1513, "grad_norm": 0.793989896774292, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.066, "grad_norm": 1.2083884477615356, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.049, "grad_norm": 0.9633278846740723, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.116, "grad_norm": 0.908246636390686, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0497, "grad_norm": 0.9113070964813232, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1089, "grad_norm": 0.8860645294189453, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0345, "grad_norm": 0.8054319024085999, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1365, "grad_norm": 0.9051408171653748, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.1006, "grad_norm": 0.8493460416793823, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1106, "grad_norm": 0.9579766988754272, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0818, "grad_norm": 1.1115326881408691, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1128, "grad_norm": 1.0047303438186646, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1488, "grad_norm": 1.0419378280639648, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0848, "grad_norm": 0.9149153828620911, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0649, "grad_norm": 0.8375725746154785, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1194, "grad_norm": 0.7414003014564514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9575881958007812, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1097, "grad_norm": 0.889681875705719, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}, {"eval_loss": 1.4382578134536743, "eval_runtime": 2.8787, "eval_samples_per_second": 34.043, "eval_steps_per_second": 4.516, "epoch": 6.0, "step": 1053}, {"loss": 1.0383, "grad_norm": 0.93312007188797, "learning_rate": 0.0002, "epoch": 6.0398860398860394, "step": 1060}, {"loss": 1.038, "grad_norm": 0.9554668068885803, "learning_rate": 0.0002, "epoch": 6.096866096866097, "step": 1070}, {"loss": 0.9846, "grad_norm": 0.8017868995666504, "learning_rate": 0.0002, "epoch": 6.153846153846154, "step": 1080}, {"loss": 0.9767, "grad_norm": 1.1163588762283325, "learning_rate": 0.0002, "epoch": 6.210826210826211, "step": 1090}, {"loss": 1.0239, "grad_norm": 0.9846243262290955, "learning_rate": 0.0002, "epoch": 6.267806267806268, "step": 1100}, {"loss": 0.9773, "grad_norm": 0.930727481842041, "learning_rate": 0.0002, "epoch": 6.3247863247863245, "step": 1110}, {"loss": 0.9885, "grad_norm": 0.9940898418426514, "learning_rate": 0.0002, "epoch": 6.381766381766382, "step": 1120}, {"loss": 1.0289, "grad_norm": 1.0614467859268188, "learning_rate": 0.0002, "epoch": 6.438746438746438, "step": 1130}, {"loss": 1.0301, "grad_norm": 0.9676510095596313, "learning_rate": 0.0002, "epoch": 6.495726495726496, "step": 1140}, {"loss": 0.9773, "grad_norm": 0.8477176427841187, "learning_rate": 0.0002, "epoch": 6.552706552706553, "step": 1150}, {"loss": 0.9944, "grad_norm": 1.0543252229690552, "learning_rate": 0.0002, "epoch": 6.60968660968661, "step": 1160}, {"loss": 0.9939, "grad_norm": 0.9989932775497437, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 1170}, {"loss": 1.0854, "grad_norm": 1.1233140230178833, "learning_rate": 0.0002, "epoch": 6.7236467236467234, "step": 1180}, {"loss": 1.0392, "grad_norm": 1.167738676071167, "learning_rate": 0.0002, "epoch": 6.780626780626781, "step": 1190}, {"loss": 0.9527, "grad_norm": 1.0173308849334717, "learning_rate": 0.0002, "epoch": 6.837606837606837, "step": 1200}, {"loss": 1.0461, "grad_norm": 1.2425651550292969, "learning_rate": 0.0002, "epoch": 6.894586894586895, "step": 1210}, {"loss": 1.0159, "grad_norm": 0.936252772808075, "learning_rate": 0.0002, "epoch": 6.951566951566951, "step": 1220}]} +{"epoch": 6.0, "step": 1053, "epoch_duration": 73.2647020816803, "total_accumulated_duration": 441.2010200023651, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.401209831237793, "eval_runtime": 2.8721, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.526, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3656, "grad_norm": 0.7109280228614807, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2571, "grad_norm": 0.6746246814727783, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2685, "grad_norm": 0.7202523350715637, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1808, "grad_norm": 0.697090208530426, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2479, "grad_norm": 0.7157464623451233, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2426, "grad_norm": 0.8729232549667358, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2957, "grad_norm": 0.7119743227958679, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2787, "grad_norm": 0.7417448163032532, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2317, "grad_norm": 0.8174124956130981, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2916, "grad_norm": 0.7199270129203796, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2074, "grad_norm": 0.989138662815094, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.75921630859375, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.7844401001930237, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2851, "grad_norm": 0.9127110242843628, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2835, "grad_norm": 0.7972270846366882, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3105, "grad_norm": 0.7458992004394531, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3017, "grad_norm": 0.854924738407135, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2455, "grad_norm": 0.7763816118240356, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3988444805145264, "eval_runtime": 2.8697, "eval_samples_per_second": 34.15, "eval_steps_per_second": 4.53, "epoch": 4.0, "step": 702}, {"loss": 1.178, "grad_norm": 0.877430260181427, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1635, "grad_norm": 0.8365248441696167, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2286, "grad_norm": 0.7748925089836121, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1836, "grad_norm": 0.7695241570472717, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1685, "grad_norm": 0.7229928374290466, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.117, "grad_norm": 0.7035910487174988, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.189, "grad_norm": 0.9075796008110046, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1693, "grad_norm": 0.7957494854927063, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.1945, "grad_norm": 0.8733780384063721, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1867, "grad_norm": 0.8786619901657104, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.185, "grad_norm": 0.7101715803146362, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.2063, "grad_norm": 0.7451328039169312, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.1939, "grad_norm": 0.7830713987350464, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1251, "grad_norm": 0.7804535031318665, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2278, "grad_norm": 0.8121811747550964, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.142, "grad_norm": 0.774864137172699, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1736, "grad_norm": 0.7517814040184021, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4074795246124268, "eval_runtime": 2.8707, "eval_samples_per_second": 34.138, "eval_steps_per_second": 4.529, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.151, "grad_norm": 0.7974972128868103, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.0637, "grad_norm": 1.1127357482910156, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.0497, "grad_norm": 0.8995195031166077, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.1101, "grad_norm": 0.8325890898704529, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0567, "grad_norm": 0.8830686807632446, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1094, "grad_norm": 0.8856923580169678, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0328, "grad_norm": 0.814587414264679, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1379, "grad_norm": 0.9119254946708679, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.0993, "grad_norm": 0.8547661304473877, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1137, "grad_norm": 0.943742036819458, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0815, "grad_norm": 1.1333340406417847, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1161, "grad_norm": 1.0290982723236084, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1477, "grad_norm": 1.0613716840744019, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0878, "grad_norm": 0.925118088722229, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0658, "grad_norm": 0.828220546245575, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1179, "grad_norm": 0.7466493248939514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9189135432243347, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1114, "grad_norm": 0.9117513298988342, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}]} +{"epoch": 7.977207977207978, "step": 1400, "epoch_duration": 101.66690945625305, "total_accumulated_duration": 610.9616961479187, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5893, "grad_norm": 1.0233017206192017, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5117, "grad_norm": 4.163138389587402, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1519, "grad_norm": 1.3474042415618896, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.2415039539337158, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.8789, "grad_norm": 1.0774320363998413, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8642, "grad_norm": 0.716522753238678, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7333, "grad_norm": 0.9734901189804077, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.651, "grad_norm": 1.7638314962387085, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5815, "grad_norm": 0.6061757206916809, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6714, "grad_norm": 0.7205694317817688, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.63, "grad_norm": 0.7752107381820679, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6405, "grad_norm": 0.8007442355155945, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.595, "grad_norm": 0.8049221038818359, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6182, "grad_norm": 0.7540143132209778, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6574, "grad_norm": 0.7468489408493042, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4714, "grad_norm": 0.7622578740119934, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5075, "grad_norm": 0.704738438129425, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4978793859481812, "eval_runtime": 2.8823, "eval_samples_per_second": 34.001, "eval_steps_per_second": 4.51, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5307, "grad_norm": 0.6786648035049438, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4725, "grad_norm": 0.6568158268928528, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4754, "grad_norm": 0.7925108671188354, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.543, "grad_norm": 0.6983732581138611, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5132, "grad_norm": 0.6918368935585022, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.3858, "grad_norm": 0.6140615940093994, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4046, "grad_norm": 0.5929235816001892, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4951, "grad_norm": 0.76374751329422, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4439, "grad_norm": 0.6553613543510437, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4252, "grad_norm": 0.9090031385421753, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3674, "grad_norm": 0.6486701369285583, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5102, "grad_norm": 0.8332676291465759, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4243, "grad_norm": 0.6700407266616821, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7524263262748718, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3821, "grad_norm": 0.5268421173095703, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4244, "grad_norm": 0.6470246315002441, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3603, "grad_norm": 0.7546916007995605, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4809, "grad_norm": 0.6527156233787537, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.4244847297668457, "eval_runtime": 2.8825, "eval_samples_per_second": 33.999, "eval_steps_per_second": 4.51, "epoch": 2.0, "step": 351}, {"loss": 1.3558, "grad_norm": 0.8553531765937805, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.3867, "grad_norm": 0.6769258975982666, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3242, "grad_norm": 0.6619579195976257, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.3993, "grad_norm": 0.7349176406860352, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.4022, "grad_norm": 0.6977018117904663, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3072, "grad_norm": 0.7045870423316956, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3586, "grad_norm": 0.7158947587013245, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3159, "grad_norm": 0.6706043481826782, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8902416229248047, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3071, "grad_norm": 0.736464262008667, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.2793, "grad_norm": 0.6882060766220093, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2949, "grad_norm": 0.6580819487571716, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3477, "grad_norm": 0.7671576738357544, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3488, "grad_norm": 0.791325569152832, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3359, "grad_norm": 0.6403379440307617, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.3109, "grad_norm": 0.6506697535514832, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2876, "grad_norm": 0.7321205139160156, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.4021576642990112, "eval_runtime": 2.8786, "eval_samples_per_second": 34.044, "eval_steps_per_second": 4.516, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3666, "grad_norm": 0.7116469144821167, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2579, "grad_norm": 0.6635934114456177, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2688, "grad_norm": 0.7347352504730225, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1805, "grad_norm": 0.693499743938446, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2463, "grad_norm": 0.7138662934303284, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2412, "grad_norm": 0.8807587027549744, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2961, "grad_norm": 0.7191319465637207, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2778, "grad_norm": 0.7447595596313477, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2302, "grad_norm": 0.7865943312644958, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2871, "grad_norm": 0.713374674320221, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2054, "grad_norm": 0.9345150589942932, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.758057713508606, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.78746497631073, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2881, "grad_norm": 0.9097195267677307, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2845, "grad_norm": 0.7972307205200195, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3123, "grad_norm": 0.7442638278007507, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3008, "grad_norm": 0.836273193359375, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2444, "grad_norm": 0.7712854146957397, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3992847204208374, "eval_runtime": 2.8818, "eval_samples_per_second": 34.007, "eval_steps_per_second": 4.511, "epoch": 4.0, "step": 702}, {"loss": 1.1796, "grad_norm": 0.7713205218315125, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1615, "grad_norm": 0.8424011468887329, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2283, "grad_norm": 0.7956175804138184, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1855, "grad_norm": 0.7468942403793335, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1677, "grad_norm": 0.7284650206565857, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.1162, "grad_norm": 0.6964936256408691, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.1868, "grad_norm": 0.8488320708274841, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1688, "grad_norm": 0.796196699142456, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.192, "grad_norm": 0.8746815919876099, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1857, "grad_norm": 0.9144721627235413, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.1836, "grad_norm": 0.7818491458892822, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.204, "grad_norm": 0.7483993768692017, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.191, "grad_norm": 0.7826094627380371, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1254, "grad_norm": 0.7471262216567993, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2276, "grad_norm": 0.8124629259109497, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.1405, "grad_norm": 0.7713154554367065, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1768, "grad_norm": 0.7297055125236511, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4079580307006836, "eval_runtime": 2.8839, "eval_samples_per_second": 33.982, "eval_steps_per_second": 4.508, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.1513, "grad_norm": 0.793989896774292, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.066, "grad_norm": 1.2083884477615356, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.049, "grad_norm": 0.9633278846740723, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.116, "grad_norm": 0.908246636390686, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0497, "grad_norm": 0.9113070964813232, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1089, "grad_norm": 0.8860645294189453, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0345, "grad_norm": 0.8054319024085999, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1365, "grad_norm": 0.9051408171653748, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.1006, "grad_norm": 0.8493460416793823, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1106, "grad_norm": 0.9579766988754272, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0818, "grad_norm": 1.1115326881408691, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1128, "grad_norm": 1.0047303438186646, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1488, "grad_norm": 1.0419378280639648, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0848, "grad_norm": 0.9149153828620911, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0649, "grad_norm": 0.8375725746154785, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1194, "grad_norm": 0.7414003014564514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9575881958007812, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1097, "grad_norm": 0.889681875705719, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}, {"eval_loss": 1.4382578134536743, "eval_runtime": 2.8787, "eval_samples_per_second": 34.043, "eval_steps_per_second": 4.516, "epoch": 6.0, "step": 1053}, {"loss": 1.0383, "grad_norm": 0.93312007188797, "learning_rate": 0.0002, "epoch": 6.0398860398860394, "step": 1060}, {"loss": 1.038, "grad_norm": 0.9554668068885803, "learning_rate": 0.0002, "epoch": 6.096866096866097, "step": 1070}, {"loss": 0.9846, "grad_norm": 0.8017868995666504, "learning_rate": 0.0002, "epoch": 6.153846153846154, "step": 1080}, {"loss": 0.9767, "grad_norm": 1.1163588762283325, "learning_rate": 0.0002, "epoch": 6.210826210826211, "step": 1090}, {"loss": 1.0239, "grad_norm": 0.9846243262290955, "learning_rate": 0.0002, "epoch": 6.267806267806268, "step": 1100}, {"loss": 0.9773, "grad_norm": 0.930727481842041, "learning_rate": 0.0002, "epoch": 6.3247863247863245, "step": 1110}, {"loss": 0.9885, "grad_norm": 0.9940898418426514, "learning_rate": 0.0002, "epoch": 6.381766381766382, "step": 1120}, {"loss": 1.0289, "grad_norm": 1.0614467859268188, "learning_rate": 0.0002, "epoch": 6.438746438746438, "step": 1130}, {"loss": 1.0301, "grad_norm": 0.9676510095596313, "learning_rate": 0.0002, "epoch": 6.495726495726496, "step": 1140}, {"loss": 0.9773, "grad_norm": 0.8477176427841187, "learning_rate": 0.0002, "epoch": 6.552706552706553, "step": 1150}, {"loss": 0.9944, "grad_norm": 1.0543252229690552, "learning_rate": 0.0002, "epoch": 6.60968660968661, "step": 1160}, {"loss": 0.9939, "grad_norm": 0.9989932775497437, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 1170}, {"loss": 1.0854, "grad_norm": 1.1233140230178833, "learning_rate": 0.0002, "epoch": 6.7236467236467234, "step": 1180}, {"loss": 1.0392, "grad_norm": 1.167738676071167, "learning_rate": 0.0002, "epoch": 6.780626780626781, "step": 1190}, {"loss": 0.9527, "grad_norm": 1.0173308849334717, "learning_rate": 0.0002, "epoch": 6.837606837606837, "step": 1200}, {"loss": 1.0461, "grad_norm": 1.2425651550292969, "learning_rate": 0.0002, "epoch": 6.894586894586895, "step": 1210}, {"loss": 1.0159, "grad_norm": 0.936252772808075, "learning_rate": 0.0002, "epoch": 6.951566951566951, "step": 1220}, {"eval_loss": 1.4745711088180542, "eval_runtime": 2.8963, "eval_samples_per_second": 33.836, "eval_steps_per_second": 4.488, "epoch": 6.997150997150997, "step": 1228}, {"loss": 1.0114, "grad_norm": 0.964927613735199, "learning_rate": 0.0002, "epoch": 7.0085470085470085, "step": 1230}, {"loss": 0.9128, "grad_norm": 1.0938352346420288, "learning_rate": 0.0002, "epoch": 7.065527065527066, "step": 1240}, {"loss": 0.9237, "grad_norm": 0.9703038930892944, "learning_rate": 0.0002, "epoch": 7.122507122507122, "step": 1250}, {"loss": 0.9082, "grad_norm": 1.2634679079055786, "learning_rate": 0.0002, "epoch": 7.17948717948718, "step": 1260}, {"loss": 0.9337, "grad_norm": 1.0933488607406616, "learning_rate": 0.0002, "epoch": 7.236467236467236, "step": 1270}, {"loss": 0.896, "grad_norm": 1.1065915822982788, "learning_rate": 0.0002, "epoch": 7.293447293447294, "step": 1280}, {"loss": 0.9334, "grad_norm": 1.2566848993301392, "learning_rate": 0.0002, "epoch": 7.35042735042735, "step": 1290}, {"loss": 0.8398, "grad_norm": 1.0640543699264526, "learning_rate": 0.0002, "epoch": 7.407407407407407, "step": 1300}, {"loss": 0.9353, "grad_norm": 1.1277321577072144, "learning_rate": 0.0002, "epoch": 7.464387464387464, "step": 1310}, {"loss": 0.9089, "grad_norm": 1.023599624633789, "learning_rate": 0.0002, "epoch": 7.521367521367521, "step": 1320}, {"loss": 0.946, "grad_norm": 1.1464563608169556, "learning_rate": 0.0002, "epoch": 7.578347578347579, "step": 1330}, {"loss": 0.9418, "grad_norm": 1.1366990804672241, "learning_rate": 0.0002, "epoch": 7.635327635327635, "step": 1340}, {"loss": 0.9043, "grad_norm": 0.8951204419136047, "learning_rate": 0.0002, "epoch": 7.6923076923076925, "step": 1350}, {"loss": 0.9075, "grad_norm": 1.1149132251739502, "learning_rate": 0.0002, "epoch": 7.749287749287749, "step": 1360}, {"loss": 0.9693, "grad_norm": 1.2248684167861938, "learning_rate": 0.0002, "epoch": 7.806267806267806, "step": 1370}, {"loss": 0.9242, "grad_norm": 1.0078679323196411, "learning_rate": 0.0002, "epoch": 7.863247863247864, "step": 1380}, {"loss": 0.972, "grad_norm": 1.0135971307754517, "learning_rate": 0.0002, "epoch": 7.92022792022792, "step": 1390}, {"loss": 0.9393, "grad_norm": 1.2399200201034546, "learning_rate": 0.0002, "epoch": 7.977207977207978, "step": 1400}]} +{"epoch": 6.997150997150997, "step": 1228, "epoch_duration": 103.41745567321777, "total_accumulated_duration": 544.6184756755829, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3048.73388671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.401209831237793, "eval_runtime": 2.8721, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.526, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3656, "grad_norm": 0.7109280228614807, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2571, "grad_norm": 0.6746246814727783, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2685, "grad_norm": 0.7202523350715637, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1808, "grad_norm": 0.697090208530426, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2479, "grad_norm": 0.7157464623451233, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2426, "grad_norm": 0.8729232549667358, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2957, "grad_norm": 0.7119743227958679, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2787, "grad_norm": 0.7417448163032532, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2317, "grad_norm": 0.8174124956130981, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2916, "grad_norm": 0.7199270129203796, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2074, "grad_norm": 0.989138662815094, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.75921630859375, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.7844401001930237, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2851, "grad_norm": 0.9127110242843628, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2835, "grad_norm": 0.7972270846366882, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3105, "grad_norm": 0.7458992004394531, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3017, "grad_norm": 0.854924738407135, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2455, "grad_norm": 0.7763816118240356, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3988444805145264, "eval_runtime": 2.8697, "eval_samples_per_second": 34.15, "eval_steps_per_second": 4.53, "epoch": 4.0, "step": 702}, {"loss": 1.178, "grad_norm": 0.877430260181427, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1635, "grad_norm": 0.8365248441696167, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2286, "grad_norm": 0.7748925089836121, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1836, "grad_norm": 0.7695241570472717, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1685, "grad_norm": 0.7229928374290466, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.117, "grad_norm": 0.7035910487174988, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.189, "grad_norm": 0.9075796008110046, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1693, "grad_norm": 0.7957494854927063, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.1945, "grad_norm": 0.8733780384063721, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1867, "grad_norm": 0.8786619901657104, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.185, "grad_norm": 0.7101715803146362, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.2063, "grad_norm": 0.7451328039169312, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.1939, "grad_norm": 0.7830713987350464, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1251, "grad_norm": 0.7804535031318665, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2278, "grad_norm": 0.8121811747550964, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.142, "grad_norm": 0.774864137172699, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1736, "grad_norm": 0.7517814040184021, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4074795246124268, "eval_runtime": 2.8707, "eval_samples_per_second": 34.138, "eval_steps_per_second": 4.529, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.151, "grad_norm": 0.7974972128868103, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.0637, "grad_norm": 1.1127357482910156, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.0497, "grad_norm": 0.8995195031166077, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.1101, "grad_norm": 0.8325890898704529, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0567, "grad_norm": 0.8830686807632446, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1094, "grad_norm": 0.8856923580169678, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0328, "grad_norm": 0.814587414264679, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1379, "grad_norm": 0.9119254946708679, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.0993, "grad_norm": 0.8547661304473877, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1137, "grad_norm": 0.943742036819458, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0815, "grad_norm": 1.1333340406417847, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1161, "grad_norm": 1.0290982723236084, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1477, "grad_norm": 1.0613716840744019, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0878, "grad_norm": 0.925118088722229, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0658, "grad_norm": 0.828220546245575, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1179, "grad_norm": 0.7466493248939514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9189135432243347, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1114, "grad_norm": 0.9117513298988342, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}, {"eval_loss": 1.4333235025405884, "eval_runtime": 2.9267, "eval_samples_per_second": 33.485, "eval_steps_per_second": 4.442, "epoch": 6.0, "step": 1053}, {"loss": 1.0368, "grad_norm": 0.9506599307060242, "learning_rate": 0.0002, "epoch": 6.0398860398860394, "step": 1060}, {"loss": 1.0376, "grad_norm": 0.9809837937355042, "learning_rate": 0.0002, "epoch": 6.096866096866097, "step": 1070}, {"loss": 0.9849, "grad_norm": 0.852557361125946, "learning_rate": 0.0002, "epoch": 6.153846153846154, "step": 1080}, {"loss": 0.9782, "grad_norm": 1.135279893875122, "learning_rate": 0.0002, "epoch": 6.210826210826211, "step": 1090}, {"loss": 1.0238, "grad_norm": 1.0243879556655884, "learning_rate": 0.0002, "epoch": 6.267806267806268, "step": 1100}, {"loss": 0.9815, "grad_norm": 0.9213914275169373, "learning_rate": 0.0002, "epoch": 6.3247863247863245, "step": 1110}, {"loss": 0.9899, "grad_norm": 1.0042028427124023, "learning_rate": 0.0002, "epoch": 6.381766381766382, "step": 1120}, {"loss": 1.0249, "grad_norm": 1.1024253368377686, "learning_rate": 0.0002, "epoch": 6.438746438746438, "step": 1130}, {"loss": 1.0305, "grad_norm": 0.9245727062225342, "learning_rate": 0.0002, "epoch": 6.495726495726496, "step": 1140}, {"loss": 0.9766, "grad_norm": 0.8309272527694702, "learning_rate": 0.0002, "epoch": 6.552706552706553, "step": 1150}, {"loss": 0.9945, "grad_norm": 1.029746651649475, "learning_rate": 0.0002, "epoch": 6.60968660968661, "step": 1160}, {"loss": 0.9955, "grad_norm": 0.9932991862297058, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 1170}, {"loss": 1.0808, "grad_norm": 1.1597973108291626, "learning_rate": 0.0002, "epoch": 6.7236467236467234, "step": 1180}, {"loss": 1.0407, "grad_norm": 1.1373951435089111, "learning_rate": 0.0002, "epoch": 6.780626780626781, "step": 1190}, {"loss": 0.9513, "grad_norm": 1.0308297872543335, "learning_rate": 0.0002, "epoch": 6.837606837606837, "step": 1200}, {"loss": 1.0437, "grad_norm": 1.1555122137069702, "learning_rate": 0.0002, "epoch": 6.894586894586895, "step": 1210}, {"loss": 1.0164, "grad_norm": 0.9829897284507751, "learning_rate": 0.0002, "epoch": 6.951566951566951, "step": 1220}]} +{"epoch": 7.977207977207978, "step": 1400, "epoch_duration": 74.71395587921143, "total_accumulated_duration": 619.3324315547943, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15079.29833984375}, "avg_memory_reserved": {"GPU_0": 20172.0}, "peak_memory_reserved": {"GPU_0": 20172.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-en-fpb_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.9-num-2790-sd-10000/checkpoint-702", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.5896, "grad_norm": 1.0227872133255005, "learning_rate": 0.0002, "epoch": 0.05698005698005698, "step": 10}, {"loss": 2.5124, "grad_norm": 3.86788010597229, "learning_rate": 0.0002, "epoch": 0.11396011396011396, "step": 20}, {"loss": 2.1576, "grad_norm": 1.3474394083023071, "learning_rate": 0.0002, "epoch": 0.17094017094017094, "step": 30}, {"loss": 2.0115, "grad_norm": 1.1816296577453613, "learning_rate": 0.0002, "epoch": 0.22792022792022792, "step": 40}, {"loss": 1.875, "grad_norm": 1.0907047986984253, "learning_rate": 0.0002, "epoch": 0.2849002849002849, "step": 50}, {"loss": 1.8608, "grad_norm": 0.9163471460342407, "learning_rate": 0.0002, "epoch": 0.3418803418803419, "step": 60}, {"loss": 1.7334, "grad_norm": 1.0441275835037231, "learning_rate": 0.0002, "epoch": 0.39886039886039887, "step": 70}, {"loss": 1.6496, "grad_norm": 1.0836364030838013, "learning_rate": 0.0002, "epoch": 0.45584045584045585, "step": 80}, {"loss": 1.5814, "grad_norm": 0.5817112922668457, "learning_rate": 0.0002, "epoch": 0.5128205128205128, "step": 90}, {"loss": 1.6697, "grad_norm": 0.8991169929504395, "learning_rate": 0.0002, "epoch": 0.5698005698005698, "step": 100}, {"loss": 1.621, "grad_norm": 1.1820793151855469, "learning_rate": 0.0002, "epoch": 0.6267806267806267, "step": 110}, {"loss": 1.6376, "grad_norm": 0.8205533623695374, "learning_rate": 0.0002, "epoch": 0.6837606837606838, "step": 120}, {"loss": 1.5902, "grad_norm": 0.8154979348182678, "learning_rate": 0.0002, "epoch": 0.7407407407407407, "step": 130}, {"loss": 1.6139, "grad_norm": 0.7292681336402893, "learning_rate": 0.0002, "epoch": 0.7977207977207977, "step": 140}, {"loss": 1.6554, "grad_norm": 0.7737869024276733, "learning_rate": 0.0002, "epoch": 0.8547008547008547, "step": 150}, {"loss": 1.4696, "grad_norm": 0.7786843180656433, "learning_rate": 0.0002, "epoch": 0.9116809116809117, "step": 160}, {"loss": 1.5062, "grad_norm": 0.6918405294418335, "learning_rate": 0.0002, "epoch": 0.9686609686609686, "step": 170}, {"eval_loss": 1.4962449073791504, "eval_runtime": 2.869, "eval_samples_per_second": 34.158, "eval_steps_per_second": 4.531, "epoch": 0.9971509971509972, "step": 175}, {"loss": 1.5305, "grad_norm": 0.6754891872406006, "learning_rate": 0.0002, "epoch": 1.0256410256410255, "step": 180}, {"loss": 1.4709, "grad_norm": 0.6875350475311279, "learning_rate": 0.0002, "epoch": 1.0826210826210827, "step": 190}, {"loss": 1.4744, "grad_norm": 0.7870411276817322, "learning_rate": 0.0002, "epoch": 1.1396011396011396, "step": 200}, {"loss": 1.5414, "grad_norm": 0.6934282779693604, "learning_rate": 0.0002, "epoch": 1.1965811965811965, "step": 210}, {"loss": 1.5129, "grad_norm": 0.6980162858963013, "learning_rate": 0.0002, "epoch": 1.2535612535612537, "step": 220}, {"loss": 1.385, "grad_norm": 0.6163203120231628, "learning_rate": 0.0002, "epoch": 1.3105413105413106, "step": 230}, {"loss": 1.4028, "grad_norm": 0.5967347025871277, "learning_rate": 0.0002, "epoch": 1.3675213675213675, "step": 240}, {"loss": 1.4945, "grad_norm": 0.7622564435005188, "learning_rate": 0.0002, "epoch": 1.4245014245014245, "step": 250}, {"loss": 1.4426, "grad_norm": 0.6667674779891968, "learning_rate": 0.0002, "epoch": 1.4814814814814814, "step": 260}, {"loss": 1.4227, "grad_norm": 0.9225417971611023, "learning_rate": 0.0002, "epoch": 1.5384615384615383, "step": 270}, {"loss": 1.3687, "grad_norm": 0.6473053097724915, "learning_rate": 0.0002, "epoch": 1.5954415954415955, "step": 280}, {"loss": 1.5086, "grad_norm": 0.8250042796134949, "learning_rate": 0.0002, "epoch": 1.6524216524216524, "step": 290}, {"loss": 1.4259, "grad_norm": 0.6660609841346741, "learning_rate": 0.0002, "epoch": 1.7094017094017095, "step": 300}, {"loss": 1.373, "grad_norm": 0.7542873620986938, "learning_rate": 0.0002, "epoch": 1.7663817663817665, "step": 310}, {"loss": 1.3823, "grad_norm": 0.5261648297309875, "learning_rate": 0.0002, "epoch": 1.8233618233618234, "step": 320}, {"loss": 1.4251, "grad_norm": 0.6519118547439575, "learning_rate": 0.0002, "epoch": 1.8803418803418803, "step": 330}, {"loss": 1.3613, "grad_norm": 0.7584664821624756, "learning_rate": 0.0002, "epoch": 1.9373219373219372, "step": 340}, {"loss": 1.4797, "grad_norm": 0.6466017365455627, "learning_rate": 0.0002, "epoch": 1.9943019943019942, "step": 350}, {"eval_loss": 1.424173355102539, "eval_runtime": 2.8659, "eval_samples_per_second": 34.195, "eval_steps_per_second": 4.536, "epoch": 2.0, "step": 351}, {"loss": 1.3555, "grad_norm": 0.7457601428031921, "learning_rate": 0.0002, "epoch": 2.051282051282051, "step": 360}, {"loss": 1.387, "grad_norm": 0.6645848751068115, "learning_rate": 0.0002, "epoch": 2.1082621082621085, "step": 370}, {"loss": 1.3244, "grad_norm": 0.6545299887657166, "learning_rate": 0.0002, "epoch": 2.1652421652421654, "step": 380}, {"loss": 1.4025, "grad_norm": 0.7429937124252319, "learning_rate": 0.0002, "epoch": 2.2222222222222223, "step": 390}, {"loss": 1.3995, "grad_norm": 0.6929682493209839, "learning_rate": 0.0002, "epoch": 2.2792022792022792, "step": 400}, {"loss": 1.3073, "grad_norm": 0.6999889016151428, "learning_rate": 0.0002, "epoch": 2.336182336182336, "step": 410}, {"loss": 1.3573, "grad_norm": 0.7174718379974365, "learning_rate": 0.0002, "epoch": 2.393162393162393, "step": 420}, {"loss": 1.3169, "grad_norm": 0.667317807674408, "learning_rate": 0.0002, "epoch": 2.45014245014245, "step": 430}, {"loss": 1.3877, "grad_norm": 0.8981409072875977, "learning_rate": 0.0002, "epoch": 2.5071225071225074, "step": 440}, {"loss": 1.3085, "grad_norm": 0.7560263872146606, "learning_rate": 0.0002, "epoch": 2.564102564102564, "step": 450}, {"loss": 1.278, "grad_norm": 0.699364185333252, "learning_rate": 0.0002, "epoch": 2.6210826210826212, "step": 460}, {"loss": 1.2962, "grad_norm": 0.666292667388916, "learning_rate": 0.0002, "epoch": 2.678062678062678, "step": 470}, {"loss": 1.3471, "grad_norm": 0.7564692497253418, "learning_rate": 0.0002, "epoch": 2.735042735042735, "step": 480}, {"loss": 1.3489, "grad_norm": 0.7561964392662048, "learning_rate": 0.0002, "epoch": 2.792022792022792, "step": 490}, {"loss": 1.3357, "grad_norm": 0.6506860852241516, "learning_rate": 0.0002, "epoch": 2.849002849002849, "step": 500}, {"loss": 1.311, "grad_norm": 0.6425383687019348, "learning_rate": 0.0002, "epoch": 2.905982905982906, "step": 510}, {"loss": 1.2879, "grad_norm": 0.7424822449684143, "learning_rate": 0.0002, "epoch": 2.962962962962963, "step": 520}, {"eval_loss": 1.401209831237793, "eval_runtime": 2.8721, "eval_samples_per_second": 34.121, "eval_steps_per_second": 4.526, "epoch": 2.9971509971509973, "step": 526}, {"loss": 1.3656, "grad_norm": 0.7109280228614807, "learning_rate": 0.0002, "epoch": 3.0199430199430197, "step": 530}, {"loss": 1.2571, "grad_norm": 0.6746246814727783, "learning_rate": 0.0002, "epoch": 3.076923076923077, "step": 540}, {"loss": 1.2685, "grad_norm": 0.7202523350715637, "learning_rate": 0.0002, "epoch": 3.133903133903134, "step": 550}, {"loss": 1.1808, "grad_norm": 0.697090208530426, "learning_rate": 0.0002, "epoch": 3.190883190883191, "step": 560}, {"loss": 1.2479, "grad_norm": 0.7157464623451233, "learning_rate": 0.0002, "epoch": 3.247863247863248, "step": 570}, {"loss": 1.2426, "grad_norm": 0.8729232549667358, "learning_rate": 0.0002, "epoch": 3.304843304843305, "step": 580}, {"loss": 1.2957, "grad_norm": 0.7119743227958679, "learning_rate": 0.0002, "epoch": 3.3618233618233617, "step": 590}, {"loss": 1.2787, "grad_norm": 0.7417448163032532, "learning_rate": 0.0002, "epoch": 3.4188034188034186, "step": 600}, {"loss": 1.2317, "grad_norm": 0.8174124956130981, "learning_rate": 0.0002, "epoch": 3.4757834757834756, "step": 610}, {"loss": 1.2916, "grad_norm": 0.7199270129203796, "learning_rate": 0.0002, "epoch": 3.532763532763533, "step": 620}, {"loss": 1.2074, "grad_norm": 0.989138662815094, "learning_rate": 0.0002, "epoch": 3.58974358974359, "step": 630}, {"loss": 1.2263, "grad_norm": 0.75921630859375, "learning_rate": 0.0002, "epoch": 3.646723646723647, "step": 640}, {"loss": 1.2319, "grad_norm": 0.7844401001930237, "learning_rate": 0.0002, "epoch": 3.7037037037037037, "step": 650}, {"loss": 1.2851, "grad_norm": 0.9127110242843628, "learning_rate": 0.0002, "epoch": 3.7606837606837606, "step": 660}, {"loss": 1.2835, "grad_norm": 0.7972270846366882, "learning_rate": 0.0002, "epoch": 3.8176638176638176, "step": 670}, {"loss": 1.3105, "grad_norm": 0.7458992004394531, "learning_rate": 0.0002, "epoch": 3.8746438746438745, "step": 680}, {"loss": 1.3017, "grad_norm": 0.854924738407135, "learning_rate": 0.0002, "epoch": 3.931623931623932, "step": 690}, {"loss": 1.2455, "grad_norm": 0.7763816118240356, "learning_rate": 0.0002, "epoch": 3.9886039886039883, "step": 700}, {"eval_loss": 1.3988444805145264, "eval_runtime": 2.8697, "eval_samples_per_second": 34.15, "eval_steps_per_second": 4.53, "epoch": 4.0, "step": 702}, {"loss": 1.178, "grad_norm": 0.877430260181427, "learning_rate": 0.0002, "epoch": 4.045584045584046, "step": 710}, {"loss": 1.1635, "grad_norm": 0.8365248441696167, "learning_rate": 0.0002, "epoch": 4.102564102564102, "step": 720}, {"loss": 1.2286, "grad_norm": 0.7748925089836121, "learning_rate": 0.0002, "epoch": 4.15954415954416, "step": 730}, {"loss": 1.1836, "grad_norm": 0.7695241570472717, "learning_rate": 0.0002, "epoch": 4.216524216524217, "step": 740}, {"loss": 1.1685, "grad_norm": 0.7229928374290466, "learning_rate": 0.0002, "epoch": 4.273504273504273, "step": 750}, {"loss": 1.117, "grad_norm": 0.7035910487174988, "learning_rate": 0.0002, "epoch": 4.330484330484331, "step": 760}, {"loss": 1.189, "grad_norm": 0.9075796008110046, "learning_rate": 0.0002, "epoch": 4.387464387464387, "step": 770}, {"loss": 1.1693, "grad_norm": 0.7957494854927063, "learning_rate": 0.0002, "epoch": 4.444444444444445, "step": 780}, {"loss": 1.1945, "grad_norm": 0.8733780384063721, "learning_rate": 0.0002, "epoch": 4.501424501424501, "step": 790}, {"loss": 1.1867, "grad_norm": 0.8786619901657104, "learning_rate": 0.0002, "epoch": 4.5584045584045585, "step": 800}, {"loss": 1.185, "grad_norm": 0.7101715803146362, "learning_rate": 0.0002, "epoch": 4.615384615384615, "step": 810}, {"loss": 1.2063, "grad_norm": 0.7451328039169312, "learning_rate": 0.0002, "epoch": 4.672364672364672, "step": 820}, {"loss": 1.1939, "grad_norm": 0.7830713987350464, "learning_rate": 0.0002, "epoch": 4.72934472934473, "step": 830}, {"loss": 1.1251, "grad_norm": 0.7804535031318665, "learning_rate": 0.0002, "epoch": 4.786324786324786, "step": 840}, {"loss": 1.2278, "grad_norm": 0.8121811747550964, "learning_rate": 0.0002, "epoch": 4.843304843304844, "step": 850}, {"loss": 1.142, "grad_norm": 0.774864137172699, "learning_rate": 0.0002, "epoch": 4.9002849002849, "step": 860}, {"loss": 1.1736, "grad_norm": 0.7517814040184021, "learning_rate": 0.0002, "epoch": 4.957264957264957, "step": 870}, {"eval_loss": 1.4074795246124268, "eval_runtime": 2.8707, "eval_samples_per_second": 34.138, "eval_steps_per_second": 4.529, "epoch": 4.997150997150997, "step": 877}, {"loss": 1.151, "grad_norm": 0.7974972128868103, "learning_rate": 0.0002, "epoch": 5.014245014245014, "step": 880}, {"loss": 1.0637, "grad_norm": 1.1127357482910156, "learning_rate": 0.0002, "epoch": 5.071225071225071, "step": 890}, {"loss": 1.0497, "grad_norm": 0.8995195031166077, "learning_rate": 0.0002, "epoch": 5.128205128205128, "step": 900}, {"loss": 1.1101, "grad_norm": 0.8325890898704529, "learning_rate": 0.0002, "epoch": 5.185185185185185, "step": 910}, {"loss": 1.0567, "grad_norm": 0.8830686807632446, "learning_rate": 0.0002, "epoch": 5.2421652421652425, "step": 920}, {"loss": 1.1094, "grad_norm": 0.8856923580169678, "learning_rate": 0.0002, "epoch": 5.299145299145299, "step": 930}, {"loss": 1.0328, "grad_norm": 0.814587414264679, "learning_rate": 0.0002, "epoch": 5.356125356125356, "step": 940}, {"loss": 1.1379, "grad_norm": 0.9119254946708679, "learning_rate": 0.0002, "epoch": 5.413105413105413, "step": 950}, {"loss": 1.0993, "grad_norm": 0.8547661304473877, "learning_rate": 0.0002, "epoch": 5.47008547008547, "step": 960}, {"loss": 1.1137, "grad_norm": 0.943742036819458, "learning_rate": 0.0002, "epoch": 5.527065527065528, "step": 970}, {"loss": 1.0815, "grad_norm": 1.1333340406417847, "learning_rate": 0.0002, "epoch": 5.584045584045584, "step": 980}, {"loss": 1.1161, "grad_norm": 1.0290982723236084, "learning_rate": 0.0002, "epoch": 5.641025641025641, "step": 990}, {"loss": 1.1477, "grad_norm": 1.0613716840744019, "learning_rate": 0.0002, "epoch": 5.698005698005698, "step": 1000}, {"loss": 1.0878, "grad_norm": 0.925118088722229, "learning_rate": 0.0002, "epoch": 5.754985754985755, "step": 1010}, {"loss": 1.0658, "grad_norm": 0.828220546245575, "learning_rate": 0.0002, "epoch": 5.811965811965812, "step": 1020}, {"loss": 1.1179, "grad_norm": 0.7466493248939514, "learning_rate": 0.0002, "epoch": 5.868945868945869, "step": 1030}, {"loss": 1.1064, "grad_norm": 0.9189135432243347, "learning_rate": 0.0002, "epoch": 5.925925925925926, "step": 1040}, {"loss": 1.1114, "grad_norm": 0.9117513298988342, "learning_rate": 0.0002, "epoch": 5.982905982905983, "step": 1050}, {"eval_loss": 1.4333235025405884, "eval_runtime": 2.9267, "eval_samples_per_second": 33.485, "eval_steps_per_second": 4.442, "epoch": 6.0, "step": 1053}, {"loss": 1.0368, "grad_norm": 0.9506599307060242, "learning_rate": 0.0002, "epoch": 6.0398860398860394, "step": 1060}, {"loss": 1.0376, "grad_norm": 0.9809837937355042, "learning_rate": 0.0002, "epoch": 6.096866096866097, "step": 1070}, {"loss": 0.9849, "grad_norm": 0.852557361125946, "learning_rate": 0.0002, "epoch": 6.153846153846154, "step": 1080}, {"loss": 0.9782, "grad_norm": 1.135279893875122, "learning_rate": 0.0002, "epoch": 6.210826210826211, "step": 1090}, {"loss": 1.0238, "grad_norm": 1.0243879556655884, "learning_rate": 0.0002, "epoch": 6.267806267806268, "step": 1100}, {"loss": 0.9815, "grad_norm": 0.9213914275169373, "learning_rate": 0.0002, "epoch": 6.3247863247863245, "step": 1110}, {"loss": 0.9899, "grad_norm": 1.0042028427124023, "learning_rate": 0.0002, "epoch": 6.381766381766382, "step": 1120}, {"loss": 1.0249, "grad_norm": 1.1024253368377686, "learning_rate": 0.0002, "epoch": 6.438746438746438, "step": 1130}, {"loss": 1.0305, "grad_norm": 0.9245727062225342, "learning_rate": 0.0002, "epoch": 6.495726495726496, "step": 1140}, {"loss": 0.9766, "grad_norm": 0.8309272527694702, "learning_rate": 0.0002, "epoch": 6.552706552706553, "step": 1150}, {"loss": 0.9945, "grad_norm": 1.029746651649475, "learning_rate": 0.0002, "epoch": 6.60968660968661, "step": 1160}, {"loss": 0.9955, "grad_norm": 0.9932991862297058, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 1170}, {"loss": 1.0808, "grad_norm": 1.1597973108291626, "learning_rate": 0.0002, "epoch": 6.7236467236467234, "step": 1180}, {"loss": 1.0407, "grad_norm": 1.1373951435089111, "learning_rate": 0.0002, "epoch": 6.780626780626781, "step": 1190}, {"loss": 0.9513, "grad_norm": 1.0308297872543335, "learning_rate": 0.0002, "epoch": 6.837606837606837, "step": 1200}, {"loss": 1.0437, "grad_norm": 1.1555122137069702, "learning_rate": 0.0002, "epoch": 6.894586894586895, "step": 1210}, {"loss": 1.0164, "grad_norm": 0.9829897284507751, "learning_rate": 0.0002, "epoch": 6.951566951566951, "step": 1220}, {"eval_loss": 1.4769021272659302, "eval_runtime": 2.8681, "eval_samples_per_second": 34.168, "eval_steps_per_second": 4.533, "epoch": 6.997150997150997, "step": 1228}, {"loss": 1.0111, "grad_norm": 1.1083747148513794, "learning_rate": 0.0002, "epoch": 7.0085470085470085, "step": 1230}, {"loss": 0.9179, "grad_norm": 1.1454236507415771, "learning_rate": 0.0002, "epoch": 7.065527065527066, "step": 1240}, {"loss": 0.9271, "grad_norm": 0.9501869082450867, "learning_rate": 0.0002, "epoch": 7.122507122507122, "step": 1250}, {"loss": 0.9121, "grad_norm": 1.2393906116485596, "learning_rate": 0.0002, "epoch": 7.17948717948718, "step": 1260}, {"loss": 0.9396, "grad_norm": 1.0671173334121704, "learning_rate": 0.0002, "epoch": 7.236467236467236, "step": 1270}, {"loss": 0.9023, "grad_norm": 1.1026686429977417, "learning_rate": 0.0002, "epoch": 7.293447293447294, "step": 1280}, {"loss": 0.9267, "grad_norm": 1.2422044277191162, "learning_rate": 0.0002, "epoch": 7.35042735042735, "step": 1290}, {"loss": 0.8394, "grad_norm": 1.0772203207015991, "learning_rate": 0.0002, "epoch": 7.407407407407407, "step": 1300}, {"loss": 0.9361, "grad_norm": 1.1025265455245972, "learning_rate": 0.0002, "epoch": 7.464387464387464, "step": 1310}, {"loss": 0.9124, "grad_norm": 1.0232294797897339, "learning_rate": 0.0002, "epoch": 7.521367521367521, "step": 1320}, {"loss": 0.9412, "grad_norm": 1.2126119136810303, "learning_rate": 0.0002, "epoch": 7.578347578347579, "step": 1330}, {"loss": 0.9405, "grad_norm": 1.2359609603881836, "learning_rate": 0.0002, "epoch": 7.635327635327635, "step": 1340}, {"loss": 0.9096, "grad_norm": 0.9109336733818054, "learning_rate": 0.0002, "epoch": 7.6923076923076925, "step": 1350}, {"loss": 0.9038, "grad_norm": 1.0734586715698242, "learning_rate": 0.0002, "epoch": 7.749287749287749, "step": 1360}, {"loss": 0.9674, "grad_norm": 1.1725562810897827, "learning_rate": 0.0002, "epoch": 7.806267806267806, "step": 1370}, {"loss": 0.9211, "grad_norm": 1.0173115730285645, "learning_rate": 0.0002, "epoch": 7.863247863247864, "step": 1380}, {"loss": 0.9764, "grad_norm": 1.0145931243896484, "learning_rate": 0.0002, "epoch": 7.92022792022792, "step": 1390}, {"loss": 0.9465, "grad_norm": 1.1547255516052246, "learning_rate": 0.0002, "epoch": 7.977207977207978, "step": 1400}]}