diff --git a/.gitattributes b/.gitattributes index 18e44aa3f083b13e87a5a819318d3a5b89be97bc..099707ddd79adc0f11b4b66b516215ead5800837 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3422,3 +3422,12 @@ gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0 gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-5623-sd-1/checkpoint-8218/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-5623-sd-1/checkpoint-9392/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-it_int4_hellaswag-routerbench-0shot_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.8-num-5623-sd-1/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..109596fbe768580a1c4f560efa8a5a99f822a8aa --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e7766c9aae4d2fd34c0286b639660fa9abfa548c97dd1e795db9b07731b767 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..297e33bb55e834deb7246e7a32009e5318049302 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cf6b1e50e1c52de15774e4d67a5155c40c04f7dbae8f5de9f3f70f1d4ef2b6 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..607badf957d10930e5a9cc329b763d93dac11798 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b8888f98cbb338ac4a4d0abdcc1d2d1620eab5df580a348deafe766fc06acb72 +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3f075498ff27fd5d1f0963673cf28eb97b2dc686 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478113972e174c338abecc051a8c41b59427eb769e48c53edb8caee8216f7141 +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..bfb6114c3284b46ade676f401deff9e55e7dc0f2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97aad0a099d88064f47969b65325e965443831cb49c6f63faa162e9b6f09345c +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a7c14cf10afef52d1b5681477e45093e275e4a7c --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/trainer_state.json @@ -0,0 +1,1030 @@ +{ + "best_metric": 0.6257933378219604, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395", + "epoch": 3.0, + "eval_steps": 10, + "global_step": 1395, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.704968488747008e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfd77beeaba9bb06ee526b8e540726c081b0c6ea --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e541ab555a1d347ba51089b50ef294e08671de6d00f792adea734418e435599d +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..794a25cf13ccb30dc82f107feaf3dddb9fbdb8d4 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09e1da60b1d2d76754de5deaf49254cf1376561692f13d690f4adeb1394cec4f +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7dd62630aaeb0615031e83bb54590332decdae8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9acae9a74e6c3f8a91c318f8eb916624e759416f1baedf1f367b3c835fabf93c +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cfa91775459dca484c64b3ca54e66a486cab7330 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab700ed56a8ffed38e7a4929a8f2f7616aeebff9b9062279a518b87700a13d34 +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1bfc74243b202020a163a9bb218285f4f984a6bf --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/trainer_state.json @@ -0,0 +1,1367 @@ +{ + "best_metric": 0.5737715363502502, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860", + "epoch": 4.0, + "eval_steps": 10, + "global_step": 1860, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + }, + { + "epoch": 3.010752688172043, + "grad_norm": 0.8650677800178528, + "learning_rate": 0.0002, + "loss": 0.5696, + "step": 1400 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 0.8364197015762329, + "learning_rate": 0.0002, + "loss": 0.5483, + "step": 1410 + }, + { + "epoch": 3.053763440860215, + "grad_norm": 0.8278448581695557, + "learning_rate": 0.0002, + "loss": 0.5606, + "step": 1420 + }, + { + "epoch": 3.075268817204301, + "grad_norm": 0.8806642889976501, + "learning_rate": 0.0002, + "loss": 0.5572, + "step": 1430 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 0.8180029988288879, + "learning_rate": 0.0002, + "loss": 0.585, + "step": 1440 + }, + { + "epoch": 3.118279569892473, + "grad_norm": 0.8561782836914062, + "learning_rate": 0.0002, + "loss": 0.5667, + "step": 1450 + }, + { + "epoch": 3.139784946236559, + "grad_norm": 0.8377029299736023, + "learning_rate": 0.0002, + "loss": 0.5246, + "step": 1460 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 0.885779082775116, + "learning_rate": 0.0002, + "loss": 0.5464, + "step": 1470 + }, + { + "epoch": 3.182795698924731, + "grad_norm": 0.9388518333435059, + "learning_rate": 0.0002, + "loss": 0.541, + "step": 1480 + }, + { + "epoch": 3.204301075268817, + "grad_norm": 0.8816235661506653, + "learning_rate": 0.0002, + "loss": 0.5447, + "step": 1490 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 0.9885783791542053, + "learning_rate": 0.0002, + "loss": 0.5466, + "step": 1500 + }, + { + "epoch": 3.247311827956989, + "grad_norm": 0.8635850548744202, + "learning_rate": 0.0002, + "loss": 0.5455, + "step": 1510 + }, + { + "epoch": 3.268817204301075, + "grad_norm": 0.829853355884552, + "learning_rate": 0.0002, + "loss": 0.5419, + "step": 1520 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 0.9037486910820007, + "learning_rate": 0.0002, + "loss": 0.54, + "step": 1530 + }, + { + "epoch": 3.3118279569892475, + "grad_norm": 0.8173713684082031, + "learning_rate": 0.0002, + "loss": 0.5375, + "step": 1540 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.796953022480011, + "learning_rate": 0.0002, + "loss": 0.5405, + "step": 1550 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 0.7894400358200073, + "learning_rate": 0.0002, + "loss": 0.5505, + "step": 1560 + }, + { + "epoch": 3.3763440860215055, + "grad_norm": 0.9434949159622192, + "learning_rate": 0.0002, + "loss": 0.5395, + "step": 1570 + }, + { + "epoch": 3.3978494623655915, + "grad_norm": 0.8666760325431824, + "learning_rate": 0.0002, + "loss": 0.5271, + "step": 1580 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 0.7782467007637024, + "learning_rate": 0.0002, + "loss": 0.5439, + "step": 1590 + }, + { + "epoch": 3.4408602150537635, + "grad_norm": 0.8849126696586609, + "learning_rate": 0.0002, + "loss": 0.5161, + "step": 1600 + }, + { + "epoch": 3.4623655913978495, + "grad_norm": 0.7863831520080566, + "learning_rate": 0.0002, + "loss": 0.5353, + "step": 1610 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0002, + "loss": 0.5308, + "step": 1620 + }, + { + "epoch": 3.5053763440860215, + "grad_norm": 0.8307499289512634, + "learning_rate": 0.0002, + "loss": 0.5339, + "step": 1630 + }, + { + "epoch": 3.5268817204301075, + "grad_norm": 0.9132118821144104, + "learning_rate": 0.0002, + "loss": 0.5361, + "step": 1640 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 0.9322578310966492, + "learning_rate": 0.0002, + "loss": 0.5828, + "step": 1650 + }, + { + "epoch": 3.5698924731182795, + "grad_norm": 0.9782460331916809, + "learning_rate": 0.0002, + "loss": 0.546, + "step": 1660 + }, + { + "epoch": 3.5913978494623655, + "grad_norm": 0.7189919352531433, + "learning_rate": 0.0002, + "loss": 0.5424, + "step": 1670 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 0.9689221382141113, + "learning_rate": 0.0002, + "loss": 0.5514, + "step": 1680 + }, + { + "epoch": 3.6344086021505375, + "grad_norm": 0.9684675335884094, + "learning_rate": 0.0002, + "loss": 0.5379, + "step": 1690 + }, + { + "epoch": 3.6559139784946235, + "grad_norm": 0.8851472735404968, + "learning_rate": 0.0002, + "loss": 0.5748, + "step": 1700 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 0.7709833383560181, + "learning_rate": 0.0002, + "loss": 0.5412, + "step": 1710 + }, + { + "epoch": 3.698924731182796, + "grad_norm": 0.818236231803894, + "learning_rate": 0.0002, + "loss": 0.521, + "step": 1720 + }, + { + "epoch": 3.720430107526882, + "grad_norm": 0.870642364025116, + "learning_rate": 0.0002, + "loss": 0.5445, + "step": 1730 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 1.0245511531829834, + "learning_rate": 0.0002, + "loss": 0.5307, + "step": 1740 + }, + { + "epoch": 3.763440860215054, + "grad_norm": 0.8607558608055115, + "learning_rate": 0.0002, + "loss": 0.5593, + "step": 1750 + }, + { + "epoch": 3.78494623655914, + "grad_norm": 0.8511829972267151, + "learning_rate": 0.0002, + "loss": 0.536, + "step": 1760 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 0.7969087362289429, + "learning_rate": 0.0002, + "loss": 0.5193, + "step": 1770 + }, + { + "epoch": 3.827956989247312, + "grad_norm": 0.8457245826721191, + "learning_rate": 0.0002, + "loss": 0.5578, + "step": 1780 + }, + { + "epoch": 3.849462365591398, + "grad_norm": 0.8893467783927917, + "learning_rate": 0.0002, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 0.8593819737434387, + "learning_rate": 0.0002, + "loss": 0.5024, + "step": 1800 + }, + { + "epoch": 3.89247311827957, + "grad_norm": 0.7574560642242432, + "learning_rate": 0.0002, + "loss": 0.5134, + "step": 1810 + }, + { + "epoch": 3.913978494623656, + "grad_norm": 0.8681567311286926, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1820 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 0.9068132042884827, + "learning_rate": 0.0002, + "loss": 0.532, + "step": 1830 + }, + { + "epoch": 3.956989247311828, + "grad_norm": 0.8668948411941528, + "learning_rate": 0.0002, + "loss": 0.5427, + "step": 1840 + }, + { + "epoch": 3.978494623655914, + "grad_norm": 1.046032428741455, + "learning_rate": 0.0002, + "loss": 0.5349, + "step": 1850 + }, + { + "epoch": 4.0, + "grad_norm": 0.904780387878418, + "learning_rate": 0.0002, + "loss": 0.5087, + "step": 1860 + }, + { + "epoch": 4.0, + "eval_loss": 0.5737715363502502, + "eval_runtime": 21.4915, + "eval_samples_per_second": 15.401, + "eval_steps_per_second": 1.954, + "step": 1860 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.273291318329344e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa056367b45557b778367b6675f412237c70cea3 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a3a8816014f6b56e239eee03e09b0769511aa83826ed046f5efd719d015e965 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6abe8edcb5aee11be12a9f19776830275ab66a8c --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65edbec23b2e333180b69103be1e3e81617ef94611db51c279fd4ecd891f153 +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3edef16d50aeba6872f99d589bb61a56ffa1b359 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76d94a0b8ba8ae587f26bf24336817481dcc65d73576ae598d1102cde342b949 +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..9ce9ee917955ebc932f8a9f74e3434c7f701155b --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2afcae7bbfb81927fb154285fa5a98cc3acb8e8fa78aa6b1ed36a42f6b92d2ba +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d920ecc7e29eba2359fd6943f13e5ace96af55a0 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/trainer_state.json @@ -0,0 +1,1697 @@ +{ + "best_metric": 0.5363914370536804, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325", + "epoch": 5.0, + "eval_steps": 10, + "global_step": 2325, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + }, + { + "epoch": 3.010752688172043, + "grad_norm": 0.8650677800178528, + "learning_rate": 0.0002, + "loss": 0.5696, + "step": 1400 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 0.8364197015762329, + "learning_rate": 0.0002, + "loss": 0.5483, + "step": 1410 + }, + { + "epoch": 3.053763440860215, + "grad_norm": 0.8278448581695557, + "learning_rate": 0.0002, + "loss": 0.5606, + "step": 1420 + }, + { + "epoch": 3.075268817204301, + "grad_norm": 0.8806642889976501, + "learning_rate": 0.0002, + "loss": 0.5572, + "step": 1430 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 0.8180029988288879, + "learning_rate": 0.0002, + "loss": 0.585, + "step": 1440 + }, + { + "epoch": 3.118279569892473, + "grad_norm": 0.8561782836914062, + "learning_rate": 0.0002, + "loss": 0.5667, + "step": 1450 + }, + { + "epoch": 3.139784946236559, + "grad_norm": 0.8377029299736023, + "learning_rate": 0.0002, + "loss": 0.5246, + "step": 1460 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 0.885779082775116, + "learning_rate": 0.0002, + "loss": 0.5464, + "step": 1470 + }, + { + "epoch": 3.182795698924731, + "grad_norm": 0.9388518333435059, + "learning_rate": 0.0002, + "loss": 0.541, + "step": 1480 + }, + { + "epoch": 3.204301075268817, + "grad_norm": 0.8816235661506653, + "learning_rate": 0.0002, + "loss": 0.5447, + "step": 1490 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 0.9885783791542053, + "learning_rate": 0.0002, + "loss": 0.5466, + "step": 1500 + }, + { + "epoch": 3.247311827956989, + "grad_norm": 0.8635850548744202, + "learning_rate": 0.0002, + "loss": 0.5455, + "step": 1510 + }, + { + "epoch": 3.268817204301075, + "grad_norm": 0.829853355884552, + "learning_rate": 0.0002, + "loss": 0.5419, + "step": 1520 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 0.9037486910820007, + "learning_rate": 0.0002, + "loss": 0.54, + "step": 1530 + }, + { + "epoch": 3.3118279569892475, + "grad_norm": 0.8173713684082031, + "learning_rate": 0.0002, + "loss": 0.5375, + "step": 1540 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.796953022480011, + "learning_rate": 0.0002, + "loss": 0.5405, + "step": 1550 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 0.7894400358200073, + "learning_rate": 0.0002, + "loss": 0.5505, + "step": 1560 + }, + { + "epoch": 3.3763440860215055, + "grad_norm": 0.9434949159622192, + "learning_rate": 0.0002, + "loss": 0.5395, + "step": 1570 + }, + { + "epoch": 3.3978494623655915, + "grad_norm": 0.8666760325431824, + "learning_rate": 0.0002, + "loss": 0.5271, + "step": 1580 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 0.7782467007637024, + "learning_rate": 0.0002, + "loss": 0.5439, + "step": 1590 + }, + { + "epoch": 3.4408602150537635, + "grad_norm": 0.8849126696586609, + "learning_rate": 0.0002, + "loss": 0.5161, + "step": 1600 + }, + { + "epoch": 3.4623655913978495, + "grad_norm": 0.7863831520080566, + "learning_rate": 0.0002, + "loss": 0.5353, + "step": 1610 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0002, + "loss": 0.5308, + "step": 1620 + }, + { + "epoch": 3.5053763440860215, + "grad_norm": 0.8307499289512634, + "learning_rate": 0.0002, + "loss": 0.5339, + "step": 1630 + }, + { + "epoch": 3.5268817204301075, + "grad_norm": 0.9132118821144104, + "learning_rate": 0.0002, + "loss": 0.5361, + "step": 1640 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 0.9322578310966492, + "learning_rate": 0.0002, + "loss": 0.5828, + "step": 1650 + }, + { + "epoch": 3.5698924731182795, + "grad_norm": 0.9782460331916809, + "learning_rate": 0.0002, + "loss": 0.546, + "step": 1660 + }, + { + "epoch": 3.5913978494623655, + "grad_norm": 0.7189919352531433, + "learning_rate": 0.0002, + "loss": 0.5424, + "step": 1670 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 0.9689221382141113, + "learning_rate": 0.0002, + "loss": 0.5514, + "step": 1680 + }, + { + "epoch": 3.6344086021505375, + "grad_norm": 0.9684675335884094, + "learning_rate": 0.0002, + "loss": 0.5379, + "step": 1690 + }, + { + "epoch": 3.6559139784946235, + "grad_norm": 0.8851472735404968, + "learning_rate": 0.0002, + "loss": 0.5748, + "step": 1700 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 0.7709833383560181, + "learning_rate": 0.0002, + "loss": 0.5412, + "step": 1710 + }, + { + "epoch": 3.698924731182796, + "grad_norm": 0.818236231803894, + "learning_rate": 0.0002, + "loss": 0.521, + "step": 1720 + }, + { + "epoch": 3.720430107526882, + "grad_norm": 0.870642364025116, + "learning_rate": 0.0002, + "loss": 0.5445, + "step": 1730 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 1.0245511531829834, + "learning_rate": 0.0002, + "loss": 0.5307, + "step": 1740 + }, + { + "epoch": 3.763440860215054, + "grad_norm": 0.8607558608055115, + "learning_rate": 0.0002, + "loss": 0.5593, + "step": 1750 + }, + { + "epoch": 3.78494623655914, + "grad_norm": 0.8511829972267151, + "learning_rate": 0.0002, + "loss": 0.536, + "step": 1760 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 0.7969087362289429, + "learning_rate": 0.0002, + "loss": 0.5193, + "step": 1770 + }, + { + "epoch": 3.827956989247312, + "grad_norm": 0.8457245826721191, + "learning_rate": 0.0002, + "loss": 0.5578, + "step": 1780 + }, + { + "epoch": 3.849462365591398, + "grad_norm": 0.8893467783927917, + "learning_rate": 0.0002, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 0.8593819737434387, + "learning_rate": 0.0002, + "loss": 0.5024, + "step": 1800 + }, + { + "epoch": 3.89247311827957, + "grad_norm": 0.7574560642242432, + "learning_rate": 0.0002, + "loss": 0.5134, + "step": 1810 + }, + { + "epoch": 3.913978494623656, + "grad_norm": 0.8681567311286926, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1820 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 0.9068132042884827, + "learning_rate": 0.0002, + "loss": 0.532, + "step": 1830 + }, + { + "epoch": 3.956989247311828, + "grad_norm": 0.8668948411941528, + "learning_rate": 0.0002, + "loss": 0.5427, + "step": 1840 + }, + { + "epoch": 3.978494623655914, + "grad_norm": 1.046032428741455, + "learning_rate": 0.0002, + "loss": 0.5349, + "step": 1850 + }, + { + "epoch": 4.0, + "grad_norm": 0.904780387878418, + "learning_rate": 0.0002, + "loss": 0.5087, + "step": 1860 + }, + { + "epoch": 4.0, + "eval_loss": 0.5737715363502502, + "eval_runtime": 21.4915, + "eval_samples_per_second": 15.401, + "eval_steps_per_second": 1.954, + "step": 1860 + }, + { + "epoch": 4.021505376344086, + "grad_norm": 0.8611752986907959, + "learning_rate": 0.0002, + "loss": 0.4843, + "step": 1870 + }, + { + "epoch": 4.043010752688172, + "grad_norm": 0.838782548904419, + "learning_rate": 0.0002, + "loss": 0.4814, + "step": 1880 + }, + { + "epoch": 4.064516129032258, + "grad_norm": 0.9119709134101868, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1890 + }, + { + "epoch": 4.086021505376344, + "grad_norm": 0.8026251196861267, + "learning_rate": 0.0002, + "loss": 0.4951, + "step": 1900 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.8773705363273621, + "learning_rate": 0.0002, + "loss": 0.491, + "step": 1910 + }, + { + "epoch": 4.129032258064516, + "grad_norm": 0.8762255907058716, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1920 + }, + { + "epoch": 4.150537634408602, + "grad_norm": 0.8371861577033997, + "learning_rate": 0.0002, + "loss": 0.4816, + "step": 1930 + }, + { + "epoch": 4.172043010752688, + "grad_norm": 0.9703728556632996, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1940 + }, + { + "epoch": 4.193548387096774, + "grad_norm": 0.8802874684333801, + "learning_rate": 0.0002, + "loss": 0.4772, + "step": 1950 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 1.0103057622909546, + "learning_rate": 0.0002, + "loss": 0.5032, + "step": 1960 + }, + { + "epoch": 4.236559139784946, + "grad_norm": 0.9212995171546936, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 1970 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 1.009544849395752, + "learning_rate": 0.0002, + "loss": 0.4753, + "step": 1980 + }, + { + "epoch": 4.279569892473118, + "grad_norm": 0.8535077571868896, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 1990 + }, + { + "epoch": 4.301075268817204, + "grad_norm": 0.8363022804260254, + "learning_rate": 0.0002, + "loss": 0.4782, + "step": 2000 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.9041762948036194, + "learning_rate": 0.0002, + "loss": 0.4875, + "step": 2010 + }, + { + "epoch": 4.344086021505376, + "grad_norm": 0.960790753364563, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2020 + }, + { + "epoch": 4.365591397849462, + "grad_norm": 0.8823095560073853, + "learning_rate": 0.0002, + "loss": 0.4626, + "step": 2030 + }, + { + "epoch": 4.387096774193548, + "grad_norm": 0.952100396156311, + "learning_rate": 0.0002, + "loss": 0.4883, + "step": 2040 + }, + { + "epoch": 4.408602150537634, + "grad_norm": 1.0793498754501343, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 2050 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.8987208008766174, + "learning_rate": 0.0002, + "loss": 0.4827, + "step": 2060 + }, + { + "epoch": 4.451612903225806, + "grad_norm": 0.8539772033691406, + "learning_rate": 0.0002, + "loss": 0.4594, + "step": 2070 + }, + { + "epoch": 4.473118279569892, + "grad_norm": 0.9160863757133484, + "learning_rate": 0.0002, + "loss": 0.4752, + "step": 2080 + }, + { + "epoch": 4.494623655913978, + "grad_norm": 0.9946850538253784, + "learning_rate": 0.0002, + "loss": 0.5033, + "step": 2090 + }, + { + "epoch": 4.516129032258064, + "grad_norm": 0.908039391040802, + "learning_rate": 0.0002, + "loss": 0.4842, + "step": 2100 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 1.1462254524230957, + "learning_rate": 0.0002, + "loss": 0.4861, + "step": 2110 + }, + { + "epoch": 4.559139784946236, + "grad_norm": 0.8392056226730347, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 2120 + }, + { + "epoch": 4.580645161290323, + "grad_norm": 0.9673896431922913, + "learning_rate": 0.0002, + "loss": 0.4824, + "step": 2130 + }, + { + "epoch": 4.602150537634409, + "grad_norm": 0.9047091603279114, + "learning_rate": 0.0002, + "loss": 0.4665, + "step": 2140 + }, + { + "epoch": 4.623655913978495, + "grad_norm": 0.9013425707817078, + "learning_rate": 0.0002, + "loss": 0.4714, + "step": 2150 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.8899165391921997, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 2160 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.748602569103241, + "learning_rate": 0.0002, + "loss": 0.4635, + "step": 2170 + }, + { + "epoch": 4.688172043010753, + "grad_norm": 0.8694155216217041, + "learning_rate": 0.0002, + "loss": 0.4695, + "step": 2180 + }, + { + "epoch": 4.709677419354839, + "grad_norm": 0.9134316444396973, + "learning_rate": 0.0002, + "loss": 0.4929, + "step": 2190 + }, + { + "epoch": 4.731182795698925, + "grad_norm": 0.8504763245582581, + "learning_rate": 0.0002, + "loss": 0.4855, + "step": 2200 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 1.0321544408798218, + "learning_rate": 0.0002, + "loss": 0.4517, + "step": 2210 + }, + { + "epoch": 4.774193548387097, + "grad_norm": 0.9368237257003784, + "learning_rate": 0.0002, + "loss": 0.4796, + "step": 2220 + }, + { + "epoch": 4.795698924731183, + "grad_norm": 0.9319947361946106, + "learning_rate": 0.0002, + "loss": 0.4837, + "step": 2230 + }, + { + "epoch": 4.817204301075269, + "grad_norm": 0.904333770275116, + "learning_rate": 0.0002, + "loss": 0.4696, + "step": 2240 + }, + { + "epoch": 4.838709677419355, + "grad_norm": 0.8097078204154968, + "learning_rate": 0.0002, + "loss": 0.4746, + "step": 2250 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.9128859043121338, + "learning_rate": 0.0002, + "loss": 0.4438, + "step": 2260 + }, + { + "epoch": 4.881720430107527, + "grad_norm": 0.883129894733429, + "learning_rate": 0.0002, + "loss": 0.4693, + "step": 2270 + }, + { + "epoch": 4.903225806451613, + "grad_norm": 0.85712730884552, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2280 + }, + { + "epoch": 4.924731182795699, + "grad_norm": 1.2101863622665405, + "learning_rate": 0.0002, + "loss": 0.4593, + "step": 2290 + }, + { + "epoch": 4.946236559139785, + "grad_norm": 0.917966902256012, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2300 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.0002, + "loss": 0.4666, + "step": 2310 + }, + { + "epoch": 4.989247311827957, + "grad_norm": 1.0199906826019287, + "learning_rate": 0.0002, + "loss": 0.4629, + "step": 2320 + }, + { + "epoch": 5.0, + "eval_loss": 0.5363914370536804, + "eval_runtime": 21.3941, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.963, + "step": 2325 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.84161414791168e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3470024eb7a2c3549695e46e24bf3e15184388a --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:000663c19f6be785e72fa959fb587169066699d674d8bfee57cbf973cb87e984 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28d6e106f46ddda3b3229e66e2d0a1f4bace4e38 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3c8a8ef88f82223655c75a2ed9e3c85789689fd8bed182ccbb066f87bfcc34 +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b067621f71756ec961f604616d7abedc3836d6a5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8237fb590cc4741b8cdb25f103d4ff4351eda1d6bd6372159499b51d44b3fbfb +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4ac258a9c1113698d9863b2652a5ee759ceaa463 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3b25b7e8485ad3adf27ba5c1b150b8d23655686491e68d0ee87d9c1bea784d9 +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..620fc3de22756106f08cb63afdc47ec190d59ccb --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/trainer_state.json @@ -0,0 +1,2034 @@ +{ + "best_metric": 0.5057176947593689, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790", + "epoch": 6.0, + "eval_steps": 10, + "global_step": 2790, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + }, + { + "epoch": 3.010752688172043, + "grad_norm": 0.8650677800178528, + "learning_rate": 0.0002, + "loss": 0.5696, + "step": 1400 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 0.8364197015762329, + "learning_rate": 0.0002, + "loss": 0.5483, + "step": 1410 + }, + { + "epoch": 3.053763440860215, + "grad_norm": 0.8278448581695557, + "learning_rate": 0.0002, + "loss": 0.5606, + "step": 1420 + }, + { + "epoch": 3.075268817204301, + "grad_norm": 0.8806642889976501, + "learning_rate": 0.0002, + "loss": 0.5572, + "step": 1430 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 0.8180029988288879, + "learning_rate": 0.0002, + "loss": 0.585, + "step": 1440 + }, + { + "epoch": 3.118279569892473, + "grad_norm": 0.8561782836914062, + "learning_rate": 0.0002, + "loss": 0.5667, + "step": 1450 + }, + { + "epoch": 3.139784946236559, + "grad_norm": 0.8377029299736023, + "learning_rate": 0.0002, + "loss": 0.5246, + "step": 1460 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 0.885779082775116, + "learning_rate": 0.0002, + "loss": 0.5464, + "step": 1470 + }, + { + "epoch": 3.182795698924731, + "grad_norm": 0.9388518333435059, + "learning_rate": 0.0002, + "loss": 0.541, + "step": 1480 + }, + { + "epoch": 3.204301075268817, + "grad_norm": 0.8816235661506653, + "learning_rate": 0.0002, + "loss": 0.5447, + "step": 1490 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 0.9885783791542053, + "learning_rate": 0.0002, + "loss": 0.5466, + "step": 1500 + }, + { + "epoch": 3.247311827956989, + "grad_norm": 0.8635850548744202, + "learning_rate": 0.0002, + "loss": 0.5455, + "step": 1510 + }, + { + "epoch": 3.268817204301075, + "grad_norm": 0.829853355884552, + "learning_rate": 0.0002, + "loss": 0.5419, + "step": 1520 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 0.9037486910820007, + "learning_rate": 0.0002, + "loss": 0.54, + "step": 1530 + }, + { + "epoch": 3.3118279569892475, + "grad_norm": 0.8173713684082031, + "learning_rate": 0.0002, + "loss": 0.5375, + "step": 1540 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.796953022480011, + "learning_rate": 0.0002, + "loss": 0.5405, + "step": 1550 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 0.7894400358200073, + "learning_rate": 0.0002, + "loss": 0.5505, + "step": 1560 + }, + { + "epoch": 3.3763440860215055, + "grad_norm": 0.9434949159622192, + "learning_rate": 0.0002, + "loss": 0.5395, + "step": 1570 + }, + { + "epoch": 3.3978494623655915, + "grad_norm": 0.8666760325431824, + "learning_rate": 0.0002, + "loss": 0.5271, + "step": 1580 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 0.7782467007637024, + "learning_rate": 0.0002, + "loss": 0.5439, + "step": 1590 + }, + { + "epoch": 3.4408602150537635, + "grad_norm": 0.8849126696586609, + "learning_rate": 0.0002, + "loss": 0.5161, + "step": 1600 + }, + { + "epoch": 3.4623655913978495, + "grad_norm": 0.7863831520080566, + "learning_rate": 0.0002, + "loss": 0.5353, + "step": 1610 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0002, + "loss": 0.5308, + "step": 1620 + }, + { + "epoch": 3.5053763440860215, + "grad_norm": 0.8307499289512634, + "learning_rate": 0.0002, + "loss": 0.5339, + "step": 1630 + }, + { + "epoch": 3.5268817204301075, + "grad_norm": 0.9132118821144104, + "learning_rate": 0.0002, + "loss": 0.5361, + "step": 1640 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 0.9322578310966492, + "learning_rate": 0.0002, + "loss": 0.5828, + "step": 1650 + }, + { + "epoch": 3.5698924731182795, + "grad_norm": 0.9782460331916809, + "learning_rate": 0.0002, + "loss": 0.546, + "step": 1660 + }, + { + "epoch": 3.5913978494623655, + "grad_norm": 0.7189919352531433, + "learning_rate": 0.0002, + "loss": 0.5424, + "step": 1670 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 0.9689221382141113, + "learning_rate": 0.0002, + "loss": 0.5514, + "step": 1680 + }, + { + "epoch": 3.6344086021505375, + "grad_norm": 0.9684675335884094, + "learning_rate": 0.0002, + "loss": 0.5379, + "step": 1690 + }, + { + "epoch": 3.6559139784946235, + "grad_norm": 0.8851472735404968, + "learning_rate": 0.0002, + "loss": 0.5748, + "step": 1700 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 0.7709833383560181, + "learning_rate": 0.0002, + "loss": 0.5412, + "step": 1710 + }, + { + "epoch": 3.698924731182796, + "grad_norm": 0.818236231803894, + "learning_rate": 0.0002, + "loss": 0.521, + "step": 1720 + }, + { + "epoch": 3.720430107526882, + "grad_norm": 0.870642364025116, + "learning_rate": 0.0002, + "loss": 0.5445, + "step": 1730 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 1.0245511531829834, + "learning_rate": 0.0002, + "loss": 0.5307, + "step": 1740 + }, + { + "epoch": 3.763440860215054, + "grad_norm": 0.8607558608055115, + "learning_rate": 0.0002, + "loss": 0.5593, + "step": 1750 + }, + { + "epoch": 3.78494623655914, + "grad_norm": 0.8511829972267151, + "learning_rate": 0.0002, + "loss": 0.536, + "step": 1760 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 0.7969087362289429, + "learning_rate": 0.0002, + "loss": 0.5193, + "step": 1770 + }, + { + "epoch": 3.827956989247312, + "grad_norm": 0.8457245826721191, + "learning_rate": 0.0002, + "loss": 0.5578, + "step": 1780 + }, + { + "epoch": 3.849462365591398, + "grad_norm": 0.8893467783927917, + "learning_rate": 0.0002, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 0.8593819737434387, + "learning_rate": 0.0002, + "loss": 0.5024, + "step": 1800 + }, + { + "epoch": 3.89247311827957, + "grad_norm": 0.7574560642242432, + "learning_rate": 0.0002, + "loss": 0.5134, + "step": 1810 + }, + { + "epoch": 3.913978494623656, + "grad_norm": 0.8681567311286926, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1820 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 0.9068132042884827, + "learning_rate": 0.0002, + "loss": 0.532, + "step": 1830 + }, + { + "epoch": 3.956989247311828, + "grad_norm": 0.8668948411941528, + "learning_rate": 0.0002, + "loss": 0.5427, + "step": 1840 + }, + { + "epoch": 3.978494623655914, + "grad_norm": 1.046032428741455, + "learning_rate": 0.0002, + "loss": 0.5349, + "step": 1850 + }, + { + "epoch": 4.0, + "grad_norm": 0.904780387878418, + "learning_rate": 0.0002, + "loss": 0.5087, + "step": 1860 + }, + { + "epoch": 4.0, + "eval_loss": 0.5737715363502502, + "eval_runtime": 21.4915, + "eval_samples_per_second": 15.401, + "eval_steps_per_second": 1.954, + "step": 1860 + }, + { + "epoch": 4.021505376344086, + "grad_norm": 0.8611752986907959, + "learning_rate": 0.0002, + "loss": 0.4843, + "step": 1870 + }, + { + "epoch": 4.043010752688172, + "grad_norm": 0.838782548904419, + "learning_rate": 0.0002, + "loss": 0.4814, + "step": 1880 + }, + { + "epoch": 4.064516129032258, + "grad_norm": 0.9119709134101868, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1890 + }, + { + "epoch": 4.086021505376344, + "grad_norm": 0.8026251196861267, + "learning_rate": 0.0002, + "loss": 0.4951, + "step": 1900 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.8773705363273621, + "learning_rate": 0.0002, + "loss": 0.491, + "step": 1910 + }, + { + "epoch": 4.129032258064516, + "grad_norm": 0.8762255907058716, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1920 + }, + { + "epoch": 4.150537634408602, + "grad_norm": 0.8371861577033997, + "learning_rate": 0.0002, + "loss": 0.4816, + "step": 1930 + }, + { + "epoch": 4.172043010752688, + "grad_norm": 0.9703728556632996, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1940 + }, + { + "epoch": 4.193548387096774, + "grad_norm": 0.8802874684333801, + "learning_rate": 0.0002, + "loss": 0.4772, + "step": 1950 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 1.0103057622909546, + "learning_rate": 0.0002, + "loss": 0.5032, + "step": 1960 + }, + { + "epoch": 4.236559139784946, + "grad_norm": 0.9212995171546936, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 1970 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 1.009544849395752, + "learning_rate": 0.0002, + "loss": 0.4753, + "step": 1980 + }, + { + "epoch": 4.279569892473118, + "grad_norm": 0.8535077571868896, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 1990 + }, + { + "epoch": 4.301075268817204, + "grad_norm": 0.8363022804260254, + "learning_rate": 0.0002, + "loss": 0.4782, + "step": 2000 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.9041762948036194, + "learning_rate": 0.0002, + "loss": 0.4875, + "step": 2010 + }, + { + "epoch": 4.344086021505376, + "grad_norm": 0.960790753364563, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2020 + }, + { + "epoch": 4.365591397849462, + "grad_norm": 0.8823095560073853, + "learning_rate": 0.0002, + "loss": 0.4626, + "step": 2030 + }, + { + "epoch": 4.387096774193548, + "grad_norm": 0.952100396156311, + "learning_rate": 0.0002, + "loss": 0.4883, + "step": 2040 + }, + { + "epoch": 4.408602150537634, + "grad_norm": 1.0793498754501343, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 2050 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.8987208008766174, + "learning_rate": 0.0002, + "loss": 0.4827, + "step": 2060 + }, + { + "epoch": 4.451612903225806, + "grad_norm": 0.8539772033691406, + "learning_rate": 0.0002, + "loss": 0.4594, + "step": 2070 + }, + { + "epoch": 4.473118279569892, + "grad_norm": 0.9160863757133484, + "learning_rate": 0.0002, + "loss": 0.4752, + "step": 2080 + }, + { + "epoch": 4.494623655913978, + "grad_norm": 0.9946850538253784, + "learning_rate": 0.0002, + "loss": 0.5033, + "step": 2090 + }, + { + "epoch": 4.516129032258064, + "grad_norm": 0.908039391040802, + "learning_rate": 0.0002, + "loss": 0.4842, + "step": 2100 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 1.1462254524230957, + "learning_rate": 0.0002, + "loss": 0.4861, + "step": 2110 + }, + { + "epoch": 4.559139784946236, + "grad_norm": 0.8392056226730347, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 2120 + }, + { + "epoch": 4.580645161290323, + "grad_norm": 0.9673896431922913, + "learning_rate": 0.0002, + "loss": 0.4824, + "step": 2130 + }, + { + "epoch": 4.602150537634409, + "grad_norm": 0.9047091603279114, + "learning_rate": 0.0002, + "loss": 0.4665, + "step": 2140 + }, + { + "epoch": 4.623655913978495, + "grad_norm": 0.9013425707817078, + "learning_rate": 0.0002, + "loss": 0.4714, + "step": 2150 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.8899165391921997, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 2160 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.748602569103241, + "learning_rate": 0.0002, + "loss": 0.4635, + "step": 2170 + }, + { + "epoch": 4.688172043010753, + "grad_norm": 0.8694155216217041, + "learning_rate": 0.0002, + "loss": 0.4695, + "step": 2180 + }, + { + "epoch": 4.709677419354839, + "grad_norm": 0.9134316444396973, + "learning_rate": 0.0002, + "loss": 0.4929, + "step": 2190 + }, + { + "epoch": 4.731182795698925, + "grad_norm": 0.8504763245582581, + "learning_rate": 0.0002, + "loss": 0.4855, + "step": 2200 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 1.0321544408798218, + "learning_rate": 0.0002, + "loss": 0.4517, + "step": 2210 + }, + { + "epoch": 4.774193548387097, + "grad_norm": 0.9368237257003784, + "learning_rate": 0.0002, + "loss": 0.4796, + "step": 2220 + }, + { + "epoch": 4.795698924731183, + "grad_norm": 0.9319947361946106, + "learning_rate": 0.0002, + "loss": 0.4837, + "step": 2230 + }, + { + "epoch": 4.817204301075269, + "grad_norm": 0.904333770275116, + "learning_rate": 0.0002, + "loss": 0.4696, + "step": 2240 + }, + { + "epoch": 4.838709677419355, + "grad_norm": 0.8097078204154968, + "learning_rate": 0.0002, + "loss": 0.4746, + "step": 2250 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.9128859043121338, + "learning_rate": 0.0002, + "loss": 0.4438, + "step": 2260 + }, + { + "epoch": 4.881720430107527, + "grad_norm": 0.883129894733429, + "learning_rate": 0.0002, + "loss": 0.4693, + "step": 2270 + }, + { + "epoch": 4.903225806451613, + "grad_norm": 0.85712730884552, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2280 + }, + { + "epoch": 4.924731182795699, + "grad_norm": 1.2101863622665405, + "learning_rate": 0.0002, + "loss": 0.4593, + "step": 2290 + }, + { + "epoch": 4.946236559139785, + "grad_norm": 0.917966902256012, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2300 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.0002, + "loss": 0.4666, + "step": 2310 + }, + { + "epoch": 4.989247311827957, + "grad_norm": 1.0199906826019287, + "learning_rate": 0.0002, + "loss": 0.4629, + "step": 2320 + }, + { + "epoch": 5.0, + "eval_loss": 0.5363914370536804, + "eval_runtime": 21.3941, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.963, + "step": 2325 + }, + { + "epoch": 5.010752688172043, + "grad_norm": 0.8580502271652222, + "learning_rate": 0.0002, + "loss": 0.4543, + "step": 2330 + }, + { + "epoch": 5.032258064516129, + "grad_norm": 0.7702704668045044, + "learning_rate": 0.0002, + "loss": 0.404, + "step": 2340 + }, + { + "epoch": 5.053763440860215, + "grad_norm": 0.9417401552200317, + "learning_rate": 0.0002, + "loss": 0.4408, + "step": 2350 + }, + { + "epoch": 5.075268817204301, + "grad_norm": 0.9461463689804077, + "learning_rate": 0.0002, + "loss": 0.4306, + "step": 2360 + }, + { + "epoch": 5.096774193548387, + "grad_norm": 0.8931282162666321, + "learning_rate": 0.0002, + "loss": 0.4251, + "step": 2370 + }, + { + "epoch": 5.118279569892473, + "grad_norm": 1.000909447669983, + "learning_rate": 0.0002, + "loss": 0.4249, + "step": 2380 + }, + { + "epoch": 5.139784946236559, + "grad_norm": 0.8640249967575073, + "learning_rate": 0.0002, + "loss": 0.4231, + "step": 2390 + }, + { + "epoch": 5.161290322580645, + "grad_norm": 1.0451020002365112, + "learning_rate": 0.0002, + "loss": 0.4272, + "step": 2400 + }, + { + "epoch": 5.182795698924731, + "grad_norm": 0.7896912097930908, + "learning_rate": 0.0002, + "loss": 0.4177, + "step": 2410 + }, + { + "epoch": 5.204301075268817, + "grad_norm": 0.8424463272094727, + "learning_rate": 0.0002, + "loss": 0.4116, + "step": 2420 + }, + { + "epoch": 5.225806451612903, + "grad_norm": 1.0852105617523193, + "learning_rate": 0.0002, + "loss": 0.4225, + "step": 2430 + }, + { + "epoch": 5.247311827956989, + "grad_norm": 0.9285983443260193, + "learning_rate": 0.0002, + "loss": 0.4352, + "step": 2440 + }, + { + "epoch": 5.268817204301075, + "grad_norm": 0.9119299054145813, + "learning_rate": 0.0002, + "loss": 0.4262, + "step": 2450 + }, + { + "epoch": 5.290322580645161, + "grad_norm": 0.8790456056594849, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2460 + }, + { + "epoch": 5.311827956989247, + "grad_norm": 0.8726504445075989, + "learning_rate": 0.0002, + "loss": 0.4421, + "step": 2470 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.9415227770805359, + "learning_rate": 0.0002, + "loss": 0.4372, + "step": 2480 + }, + { + "epoch": 5.354838709677419, + "grad_norm": 0.9133324027061462, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2490 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 0.9567879438400269, + "learning_rate": 0.0002, + "loss": 0.4401, + "step": 2500 + }, + { + "epoch": 5.397849462365591, + "grad_norm": 0.9239469766616821, + "learning_rate": 0.0002, + "loss": 0.4094, + "step": 2510 + }, + { + "epoch": 5.419354838709677, + "grad_norm": 1.0293527841567993, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 2520 + }, + { + "epoch": 5.440860215053763, + "grad_norm": 0.8618718981742859, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2530 + }, + { + "epoch": 5.462365591397849, + "grad_norm": 0.740166187286377, + "learning_rate": 0.0002, + "loss": 0.462, + "step": 2540 + }, + { + "epoch": 5.483870967741936, + "grad_norm": 0.901566743850708, + "learning_rate": 0.0002, + "loss": 0.4172, + "step": 2550 + }, + { + "epoch": 5.505376344086022, + "grad_norm": 0.7957597970962524, + "learning_rate": 0.0002, + "loss": 0.4315, + "step": 2560 + }, + { + "epoch": 5.526881720430108, + "grad_norm": 1.1139343976974487, + "learning_rate": 0.0002, + "loss": 0.4263, + "step": 2570 + }, + { + "epoch": 5.548387096774194, + "grad_norm": 0.989765465259552, + "learning_rate": 0.0002, + "loss": 0.4056, + "step": 2580 + }, + { + "epoch": 5.56989247311828, + "grad_norm": 0.9416969418525696, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2590 + }, + { + "epoch": 5.591397849462366, + "grad_norm": 0.9184830784797668, + "learning_rate": 0.0002, + "loss": 0.4363, + "step": 2600 + }, + { + "epoch": 5.612903225806452, + "grad_norm": 1.0512700080871582, + "learning_rate": 0.0002, + "loss": 0.432, + "step": 2610 + }, + { + "epoch": 5.634408602150538, + "grad_norm": 0.901462197303772, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2620 + }, + { + "epoch": 5.655913978494624, + "grad_norm": 0.9732566475868225, + "learning_rate": 0.0002, + "loss": 0.4332, + "step": 2630 + }, + { + "epoch": 5.67741935483871, + "grad_norm": 0.8180275559425354, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2640 + }, + { + "epoch": 5.698924731182796, + "grad_norm": 1.1354765892028809, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2650 + }, + { + "epoch": 5.720430107526882, + "grad_norm": 0.9161503314971924, + "learning_rate": 0.0002, + "loss": 0.4409, + "step": 2660 + }, + { + "epoch": 5.741935483870968, + "grad_norm": 1.0561772584915161, + "learning_rate": 0.0002, + "loss": 0.4394, + "step": 2670 + }, + { + "epoch": 5.763440860215054, + "grad_norm": 0.7712787389755249, + "learning_rate": 0.0002, + "loss": 0.424, + "step": 2680 + }, + { + "epoch": 5.78494623655914, + "grad_norm": 0.9674550294876099, + "learning_rate": 0.0002, + "loss": 0.4326, + "step": 2690 + }, + { + "epoch": 5.806451612903226, + "grad_norm": 0.7531843781471252, + "learning_rate": 0.0002, + "loss": 0.4459, + "step": 2700 + }, + { + "epoch": 5.827956989247312, + "grad_norm": 1.1332131624221802, + "learning_rate": 0.0002, + "loss": 0.4276, + "step": 2710 + }, + { + "epoch": 5.849462365591398, + "grad_norm": 0.9367414116859436, + "learning_rate": 0.0002, + "loss": 0.4113, + "step": 2720 + }, + { + "epoch": 5.870967741935484, + "grad_norm": 0.8267706632614136, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2730 + }, + { + "epoch": 5.89247311827957, + "grad_norm": 1.1040657758712769, + "learning_rate": 0.0002, + "loss": 0.4218, + "step": 2740 + }, + { + "epoch": 5.913978494623656, + "grad_norm": 0.8879582285881042, + "learning_rate": 0.0002, + "loss": 0.4129, + "step": 2750 + }, + { + "epoch": 5.935483870967742, + "grad_norm": 0.9264667630195618, + "learning_rate": 0.0002, + "loss": 0.4241, + "step": 2760 + }, + { + "epoch": 5.956989247311828, + "grad_norm": 0.9373905658721924, + "learning_rate": 0.0002, + "loss": 0.4318, + "step": 2770 + }, + { + "epoch": 5.978494623655914, + "grad_norm": 1.0063740015029907, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 2780 + }, + { + "epoch": 6.0, + "grad_norm": 0.8291367292404175, + "learning_rate": 0.0002, + "loss": 0.4382, + "step": 2790 + }, + { + "epoch": 6.0, + "eval_loss": 0.5057176947593689, + "eval_runtime": 21.3206, + "eval_samples_per_second": 15.525, + "eval_steps_per_second": 1.97, + "step": 2790 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.409936977494016e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b8e87ae7c16b8643438e3c9cbd31e06aa096121 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54f4a168fbd2c583f9daa69a9737f492d382725904351c5a22fd1b960c408e37 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bf0e0220021fce5faa860af5f9bdfae428031807 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bbbfcdecc3011c7fc856dbded7d96c1b54e87d6f765b458fa6f3b8e08fc6a1e +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..25ffe2b662b93a483d3ef1bf10d6b4d9ce9f8786 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f73bb6248668e8e530e914affcab3aeae1a987ed024a2c75b1b13c7d20e1a3ca +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..637f69c1cfc0c0dafdad68cbe5ec085272752500 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dec82ffffb79214bda8d32893fbea6069d6fb1f5366d6289391a63b2e486940 +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5a0027a9dea35dcdba6086c15e3c2a407e286c27 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/trainer_state.json @@ -0,0 +1,2364 @@ +{ + "best_metric": 0.48264747858047485, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255", + "epoch": 7.0, + "eval_steps": 10, + "global_step": 3255, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + }, + { + "epoch": 3.010752688172043, + "grad_norm": 0.8650677800178528, + "learning_rate": 0.0002, + "loss": 0.5696, + "step": 1400 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 0.8364197015762329, + "learning_rate": 0.0002, + "loss": 0.5483, + "step": 1410 + }, + { + "epoch": 3.053763440860215, + "grad_norm": 0.8278448581695557, + "learning_rate": 0.0002, + "loss": 0.5606, + "step": 1420 + }, + { + "epoch": 3.075268817204301, + "grad_norm": 0.8806642889976501, + "learning_rate": 0.0002, + "loss": 0.5572, + "step": 1430 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 0.8180029988288879, + "learning_rate": 0.0002, + "loss": 0.585, + "step": 1440 + }, + { + "epoch": 3.118279569892473, + "grad_norm": 0.8561782836914062, + "learning_rate": 0.0002, + "loss": 0.5667, + "step": 1450 + }, + { + "epoch": 3.139784946236559, + "grad_norm": 0.8377029299736023, + "learning_rate": 0.0002, + "loss": 0.5246, + "step": 1460 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 0.885779082775116, + "learning_rate": 0.0002, + "loss": 0.5464, + "step": 1470 + }, + { + "epoch": 3.182795698924731, + "grad_norm": 0.9388518333435059, + "learning_rate": 0.0002, + "loss": 0.541, + "step": 1480 + }, + { + "epoch": 3.204301075268817, + "grad_norm": 0.8816235661506653, + "learning_rate": 0.0002, + "loss": 0.5447, + "step": 1490 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 0.9885783791542053, + "learning_rate": 0.0002, + "loss": 0.5466, + "step": 1500 + }, + { + "epoch": 3.247311827956989, + "grad_norm": 0.8635850548744202, + "learning_rate": 0.0002, + "loss": 0.5455, + "step": 1510 + }, + { + "epoch": 3.268817204301075, + "grad_norm": 0.829853355884552, + "learning_rate": 0.0002, + "loss": 0.5419, + "step": 1520 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 0.9037486910820007, + "learning_rate": 0.0002, + "loss": 0.54, + "step": 1530 + }, + { + "epoch": 3.3118279569892475, + "grad_norm": 0.8173713684082031, + "learning_rate": 0.0002, + "loss": 0.5375, + "step": 1540 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.796953022480011, + "learning_rate": 0.0002, + "loss": 0.5405, + "step": 1550 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 0.7894400358200073, + "learning_rate": 0.0002, + "loss": 0.5505, + "step": 1560 + }, + { + "epoch": 3.3763440860215055, + "grad_norm": 0.9434949159622192, + "learning_rate": 0.0002, + "loss": 0.5395, + "step": 1570 + }, + { + "epoch": 3.3978494623655915, + "grad_norm": 0.8666760325431824, + "learning_rate": 0.0002, + "loss": 0.5271, + "step": 1580 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 0.7782467007637024, + "learning_rate": 0.0002, + "loss": 0.5439, + "step": 1590 + }, + { + "epoch": 3.4408602150537635, + "grad_norm": 0.8849126696586609, + "learning_rate": 0.0002, + "loss": 0.5161, + "step": 1600 + }, + { + "epoch": 3.4623655913978495, + "grad_norm": 0.7863831520080566, + "learning_rate": 0.0002, + "loss": 0.5353, + "step": 1610 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0002, + "loss": 0.5308, + "step": 1620 + }, + { + "epoch": 3.5053763440860215, + "grad_norm": 0.8307499289512634, + "learning_rate": 0.0002, + "loss": 0.5339, + "step": 1630 + }, + { + "epoch": 3.5268817204301075, + "grad_norm": 0.9132118821144104, + "learning_rate": 0.0002, + "loss": 0.5361, + "step": 1640 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 0.9322578310966492, + "learning_rate": 0.0002, + "loss": 0.5828, + "step": 1650 + }, + { + "epoch": 3.5698924731182795, + "grad_norm": 0.9782460331916809, + "learning_rate": 0.0002, + "loss": 0.546, + "step": 1660 + }, + { + "epoch": 3.5913978494623655, + "grad_norm": 0.7189919352531433, + "learning_rate": 0.0002, + "loss": 0.5424, + "step": 1670 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 0.9689221382141113, + "learning_rate": 0.0002, + "loss": 0.5514, + "step": 1680 + }, + { + "epoch": 3.6344086021505375, + "grad_norm": 0.9684675335884094, + "learning_rate": 0.0002, + "loss": 0.5379, + "step": 1690 + }, + { + "epoch": 3.6559139784946235, + "grad_norm": 0.8851472735404968, + "learning_rate": 0.0002, + "loss": 0.5748, + "step": 1700 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 0.7709833383560181, + "learning_rate": 0.0002, + "loss": 0.5412, + "step": 1710 + }, + { + "epoch": 3.698924731182796, + "grad_norm": 0.818236231803894, + "learning_rate": 0.0002, + "loss": 0.521, + "step": 1720 + }, + { + "epoch": 3.720430107526882, + "grad_norm": 0.870642364025116, + "learning_rate": 0.0002, + "loss": 0.5445, + "step": 1730 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 1.0245511531829834, + "learning_rate": 0.0002, + "loss": 0.5307, + "step": 1740 + }, + { + "epoch": 3.763440860215054, + "grad_norm": 0.8607558608055115, + "learning_rate": 0.0002, + "loss": 0.5593, + "step": 1750 + }, + { + "epoch": 3.78494623655914, + "grad_norm": 0.8511829972267151, + "learning_rate": 0.0002, + "loss": 0.536, + "step": 1760 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 0.7969087362289429, + "learning_rate": 0.0002, + "loss": 0.5193, + "step": 1770 + }, + { + "epoch": 3.827956989247312, + "grad_norm": 0.8457245826721191, + "learning_rate": 0.0002, + "loss": 0.5578, + "step": 1780 + }, + { + "epoch": 3.849462365591398, + "grad_norm": 0.8893467783927917, + "learning_rate": 0.0002, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 0.8593819737434387, + "learning_rate": 0.0002, + "loss": 0.5024, + "step": 1800 + }, + { + "epoch": 3.89247311827957, + "grad_norm": 0.7574560642242432, + "learning_rate": 0.0002, + "loss": 0.5134, + "step": 1810 + }, + { + "epoch": 3.913978494623656, + "grad_norm": 0.8681567311286926, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1820 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 0.9068132042884827, + "learning_rate": 0.0002, + "loss": 0.532, + "step": 1830 + }, + { + "epoch": 3.956989247311828, + "grad_norm": 0.8668948411941528, + "learning_rate": 0.0002, + "loss": 0.5427, + "step": 1840 + }, + { + "epoch": 3.978494623655914, + "grad_norm": 1.046032428741455, + "learning_rate": 0.0002, + "loss": 0.5349, + "step": 1850 + }, + { + "epoch": 4.0, + "grad_norm": 0.904780387878418, + "learning_rate": 0.0002, + "loss": 0.5087, + "step": 1860 + }, + { + "epoch": 4.0, + "eval_loss": 0.5737715363502502, + "eval_runtime": 21.4915, + "eval_samples_per_second": 15.401, + "eval_steps_per_second": 1.954, + "step": 1860 + }, + { + "epoch": 4.021505376344086, + "grad_norm": 0.8611752986907959, + "learning_rate": 0.0002, + "loss": 0.4843, + "step": 1870 + }, + { + "epoch": 4.043010752688172, + "grad_norm": 0.838782548904419, + "learning_rate": 0.0002, + "loss": 0.4814, + "step": 1880 + }, + { + "epoch": 4.064516129032258, + "grad_norm": 0.9119709134101868, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1890 + }, + { + "epoch": 4.086021505376344, + "grad_norm": 0.8026251196861267, + "learning_rate": 0.0002, + "loss": 0.4951, + "step": 1900 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.8773705363273621, + "learning_rate": 0.0002, + "loss": 0.491, + "step": 1910 + }, + { + "epoch": 4.129032258064516, + "grad_norm": 0.8762255907058716, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1920 + }, + { + "epoch": 4.150537634408602, + "grad_norm": 0.8371861577033997, + "learning_rate": 0.0002, + "loss": 0.4816, + "step": 1930 + }, + { + "epoch": 4.172043010752688, + "grad_norm": 0.9703728556632996, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1940 + }, + { + "epoch": 4.193548387096774, + "grad_norm": 0.8802874684333801, + "learning_rate": 0.0002, + "loss": 0.4772, + "step": 1950 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 1.0103057622909546, + "learning_rate": 0.0002, + "loss": 0.5032, + "step": 1960 + }, + { + "epoch": 4.236559139784946, + "grad_norm": 0.9212995171546936, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 1970 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 1.009544849395752, + "learning_rate": 0.0002, + "loss": 0.4753, + "step": 1980 + }, + { + "epoch": 4.279569892473118, + "grad_norm": 0.8535077571868896, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 1990 + }, + { + "epoch": 4.301075268817204, + "grad_norm": 0.8363022804260254, + "learning_rate": 0.0002, + "loss": 0.4782, + "step": 2000 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.9041762948036194, + "learning_rate": 0.0002, + "loss": 0.4875, + "step": 2010 + }, + { + "epoch": 4.344086021505376, + "grad_norm": 0.960790753364563, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2020 + }, + { + "epoch": 4.365591397849462, + "grad_norm": 0.8823095560073853, + "learning_rate": 0.0002, + "loss": 0.4626, + "step": 2030 + }, + { + "epoch": 4.387096774193548, + "grad_norm": 0.952100396156311, + "learning_rate": 0.0002, + "loss": 0.4883, + "step": 2040 + }, + { + "epoch": 4.408602150537634, + "grad_norm": 1.0793498754501343, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 2050 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.8987208008766174, + "learning_rate": 0.0002, + "loss": 0.4827, + "step": 2060 + }, + { + "epoch": 4.451612903225806, + "grad_norm": 0.8539772033691406, + "learning_rate": 0.0002, + "loss": 0.4594, + "step": 2070 + }, + { + "epoch": 4.473118279569892, + "grad_norm": 0.9160863757133484, + "learning_rate": 0.0002, + "loss": 0.4752, + "step": 2080 + }, + { + "epoch": 4.494623655913978, + "grad_norm": 0.9946850538253784, + "learning_rate": 0.0002, + "loss": 0.5033, + "step": 2090 + }, + { + "epoch": 4.516129032258064, + "grad_norm": 0.908039391040802, + "learning_rate": 0.0002, + "loss": 0.4842, + "step": 2100 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 1.1462254524230957, + "learning_rate": 0.0002, + "loss": 0.4861, + "step": 2110 + }, + { + "epoch": 4.559139784946236, + "grad_norm": 0.8392056226730347, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 2120 + }, + { + "epoch": 4.580645161290323, + "grad_norm": 0.9673896431922913, + "learning_rate": 0.0002, + "loss": 0.4824, + "step": 2130 + }, + { + "epoch": 4.602150537634409, + "grad_norm": 0.9047091603279114, + "learning_rate": 0.0002, + "loss": 0.4665, + "step": 2140 + }, + { + "epoch": 4.623655913978495, + "grad_norm": 0.9013425707817078, + "learning_rate": 0.0002, + "loss": 0.4714, + "step": 2150 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.8899165391921997, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 2160 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.748602569103241, + "learning_rate": 0.0002, + "loss": 0.4635, + "step": 2170 + }, + { + "epoch": 4.688172043010753, + "grad_norm": 0.8694155216217041, + "learning_rate": 0.0002, + "loss": 0.4695, + "step": 2180 + }, + { + "epoch": 4.709677419354839, + "grad_norm": 0.9134316444396973, + "learning_rate": 0.0002, + "loss": 0.4929, + "step": 2190 + }, + { + "epoch": 4.731182795698925, + "grad_norm": 0.8504763245582581, + "learning_rate": 0.0002, + "loss": 0.4855, + "step": 2200 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 1.0321544408798218, + "learning_rate": 0.0002, + "loss": 0.4517, + "step": 2210 + }, + { + "epoch": 4.774193548387097, + "grad_norm": 0.9368237257003784, + "learning_rate": 0.0002, + "loss": 0.4796, + "step": 2220 + }, + { + "epoch": 4.795698924731183, + "grad_norm": 0.9319947361946106, + "learning_rate": 0.0002, + "loss": 0.4837, + "step": 2230 + }, + { + "epoch": 4.817204301075269, + "grad_norm": 0.904333770275116, + "learning_rate": 0.0002, + "loss": 0.4696, + "step": 2240 + }, + { + "epoch": 4.838709677419355, + "grad_norm": 0.8097078204154968, + "learning_rate": 0.0002, + "loss": 0.4746, + "step": 2250 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.9128859043121338, + "learning_rate": 0.0002, + "loss": 0.4438, + "step": 2260 + }, + { + "epoch": 4.881720430107527, + "grad_norm": 0.883129894733429, + "learning_rate": 0.0002, + "loss": 0.4693, + "step": 2270 + }, + { + "epoch": 4.903225806451613, + "grad_norm": 0.85712730884552, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2280 + }, + { + "epoch": 4.924731182795699, + "grad_norm": 1.2101863622665405, + "learning_rate": 0.0002, + "loss": 0.4593, + "step": 2290 + }, + { + "epoch": 4.946236559139785, + "grad_norm": 0.917966902256012, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2300 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.0002, + "loss": 0.4666, + "step": 2310 + }, + { + "epoch": 4.989247311827957, + "grad_norm": 1.0199906826019287, + "learning_rate": 0.0002, + "loss": 0.4629, + "step": 2320 + }, + { + "epoch": 5.0, + "eval_loss": 0.5363914370536804, + "eval_runtime": 21.3941, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.963, + "step": 2325 + }, + { + "epoch": 5.010752688172043, + "grad_norm": 0.8580502271652222, + "learning_rate": 0.0002, + "loss": 0.4543, + "step": 2330 + }, + { + "epoch": 5.032258064516129, + "grad_norm": 0.7702704668045044, + "learning_rate": 0.0002, + "loss": 0.404, + "step": 2340 + }, + { + "epoch": 5.053763440860215, + "grad_norm": 0.9417401552200317, + "learning_rate": 0.0002, + "loss": 0.4408, + "step": 2350 + }, + { + "epoch": 5.075268817204301, + "grad_norm": 0.9461463689804077, + "learning_rate": 0.0002, + "loss": 0.4306, + "step": 2360 + }, + { + "epoch": 5.096774193548387, + "grad_norm": 0.8931282162666321, + "learning_rate": 0.0002, + "loss": 0.4251, + "step": 2370 + }, + { + "epoch": 5.118279569892473, + "grad_norm": 1.000909447669983, + "learning_rate": 0.0002, + "loss": 0.4249, + "step": 2380 + }, + { + "epoch": 5.139784946236559, + "grad_norm": 0.8640249967575073, + "learning_rate": 0.0002, + "loss": 0.4231, + "step": 2390 + }, + { + "epoch": 5.161290322580645, + "grad_norm": 1.0451020002365112, + "learning_rate": 0.0002, + "loss": 0.4272, + "step": 2400 + }, + { + "epoch": 5.182795698924731, + "grad_norm": 0.7896912097930908, + "learning_rate": 0.0002, + "loss": 0.4177, + "step": 2410 + }, + { + "epoch": 5.204301075268817, + "grad_norm": 0.8424463272094727, + "learning_rate": 0.0002, + "loss": 0.4116, + "step": 2420 + }, + { + "epoch": 5.225806451612903, + "grad_norm": 1.0852105617523193, + "learning_rate": 0.0002, + "loss": 0.4225, + "step": 2430 + }, + { + "epoch": 5.247311827956989, + "grad_norm": 0.9285983443260193, + "learning_rate": 0.0002, + "loss": 0.4352, + "step": 2440 + }, + { + "epoch": 5.268817204301075, + "grad_norm": 0.9119299054145813, + "learning_rate": 0.0002, + "loss": 0.4262, + "step": 2450 + }, + { + "epoch": 5.290322580645161, + "grad_norm": 0.8790456056594849, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2460 + }, + { + "epoch": 5.311827956989247, + "grad_norm": 0.8726504445075989, + "learning_rate": 0.0002, + "loss": 0.4421, + "step": 2470 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.9415227770805359, + "learning_rate": 0.0002, + "loss": 0.4372, + "step": 2480 + }, + { + "epoch": 5.354838709677419, + "grad_norm": 0.9133324027061462, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2490 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 0.9567879438400269, + "learning_rate": 0.0002, + "loss": 0.4401, + "step": 2500 + }, + { + "epoch": 5.397849462365591, + "grad_norm": 0.9239469766616821, + "learning_rate": 0.0002, + "loss": 0.4094, + "step": 2510 + }, + { + "epoch": 5.419354838709677, + "grad_norm": 1.0293527841567993, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 2520 + }, + { + "epoch": 5.440860215053763, + "grad_norm": 0.8618718981742859, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2530 + }, + { + "epoch": 5.462365591397849, + "grad_norm": 0.740166187286377, + "learning_rate": 0.0002, + "loss": 0.462, + "step": 2540 + }, + { + "epoch": 5.483870967741936, + "grad_norm": 0.901566743850708, + "learning_rate": 0.0002, + "loss": 0.4172, + "step": 2550 + }, + { + "epoch": 5.505376344086022, + "grad_norm": 0.7957597970962524, + "learning_rate": 0.0002, + "loss": 0.4315, + "step": 2560 + }, + { + "epoch": 5.526881720430108, + "grad_norm": 1.1139343976974487, + "learning_rate": 0.0002, + "loss": 0.4263, + "step": 2570 + }, + { + "epoch": 5.548387096774194, + "grad_norm": 0.989765465259552, + "learning_rate": 0.0002, + "loss": 0.4056, + "step": 2580 + }, + { + "epoch": 5.56989247311828, + "grad_norm": 0.9416969418525696, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2590 + }, + { + "epoch": 5.591397849462366, + "grad_norm": 0.9184830784797668, + "learning_rate": 0.0002, + "loss": 0.4363, + "step": 2600 + }, + { + "epoch": 5.612903225806452, + "grad_norm": 1.0512700080871582, + "learning_rate": 0.0002, + "loss": 0.432, + "step": 2610 + }, + { + "epoch": 5.634408602150538, + "grad_norm": 0.901462197303772, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2620 + }, + { + "epoch": 5.655913978494624, + "grad_norm": 0.9732566475868225, + "learning_rate": 0.0002, + "loss": 0.4332, + "step": 2630 + }, + { + "epoch": 5.67741935483871, + "grad_norm": 0.8180275559425354, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2640 + }, + { + "epoch": 5.698924731182796, + "grad_norm": 1.1354765892028809, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2650 + }, + { + "epoch": 5.720430107526882, + "grad_norm": 0.9161503314971924, + "learning_rate": 0.0002, + "loss": 0.4409, + "step": 2660 + }, + { + "epoch": 5.741935483870968, + "grad_norm": 1.0561772584915161, + "learning_rate": 0.0002, + "loss": 0.4394, + "step": 2670 + }, + { + "epoch": 5.763440860215054, + "grad_norm": 0.7712787389755249, + "learning_rate": 0.0002, + "loss": 0.424, + "step": 2680 + }, + { + "epoch": 5.78494623655914, + "grad_norm": 0.9674550294876099, + "learning_rate": 0.0002, + "loss": 0.4326, + "step": 2690 + }, + { + "epoch": 5.806451612903226, + "grad_norm": 0.7531843781471252, + "learning_rate": 0.0002, + "loss": 0.4459, + "step": 2700 + }, + { + "epoch": 5.827956989247312, + "grad_norm": 1.1332131624221802, + "learning_rate": 0.0002, + "loss": 0.4276, + "step": 2710 + }, + { + "epoch": 5.849462365591398, + "grad_norm": 0.9367414116859436, + "learning_rate": 0.0002, + "loss": 0.4113, + "step": 2720 + }, + { + "epoch": 5.870967741935484, + "grad_norm": 0.8267706632614136, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2730 + }, + { + "epoch": 5.89247311827957, + "grad_norm": 1.1040657758712769, + "learning_rate": 0.0002, + "loss": 0.4218, + "step": 2740 + }, + { + "epoch": 5.913978494623656, + "grad_norm": 0.8879582285881042, + "learning_rate": 0.0002, + "loss": 0.4129, + "step": 2750 + }, + { + "epoch": 5.935483870967742, + "grad_norm": 0.9264667630195618, + "learning_rate": 0.0002, + "loss": 0.4241, + "step": 2760 + }, + { + "epoch": 5.956989247311828, + "grad_norm": 0.9373905658721924, + "learning_rate": 0.0002, + "loss": 0.4318, + "step": 2770 + }, + { + "epoch": 5.978494623655914, + "grad_norm": 1.0063740015029907, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 2780 + }, + { + "epoch": 6.0, + "grad_norm": 0.8291367292404175, + "learning_rate": 0.0002, + "loss": 0.4382, + "step": 2790 + }, + { + "epoch": 6.0, + "eval_loss": 0.5057176947593689, + "eval_runtime": 21.3206, + "eval_samples_per_second": 15.525, + "eval_steps_per_second": 1.97, + "step": 2790 + }, + { + "epoch": 6.021505376344086, + "grad_norm": 1.0137434005737305, + "learning_rate": 0.0002, + "loss": 0.3907, + "step": 2800 + }, + { + "epoch": 6.043010752688172, + "grad_norm": 0.7550579905509949, + "learning_rate": 0.0002, + "loss": 0.3793, + "step": 2810 + }, + { + "epoch": 6.064516129032258, + "grad_norm": 1.0664116144180298, + "learning_rate": 0.0002, + "loss": 0.4003, + "step": 2820 + }, + { + "epoch": 6.086021505376344, + "grad_norm": 0.7908814549446106, + "learning_rate": 0.0002, + "loss": 0.3876, + "step": 2830 + }, + { + "epoch": 6.10752688172043, + "grad_norm": 0.8101639747619629, + "learning_rate": 0.0002, + "loss": 0.3884, + "step": 2840 + }, + { + "epoch": 6.129032258064516, + "grad_norm": 0.7882567048072815, + "learning_rate": 0.0002, + "loss": 0.3835, + "step": 2850 + }, + { + "epoch": 6.150537634408602, + "grad_norm": 1.0134103298187256, + "learning_rate": 0.0002, + "loss": 0.3827, + "step": 2860 + }, + { + "epoch": 6.172043010752688, + "grad_norm": 0.9240215420722961, + "learning_rate": 0.0002, + "loss": 0.3963, + "step": 2870 + }, + { + "epoch": 6.193548387096774, + "grad_norm": 0.8322992920875549, + "learning_rate": 0.0002, + "loss": 0.4049, + "step": 2880 + }, + { + "epoch": 6.21505376344086, + "grad_norm": 0.9238720536231995, + "learning_rate": 0.0002, + "loss": 0.381, + "step": 2890 + }, + { + "epoch": 6.236559139784946, + "grad_norm": 0.9361863732337952, + "learning_rate": 0.0002, + "loss": 0.3852, + "step": 2900 + }, + { + "epoch": 6.258064516129032, + "grad_norm": 0.9670863747596741, + "learning_rate": 0.0002, + "loss": 0.3917, + "step": 2910 + }, + { + "epoch": 6.279569892473118, + "grad_norm": 0.7724685668945312, + "learning_rate": 0.0002, + "loss": 0.3826, + "step": 2920 + }, + { + "epoch": 6.301075268817204, + "grad_norm": 0.8125540614128113, + "learning_rate": 0.0002, + "loss": 0.3988, + "step": 2930 + }, + { + "epoch": 6.32258064516129, + "grad_norm": 0.9483002424240112, + "learning_rate": 0.0002, + "loss": 0.3778, + "step": 2940 + }, + { + "epoch": 6.344086021505376, + "grad_norm": 1.098374843597412, + "learning_rate": 0.0002, + "loss": 0.3823, + "step": 2950 + }, + { + "epoch": 6.365591397849462, + "grad_norm": 1.0169378519058228, + "learning_rate": 0.0002, + "loss": 0.3886, + "step": 2960 + }, + { + "epoch": 6.387096774193548, + "grad_norm": 0.8594151139259338, + "learning_rate": 0.0002, + "loss": 0.3936, + "step": 2970 + }, + { + "epoch": 6.408602150537634, + "grad_norm": 0.9507288336753845, + "learning_rate": 0.0002, + "loss": 0.3871, + "step": 2980 + }, + { + "epoch": 6.43010752688172, + "grad_norm": 0.9212459325790405, + "learning_rate": 0.0002, + "loss": 0.3852, + "step": 2990 + }, + { + "epoch": 6.451612903225806, + "grad_norm": 0.9696952104568481, + "learning_rate": 0.0002, + "loss": 0.3929, + "step": 3000 + }, + { + "epoch": 6.473118279569892, + "grad_norm": 0.8872610330581665, + "learning_rate": 0.0002, + "loss": 0.3933, + "step": 3010 + }, + { + "epoch": 6.494623655913978, + "grad_norm": 0.9207532405853271, + "learning_rate": 0.0002, + "loss": 0.393, + "step": 3020 + }, + { + "epoch": 6.516129032258064, + "grad_norm": 0.9116262793540955, + "learning_rate": 0.0002, + "loss": 0.3848, + "step": 3030 + }, + { + "epoch": 6.53763440860215, + "grad_norm": 0.83391934633255, + "learning_rate": 0.0002, + "loss": 0.3964, + "step": 3040 + }, + { + "epoch": 6.559139784946236, + "grad_norm": 0.890931248664856, + "learning_rate": 0.0002, + "loss": 0.3758, + "step": 3050 + }, + { + "epoch": 6.580645161290323, + "grad_norm": 1.0100581645965576, + "learning_rate": 0.0002, + "loss": 0.3944, + "step": 3060 + }, + { + "epoch": 6.602150537634409, + "grad_norm": 0.783526599407196, + "learning_rate": 0.0002, + "loss": 0.3992, + "step": 3070 + }, + { + "epoch": 6.623655913978495, + "grad_norm": 1.324326515197754, + "learning_rate": 0.0002, + "loss": 0.4144, + "step": 3080 + }, + { + "epoch": 6.645161290322581, + "grad_norm": 0.9102319478988647, + "learning_rate": 0.0002, + "loss": 0.3986, + "step": 3090 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.96951824426651, + "learning_rate": 0.0002, + "loss": 0.3873, + "step": 3100 + }, + { + "epoch": 6.688172043010753, + "grad_norm": 0.9786809086799622, + "learning_rate": 0.0002, + "loss": 0.3931, + "step": 3110 + }, + { + "epoch": 6.709677419354839, + "grad_norm": 1.0301238298416138, + "learning_rate": 0.0002, + "loss": 0.3714, + "step": 3120 + }, + { + "epoch": 6.731182795698925, + "grad_norm": 1.1690906286239624, + "learning_rate": 0.0002, + "loss": 0.3823, + "step": 3130 + }, + { + "epoch": 6.752688172043011, + "grad_norm": 0.963306725025177, + "learning_rate": 0.0002, + "loss": 0.3936, + "step": 3140 + }, + { + "epoch": 6.774193548387097, + "grad_norm": 0.8565770983695984, + "learning_rate": 0.0002, + "loss": 0.3975, + "step": 3150 + }, + { + "epoch": 6.795698924731183, + "grad_norm": 0.8887158632278442, + "learning_rate": 0.0002, + "loss": 0.3903, + "step": 3160 + }, + { + "epoch": 6.817204301075269, + "grad_norm": 0.8234561085700989, + "learning_rate": 0.0002, + "loss": 0.4098, + "step": 3170 + }, + { + "epoch": 6.838709677419355, + "grad_norm": 0.9000219702720642, + "learning_rate": 0.0002, + "loss": 0.4041, + "step": 3180 + }, + { + "epoch": 6.860215053763441, + "grad_norm": 1.1366009712219238, + "learning_rate": 0.0002, + "loss": 0.3933, + "step": 3190 + }, + { + "epoch": 6.881720430107527, + "grad_norm": 0.8747097849845886, + "learning_rate": 0.0002, + "loss": 0.3972, + "step": 3200 + }, + { + "epoch": 6.903225806451613, + "grad_norm": 0.8533893823623657, + "learning_rate": 0.0002, + "loss": 0.404, + "step": 3210 + }, + { + "epoch": 6.924731182795699, + "grad_norm": 0.8127949237823486, + "learning_rate": 0.0002, + "loss": 0.3906, + "step": 3220 + }, + { + "epoch": 6.946236559139785, + "grad_norm": 0.8872477412223816, + "learning_rate": 0.0002, + "loss": 0.3747, + "step": 3230 + }, + { + "epoch": 6.967741935483871, + "grad_norm": 0.8541608452796936, + "learning_rate": 0.0002, + "loss": 0.3817, + "step": 3240 + }, + { + "epoch": 6.989247311827957, + "grad_norm": 0.8390752673149109, + "learning_rate": 0.0002, + "loss": 0.3863, + "step": 3250 + }, + { + "epoch": 7.0, + "eval_loss": 0.48264747858047485, + "eval_runtime": 21.3942, + "eval_samples_per_second": 15.471, + "eval_steps_per_second": 1.963, + "step": 3255 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 3.978259807076352e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..109596fbe768580a1c4f560efa8a5a99f822a8aa --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6e7766c9aae4d2fd34c0286b639660fa9abfa548c97dd1e795db9b07731b767 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5bdebbc054ca2b0200df38941952fedea3c6a295 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60ca519ee855c6817801ec024584c4698868e71abdffc173ae7f81078aeaa2e2 +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..23026a8b2b73844837d8930972411617be8de756 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824426ac36d9031404d4740da6b956e37fa68cac1f3a35686b97495a4c2b41ce +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f4e307b1dec6f5df3468f48f3e16d5c992503374 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02613a6afb86ae474ea068b51099dfd9dc4abfb57724ef7ae1f766d5528370f2 +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7c70b1322223ff3381e8cd51fb03d267481762b3 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/trainer_state.json @@ -0,0 +1,2701 @@ +{ + "best_metric": 0.46552130579948425, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720", + "epoch": 8.0, + "eval_steps": 10, + "global_step": 3720, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + }, + { + "epoch": 2.021505376344086, + "grad_norm": 0.8216531872749329, + "learning_rate": 0.0002, + "loss": 0.6625, + "step": 940 + }, + { + "epoch": 2.043010752688172, + "grad_norm": 0.8317142724990845, + "learning_rate": 0.0002, + "loss": 0.6398, + "step": 950 + }, + { + "epoch": 2.064516129032258, + "grad_norm": 0.8446708917617798, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 960 + }, + { + "epoch": 2.086021505376344, + "grad_norm": 0.735055148601532, + "learning_rate": 0.0002, + "loss": 0.657, + "step": 970 + }, + { + "epoch": 2.10752688172043, + "grad_norm": 0.7487243413925171, + "learning_rate": 0.0002, + "loss": 0.649, + "step": 980 + }, + { + "epoch": 2.129032258064516, + "grad_norm": 0.8573887944221497, + "learning_rate": 0.0002, + "loss": 0.6419, + "step": 990 + }, + { + "epoch": 2.150537634408602, + "grad_norm": 0.6284521818161011, + "learning_rate": 0.0002, + "loss": 0.6431, + "step": 1000 + }, + { + "epoch": 2.172043010752688, + "grad_norm": 0.754183292388916, + "learning_rate": 0.0002, + "loss": 0.6128, + "step": 1010 + }, + { + "epoch": 2.193548387096774, + "grad_norm": 0.9445359110832214, + "learning_rate": 0.0002, + "loss": 0.6253, + "step": 1020 + }, + { + "epoch": 2.21505376344086, + "grad_norm": 0.808508038520813, + "learning_rate": 0.0002, + "loss": 0.605, + "step": 1030 + }, + { + "epoch": 2.236559139784946, + "grad_norm": 0.9394679665565491, + "learning_rate": 0.0002, + "loss": 0.6786, + "step": 1040 + }, + { + "epoch": 2.258064516129032, + "grad_norm": 0.8151357769966125, + "learning_rate": 0.0002, + "loss": 0.6176, + "step": 1050 + }, + { + "epoch": 2.279569892473118, + "grad_norm": 0.7909848093986511, + "learning_rate": 0.0002, + "loss": 0.66, + "step": 1060 + }, + { + "epoch": 2.3010752688172045, + "grad_norm": 0.7506507039070129, + "learning_rate": 0.0002, + "loss": 0.6254, + "step": 1070 + }, + { + "epoch": 2.3225806451612905, + "grad_norm": 0.8240520358085632, + "learning_rate": 0.0002, + "loss": 0.6608, + "step": 1080 + }, + { + "epoch": 2.3440860215053765, + "grad_norm": 0.9342400431632996, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1090 + }, + { + "epoch": 2.3655913978494625, + "grad_norm": 1.0598735809326172, + "learning_rate": 0.0002, + "loss": 0.6029, + "step": 1100 + }, + { + "epoch": 2.3870967741935485, + "grad_norm": 0.7907650470733643, + "learning_rate": 0.0002, + "loss": 0.6035, + "step": 1110 + }, + { + "epoch": 2.4086021505376345, + "grad_norm": 0.9388798475265503, + "learning_rate": 0.0002, + "loss": 0.6237, + "step": 1120 + }, + { + "epoch": 2.4301075268817205, + "grad_norm": 0.8985419869422913, + "learning_rate": 0.0002, + "loss": 0.6207, + "step": 1130 + }, + { + "epoch": 2.4516129032258065, + "grad_norm": 0.7471932768821716, + "learning_rate": 0.0002, + "loss": 0.5902, + "step": 1140 + }, + { + "epoch": 2.4731182795698925, + "grad_norm": 0.761131763458252, + "learning_rate": 0.0002, + "loss": 0.6446, + "step": 1150 + }, + { + "epoch": 2.4946236559139785, + "grad_norm": 0.7901819348335266, + "learning_rate": 0.0002, + "loss": 0.6088, + "step": 1160 + }, + { + "epoch": 2.5161290322580645, + "grad_norm": 0.9932922720909119, + "learning_rate": 0.0002, + "loss": 0.6142, + "step": 1170 + }, + { + "epoch": 2.5376344086021505, + "grad_norm": 0.7414287328720093, + "learning_rate": 0.0002, + "loss": 0.6407, + "step": 1180 + }, + { + "epoch": 2.5591397849462365, + "grad_norm": 0.8111771941184998, + "learning_rate": 0.0002, + "loss": 0.6161, + "step": 1190 + }, + { + "epoch": 2.5806451612903225, + "grad_norm": 0.7520156502723694, + "learning_rate": 0.0002, + "loss": 0.6006, + "step": 1200 + }, + { + "epoch": 2.6021505376344085, + "grad_norm": 0.9022907018661499, + "learning_rate": 0.0002, + "loss": 0.615, + "step": 1210 + }, + { + "epoch": 2.6236559139784945, + "grad_norm": 0.7746260166168213, + "learning_rate": 0.0002, + "loss": 0.6211, + "step": 1220 + }, + { + "epoch": 2.6451612903225805, + "grad_norm": 0.8482862114906311, + "learning_rate": 0.0002, + "loss": 0.616, + "step": 1230 + }, + { + "epoch": 2.6666666666666665, + "grad_norm": 0.7925458550453186, + "learning_rate": 0.0002, + "loss": 0.6417, + "step": 1240 + }, + { + "epoch": 2.688172043010753, + "grad_norm": 0.8369929194450378, + "learning_rate": 0.0002, + "loss": 0.6187, + "step": 1250 + }, + { + "epoch": 2.709677419354839, + "grad_norm": 0.8311542868614197, + "learning_rate": 0.0002, + "loss": 0.6138, + "step": 1260 + }, + { + "epoch": 2.731182795698925, + "grad_norm": 0.7204853296279907, + "learning_rate": 0.0002, + "loss": 0.5894, + "step": 1270 + }, + { + "epoch": 2.752688172043011, + "grad_norm": 0.8447284698486328, + "learning_rate": 0.0002, + "loss": 0.6325, + "step": 1280 + }, + { + "epoch": 2.774193548387097, + "grad_norm": 0.7738404273986816, + "learning_rate": 0.0002, + "loss": 0.5946, + "step": 1290 + }, + { + "epoch": 2.795698924731183, + "grad_norm": 0.8393287062644958, + "learning_rate": 0.0002, + "loss": 0.5678, + "step": 1300 + }, + { + "epoch": 2.817204301075269, + "grad_norm": 0.79121994972229, + "learning_rate": 0.0002, + "loss": 0.6092, + "step": 1310 + }, + { + "epoch": 2.838709677419355, + "grad_norm": 0.7331557869911194, + "learning_rate": 0.0002, + "loss": 0.5889, + "step": 1320 + }, + { + "epoch": 2.860215053763441, + "grad_norm": 0.9593998193740845, + "learning_rate": 0.0002, + "loss": 0.6048, + "step": 1330 + }, + { + "epoch": 2.881720430107527, + "grad_norm": 0.7215158343315125, + "learning_rate": 0.0002, + "loss": 0.6108, + "step": 1340 + }, + { + "epoch": 2.903225806451613, + "grad_norm": 0.840404212474823, + "learning_rate": 0.0002, + "loss": 0.5897, + "step": 1350 + }, + { + "epoch": 2.924731182795699, + "grad_norm": 0.870659351348877, + "learning_rate": 0.0002, + "loss": 0.6056, + "step": 1360 + }, + { + "epoch": 2.946236559139785, + "grad_norm": 0.8744975328445435, + "learning_rate": 0.0002, + "loss": 0.6205, + "step": 1370 + }, + { + "epoch": 2.967741935483871, + "grad_norm": 0.8030612468719482, + "learning_rate": 0.0002, + "loss": 0.5966, + "step": 1380 + }, + { + "epoch": 2.989247311827957, + "grad_norm": 0.825814962387085, + "learning_rate": 0.0002, + "loss": 0.6004, + "step": 1390 + }, + { + "epoch": 3.0, + "eval_loss": 0.6257933378219604, + "eval_runtime": 21.3692, + "eval_samples_per_second": 15.49, + "eval_steps_per_second": 1.965, + "step": 1395 + }, + { + "epoch": 3.010752688172043, + "grad_norm": 0.8650677800178528, + "learning_rate": 0.0002, + "loss": 0.5696, + "step": 1400 + }, + { + "epoch": 3.032258064516129, + "grad_norm": 0.8364197015762329, + "learning_rate": 0.0002, + "loss": 0.5483, + "step": 1410 + }, + { + "epoch": 3.053763440860215, + "grad_norm": 0.8278448581695557, + "learning_rate": 0.0002, + "loss": 0.5606, + "step": 1420 + }, + { + "epoch": 3.075268817204301, + "grad_norm": 0.8806642889976501, + "learning_rate": 0.0002, + "loss": 0.5572, + "step": 1430 + }, + { + "epoch": 3.096774193548387, + "grad_norm": 0.8180029988288879, + "learning_rate": 0.0002, + "loss": 0.585, + "step": 1440 + }, + { + "epoch": 3.118279569892473, + "grad_norm": 0.8561782836914062, + "learning_rate": 0.0002, + "loss": 0.5667, + "step": 1450 + }, + { + "epoch": 3.139784946236559, + "grad_norm": 0.8377029299736023, + "learning_rate": 0.0002, + "loss": 0.5246, + "step": 1460 + }, + { + "epoch": 3.161290322580645, + "grad_norm": 0.885779082775116, + "learning_rate": 0.0002, + "loss": 0.5464, + "step": 1470 + }, + { + "epoch": 3.182795698924731, + "grad_norm": 0.9388518333435059, + "learning_rate": 0.0002, + "loss": 0.541, + "step": 1480 + }, + { + "epoch": 3.204301075268817, + "grad_norm": 0.8816235661506653, + "learning_rate": 0.0002, + "loss": 0.5447, + "step": 1490 + }, + { + "epoch": 3.225806451612903, + "grad_norm": 0.9885783791542053, + "learning_rate": 0.0002, + "loss": 0.5466, + "step": 1500 + }, + { + "epoch": 3.247311827956989, + "grad_norm": 0.8635850548744202, + "learning_rate": 0.0002, + "loss": 0.5455, + "step": 1510 + }, + { + "epoch": 3.268817204301075, + "grad_norm": 0.829853355884552, + "learning_rate": 0.0002, + "loss": 0.5419, + "step": 1520 + }, + { + "epoch": 3.2903225806451615, + "grad_norm": 0.9037486910820007, + "learning_rate": 0.0002, + "loss": 0.54, + "step": 1530 + }, + { + "epoch": 3.3118279569892475, + "grad_norm": 0.8173713684082031, + "learning_rate": 0.0002, + "loss": 0.5375, + "step": 1540 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 0.796953022480011, + "learning_rate": 0.0002, + "loss": 0.5405, + "step": 1550 + }, + { + "epoch": 3.3548387096774195, + "grad_norm": 0.7894400358200073, + "learning_rate": 0.0002, + "loss": 0.5505, + "step": 1560 + }, + { + "epoch": 3.3763440860215055, + "grad_norm": 0.9434949159622192, + "learning_rate": 0.0002, + "loss": 0.5395, + "step": 1570 + }, + { + "epoch": 3.3978494623655915, + "grad_norm": 0.8666760325431824, + "learning_rate": 0.0002, + "loss": 0.5271, + "step": 1580 + }, + { + "epoch": 3.4193548387096775, + "grad_norm": 0.7782467007637024, + "learning_rate": 0.0002, + "loss": 0.5439, + "step": 1590 + }, + { + "epoch": 3.4408602150537635, + "grad_norm": 0.8849126696586609, + "learning_rate": 0.0002, + "loss": 0.5161, + "step": 1600 + }, + { + "epoch": 3.4623655913978495, + "grad_norm": 0.7863831520080566, + "learning_rate": 0.0002, + "loss": 0.5353, + "step": 1610 + }, + { + "epoch": 3.4838709677419355, + "grad_norm": 1.0403116941452026, + "learning_rate": 0.0002, + "loss": 0.5308, + "step": 1620 + }, + { + "epoch": 3.5053763440860215, + "grad_norm": 0.8307499289512634, + "learning_rate": 0.0002, + "loss": 0.5339, + "step": 1630 + }, + { + "epoch": 3.5268817204301075, + "grad_norm": 0.9132118821144104, + "learning_rate": 0.0002, + "loss": 0.5361, + "step": 1640 + }, + { + "epoch": 3.5483870967741935, + "grad_norm": 0.9322578310966492, + "learning_rate": 0.0002, + "loss": 0.5828, + "step": 1650 + }, + { + "epoch": 3.5698924731182795, + "grad_norm": 0.9782460331916809, + "learning_rate": 0.0002, + "loss": 0.546, + "step": 1660 + }, + { + "epoch": 3.5913978494623655, + "grad_norm": 0.7189919352531433, + "learning_rate": 0.0002, + "loss": 0.5424, + "step": 1670 + }, + { + "epoch": 3.6129032258064515, + "grad_norm": 0.9689221382141113, + "learning_rate": 0.0002, + "loss": 0.5514, + "step": 1680 + }, + { + "epoch": 3.6344086021505375, + "grad_norm": 0.9684675335884094, + "learning_rate": 0.0002, + "loss": 0.5379, + "step": 1690 + }, + { + "epoch": 3.6559139784946235, + "grad_norm": 0.8851472735404968, + "learning_rate": 0.0002, + "loss": 0.5748, + "step": 1700 + }, + { + "epoch": 3.6774193548387095, + "grad_norm": 0.7709833383560181, + "learning_rate": 0.0002, + "loss": 0.5412, + "step": 1710 + }, + { + "epoch": 3.698924731182796, + "grad_norm": 0.818236231803894, + "learning_rate": 0.0002, + "loss": 0.521, + "step": 1720 + }, + { + "epoch": 3.720430107526882, + "grad_norm": 0.870642364025116, + "learning_rate": 0.0002, + "loss": 0.5445, + "step": 1730 + }, + { + "epoch": 3.741935483870968, + "grad_norm": 1.0245511531829834, + "learning_rate": 0.0002, + "loss": 0.5307, + "step": 1740 + }, + { + "epoch": 3.763440860215054, + "grad_norm": 0.8607558608055115, + "learning_rate": 0.0002, + "loss": 0.5593, + "step": 1750 + }, + { + "epoch": 3.78494623655914, + "grad_norm": 0.8511829972267151, + "learning_rate": 0.0002, + "loss": 0.536, + "step": 1760 + }, + { + "epoch": 3.806451612903226, + "grad_norm": 0.7969087362289429, + "learning_rate": 0.0002, + "loss": 0.5193, + "step": 1770 + }, + { + "epoch": 3.827956989247312, + "grad_norm": 0.8457245826721191, + "learning_rate": 0.0002, + "loss": 0.5578, + "step": 1780 + }, + { + "epoch": 3.849462365591398, + "grad_norm": 0.8893467783927917, + "learning_rate": 0.0002, + "loss": 0.5337, + "step": 1790 + }, + { + "epoch": 3.870967741935484, + "grad_norm": 0.8593819737434387, + "learning_rate": 0.0002, + "loss": 0.5024, + "step": 1800 + }, + { + "epoch": 3.89247311827957, + "grad_norm": 0.7574560642242432, + "learning_rate": 0.0002, + "loss": 0.5134, + "step": 1810 + }, + { + "epoch": 3.913978494623656, + "grad_norm": 0.8681567311286926, + "learning_rate": 0.0002, + "loss": 0.5263, + "step": 1820 + }, + { + "epoch": 3.935483870967742, + "grad_norm": 0.9068132042884827, + "learning_rate": 0.0002, + "loss": 0.532, + "step": 1830 + }, + { + "epoch": 3.956989247311828, + "grad_norm": 0.8668948411941528, + "learning_rate": 0.0002, + "loss": 0.5427, + "step": 1840 + }, + { + "epoch": 3.978494623655914, + "grad_norm": 1.046032428741455, + "learning_rate": 0.0002, + "loss": 0.5349, + "step": 1850 + }, + { + "epoch": 4.0, + "grad_norm": 0.904780387878418, + "learning_rate": 0.0002, + "loss": 0.5087, + "step": 1860 + }, + { + "epoch": 4.0, + "eval_loss": 0.5737715363502502, + "eval_runtime": 21.4915, + "eval_samples_per_second": 15.401, + "eval_steps_per_second": 1.954, + "step": 1860 + }, + { + "epoch": 4.021505376344086, + "grad_norm": 0.8611752986907959, + "learning_rate": 0.0002, + "loss": 0.4843, + "step": 1870 + }, + { + "epoch": 4.043010752688172, + "grad_norm": 0.838782548904419, + "learning_rate": 0.0002, + "loss": 0.4814, + "step": 1880 + }, + { + "epoch": 4.064516129032258, + "grad_norm": 0.9119709134101868, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1890 + }, + { + "epoch": 4.086021505376344, + "grad_norm": 0.8026251196861267, + "learning_rate": 0.0002, + "loss": 0.4951, + "step": 1900 + }, + { + "epoch": 4.10752688172043, + "grad_norm": 0.8773705363273621, + "learning_rate": 0.0002, + "loss": 0.491, + "step": 1910 + }, + { + "epoch": 4.129032258064516, + "grad_norm": 0.8762255907058716, + "learning_rate": 0.0002, + "loss": 0.474, + "step": 1920 + }, + { + "epoch": 4.150537634408602, + "grad_norm": 0.8371861577033997, + "learning_rate": 0.0002, + "loss": 0.4816, + "step": 1930 + }, + { + "epoch": 4.172043010752688, + "grad_norm": 0.9703728556632996, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 1940 + }, + { + "epoch": 4.193548387096774, + "grad_norm": 0.8802874684333801, + "learning_rate": 0.0002, + "loss": 0.4772, + "step": 1950 + }, + { + "epoch": 4.21505376344086, + "grad_norm": 1.0103057622909546, + "learning_rate": 0.0002, + "loss": 0.5032, + "step": 1960 + }, + { + "epoch": 4.236559139784946, + "grad_norm": 0.9212995171546936, + "learning_rate": 0.0002, + "loss": 0.4945, + "step": 1970 + }, + { + "epoch": 4.258064516129032, + "grad_norm": 1.009544849395752, + "learning_rate": 0.0002, + "loss": 0.4753, + "step": 1980 + }, + { + "epoch": 4.279569892473118, + "grad_norm": 0.8535077571868896, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 1990 + }, + { + "epoch": 4.301075268817204, + "grad_norm": 0.8363022804260254, + "learning_rate": 0.0002, + "loss": 0.4782, + "step": 2000 + }, + { + "epoch": 4.32258064516129, + "grad_norm": 0.9041762948036194, + "learning_rate": 0.0002, + "loss": 0.4875, + "step": 2010 + }, + { + "epoch": 4.344086021505376, + "grad_norm": 0.960790753364563, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2020 + }, + { + "epoch": 4.365591397849462, + "grad_norm": 0.8823095560073853, + "learning_rate": 0.0002, + "loss": 0.4626, + "step": 2030 + }, + { + "epoch": 4.387096774193548, + "grad_norm": 0.952100396156311, + "learning_rate": 0.0002, + "loss": 0.4883, + "step": 2040 + }, + { + "epoch": 4.408602150537634, + "grad_norm": 1.0793498754501343, + "learning_rate": 0.0002, + "loss": 0.4789, + "step": 2050 + }, + { + "epoch": 4.43010752688172, + "grad_norm": 0.8987208008766174, + "learning_rate": 0.0002, + "loss": 0.4827, + "step": 2060 + }, + { + "epoch": 4.451612903225806, + "grad_norm": 0.8539772033691406, + "learning_rate": 0.0002, + "loss": 0.4594, + "step": 2070 + }, + { + "epoch": 4.473118279569892, + "grad_norm": 0.9160863757133484, + "learning_rate": 0.0002, + "loss": 0.4752, + "step": 2080 + }, + { + "epoch": 4.494623655913978, + "grad_norm": 0.9946850538253784, + "learning_rate": 0.0002, + "loss": 0.5033, + "step": 2090 + }, + { + "epoch": 4.516129032258064, + "grad_norm": 0.908039391040802, + "learning_rate": 0.0002, + "loss": 0.4842, + "step": 2100 + }, + { + "epoch": 4.53763440860215, + "grad_norm": 1.1462254524230957, + "learning_rate": 0.0002, + "loss": 0.4861, + "step": 2110 + }, + { + "epoch": 4.559139784946236, + "grad_norm": 0.8392056226730347, + "learning_rate": 0.0002, + "loss": 0.4892, + "step": 2120 + }, + { + "epoch": 4.580645161290323, + "grad_norm": 0.9673896431922913, + "learning_rate": 0.0002, + "loss": 0.4824, + "step": 2130 + }, + { + "epoch": 4.602150537634409, + "grad_norm": 0.9047091603279114, + "learning_rate": 0.0002, + "loss": 0.4665, + "step": 2140 + }, + { + "epoch": 4.623655913978495, + "grad_norm": 0.9013425707817078, + "learning_rate": 0.0002, + "loss": 0.4714, + "step": 2150 + }, + { + "epoch": 4.645161290322581, + "grad_norm": 0.8899165391921997, + "learning_rate": 0.0002, + "loss": 0.472, + "step": 2160 + }, + { + "epoch": 4.666666666666667, + "grad_norm": 0.748602569103241, + "learning_rate": 0.0002, + "loss": 0.4635, + "step": 2170 + }, + { + "epoch": 4.688172043010753, + "grad_norm": 0.8694155216217041, + "learning_rate": 0.0002, + "loss": 0.4695, + "step": 2180 + }, + { + "epoch": 4.709677419354839, + "grad_norm": 0.9134316444396973, + "learning_rate": 0.0002, + "loss": 0.4929, + "step": 2190 + }, + { + "epoch": 4.731182795698925, + "grad_norm": 0.8504763245582581, + "learning_rate": 0.0002, + "loss": 0.4855, + "step": 2200 + }, + { + "epoch": 4.752688172043011, + "grad_norm": 1.0321544408798218, + "learning_rate": 0.0002, + "loss": 0.4517, + "step": 2210 + }, + { + "epoch": 4.774193548387097, + "grad_norm": 0.9368237257003784, + "learning_rate": 0.0002, + "loss": 0.4796, + "step": 2220 + }, + { + "epoch": 4.795698924731183, + "grad_norm": 0.9319947361946106, + "learning_rate": 0.0002, + "loss": 0.4837, + "step": 2230 + }, + { + "epoch": 4.817204301075269, + "grad_norm": 0.904333770275116, + "learning_rate": 0.0002, + "loss": 0.4696, + "step": 2240 + }, + { + "epoch": 4.838709677419355, + "grad_norm": 0.8097078204154968, + "learning_rate": 0.0002, + "loss": 0.4746, + "step": 2250 + }, + { + "epoch": 4.860215053763441, + "grad_norm": 0.9128859043121338, + "learning_rate": 0.0002, + "loss": 0.4438, + "step": 2260 + }, + { + "epoch": 4.881720430107527, + "grad_norm": 0.883129894733429, + "learning_rate": 0.0002, + "loss": 0.4693, + "step": 2270 + }, + { + "epoch": 4.903225806451613, + "grad_norm": 0.85712730884552, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2280 + }, + { + "epoch": 4.924731182795699, + "grad_norm": 1.2101863622665405, + "learning_rate": 0.0002, + "loss": 0.4593, + "step": 2290 + }, + { + "epoch": 4.946236559139785, + "grad_norm": 0.917966902256012, + "learning_rate": 0.0002, + "loss": 0.4779, + "step": 2300 + }, + { + "epoch": 4.967741935483871, + "grad_norm": 0.7740724086761475, + "learning_rate": 0.0002, + "loss": 0.4666, + "step": 2310 + }, + { + "epoch": 4.989247311827957, + "grad_norm": 1.0199906826019287, + "learning_rate": 0.0002, + "loss": 0.4629, + "step": 2320 + }, + { + "epoch": 5.0, + "eval_loss": 0.5363914370536804, + "eval_runtime": 21.3941, + "eval_samples_per_second": 15.472, + "eval_steps_per_second": 1.963, + "step": 2325 + }, + { + "epoch": 5.010752688172043, + "grad_norm": 0.8580502271652222, + "learning_rate": 0.0002, + "loss": 0.4543, + "step": 2330 + }, + { + "epoch": 5.032258064516129, + "grad_norm": 0.7702704668045044, + "learning_rate": 0.0002, + "loss": 0.404, + "step": 2340 + }, + { + "epoch": 5.053763440860215, + "grad_norm": 0.9417401552200317, + "learning_rate": 0.0002, + "loss": 0.4408, + "step": 2350 + }, + { + "epoch": 5.075268817204301, + "grad_norm": 0.9461463689804077, + "learning_rate": 0.0002, + "loss": 0.4306, + "step": 2360 + }, + { + "epoch": 5.096774193548387, + "grad_norm": 0.8931282162666321, + "learning_rate": 0.0002, + "loss": 0.4251, + "step": 2370 + }, + { + "epoch": 5.118279569892473, + "grad_norm": 1.000909447669983, + "learning_rate": 0.0002, + "loss": 0.4249, + "step": 2380 + }, + { + "epoch": 5.139784946236559, + "grad_norm": 0.8640249967575073, + "learning_rate": 0.0002, + "loss": 0.4231, + "step": 2390 + }, + { + "epoch": 5.161290322580645, + "grad_norm": 1.0451020002365112, + "learning_rate": 0.0002, + "loss": 0.4272, + "step": 2400 + }, + { + "epoch": 5.182795698924731, + "grad_norm": 0.7896912097930908, + "learning_rate": 0.0002, + "loss": 0.4177, + "step": 2410 + }, + { + "epoch": 5.204301075268817, + "grad_norm": 0.8424463272094727, + "learning_rate": 0.0002, + "loss": 0.4116, + "step": 2420 + }, + { + "epoch": 5.225806451612903, + "grad_norm": 1.0852105617523193, + "learning_rate": 0.0002, + "loss": 0.4225, + "step": 2430 + }, + { + "epoch": 5.247311827956989, + "grad_norm": 0.9285983443260193, + "learning_rate": 0.0002, + "loss": 0.4352, + "step": 2440 + }, + { + "epoch": 5.268817204301075, + "grad_norm": 0.9119299054145813, + "learning_rate": 0.0002, + "loss": 0.4262, + "step": 2450 + }, + { + "epoch": 5.290322580645161, + "grad_norm": 0.8790456056594849, + "learning_rate": 0.0002, + "loss": 0.4494, + "step": 2460 + }, + { + "epoch": 5.311827956989247, + "grad_norm": 0.8726504445075989, + "learning_rate": 0.0002, + "loss": 0.4421, + "step": 2470 + }, + { + "epoch": 5.333333333333333, + "grad_norm": 0.9415227770805359, + "learning_rate": 0.0002, + "loss": 0.4372, + "step": 2480 + }, + { + "epoch": 5.354838709677419, + "grad_norm": 0.9133324027061462, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2490 + }, + { + "epoch": 5.376344086021505, + "grad_norm": 0.9567879438400269, + "learning_rate": 0.0002, + "loss": 0.4401, + "step": 2500 + }, + { + "epoch": 5.397849462365591, + "grad_norm": 0.9239469766616821, + "learning_rate": 0.0002, + "loss": 0.4094, + "step": 2510 + }, + { + "epoch": 5.419354838709677, + "grad_norm": 1.0293527841567993, + "learning_rate": 0.0002, + "loss": 0.4416, + "step": 2520 + }, + { + "epoch": 5.440860215053763, + "grad_norm": 0.8618718981742859, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2530 + }, + { + "epoch": 5.462365591397849, + "grad_norm": 0.740166187286377, + "learning_rate": 0.0002, + "loss": 0.462, + "step": 2540 + }, + { + "epoch": 5.483870967741936, + "grad_norm": 0.901566743850708, + "learning_rate": 0.0002, + "loss": 0.4172, + "step": 2550 + }, + { + "epoch": 5.505376344086022, + "grad_norm": 0.7957597970962524, + "learning_rate": 0.0002, + "loss": 0.4315, + "step": 2560 + }, + { + "epoch": 5.526881720430108, + "grad_norm": 1.1139343976974487, + "learning_rate": 0.0002, + "loss": 0.4263, + "step": 2570 + }, + { + "epoch": 5.548387096774194, + "grad_norm": 0.989765465259552, + "learning_rate": 0.0002, + "loss": 0.4056, + "step": 2580 + }, + { + "epoch": 5.56989247311828, + "grad_norm": 0.9416969418525696, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2590 + }, + { + "epoch": 5.591397849462366, + "grad_norm": 0.9184830784797668, + "learning_rate": 0.0002, + "loss": 0.4363, + "step": 2600 + }, + { + "epoch": 5.612903225806452, + "grad_norm": 1.0512700080871582, + "learning_rate": 0.0002, + "loss": 0.432, + "step": 2610 + }, + { + "epoch": 5.634408602150538, + "grad_norm": 0.901462197303772, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2620 + }, + { + "epoch": 5.655913978494624, + "grad_norm": 0.9732566475868225, + "learning_rate": 0.0002, + "loss": 0.4332, + "step": 2630 + }, + { + "epoch": 5.67741935483871, + "grad_norm": 0.8180275559425354, + "learning_rate": 0.0002, + "loss": 0.4223, + "step": 2640 + }, + { + "epoch": 5.698924731182796, + "grad_norm": 1.1354765892028809, + "learning_rate": 0.0002, + "loss": 0.4311, + "step": 2650 + }, + { + "epoch": 5.720430107526882, + "grad_norm": 0.9161503314971924, + "learning_rate": 0.0002, + "loss": 0.4409, + "step": 2660 + }, + { + "epoch": 5.741935483870968, + "grad_norm": 1.0561772584915161, + "learning_rate": 0.0002, + "loss": 0.4394, + "step": 2670 + }, + { + "epoch": 5.763440860215054, + "grad_norm": 0.7712787389755249, + "learning_rate": 0.0002, + "loss": 0.424, + "step": 2680 + }, + { + "epoch": 5.78494623655914, + "grad_norm": 0.9674550294876099, + "learning_rate": 0.0002, + "loss": 0.4326, + "step": 2690 + }, + { + "epoch": 5.806451612903226, + "grad_norm": 0.7531843781471252, + "learning_rate": 0.0002, + "loss": 0.4459, + "step": 2700 + }, + { + "epoch": 5.827956989247312, + "grad_norm": 1.1332131624221802, + "learning_rate": 0.0002, + "loss": 0.4276, + "step": 2710 + }, + { + "epoch": 5.849462365591398, + "grad_norm": 0.9367414116859436, + "learning_rate": 0.0002, + "loss": 0.4113, + "step": 2720 + }, + { + "epoch": 5.870967741935484, + "grad_norm": 0.8267706632614136, + "learning_rate": 0.0002, + "loss": 0.4227, + "step": 2730 + }, + { + "epoch": 5.89247311827957, + "grad_norm": 1.1040657758712769, + "learning_rate": 0.0002, + "loss": 0.4218, + "step": 2740 + }, + { + "epoch": 5.913978494623656, + "grad_norm": 0.8879582285881042, + "learning_rate": 0.0002, + "loss": 0.4129, + "step": 2750 + }, + { + "epoch": 5.935483870967742, + "grad_norm": 0.9264667630195618, + "learning_rate": 0.0002, + "loss": 0.4241, + "step": 2760 + }, + { + "epoch": 5.956989247311828, + "grad_norm": 0.9373905658721924, + "learning_rate": 0.0002, + "loss": 0.4318, + "step": 2770 + }, + { + "epoch": 5.978494623655914, + "grad_norm": 1.0063740015029907, + "learning_rate": 0.0002, + "loss": 0.423, + "step": 2780 + }, + { + "epoch": 6.0, + "grad_norm": 0.8291367292404175, + "learning_rate": 0.0002, + "loss": 0.4382, + "step": 2790 + }, + { + "epoch": 6.0, + "eval_loss": 0.5057176947593689, + "eval_runtime": 21.3206, + "eval_samples_per_second": 15.525, + "eval_steps_per_second": 1.97, + "step": 2790 + }, + { + "epoch": 6.021505376344086, + "grad_norm": 1.0137434005737305, + "learning_rate": 0.0002, + "loss": 0.3907, + "step": 2800 + }, + { + "epoch": 6.043010752688172, + "grad_norm": 0.7550579905509949, + "learning_rate": 0.0002, + "loss": 0.3793, + "step": 2810 + }, + { + "epoch": 6.064516129032258, + "grad_norm": 1.0664116144180298, + "learning_rate": 0.0002, + "loss": 0.4003, + "step": 2820 + }, + { + "epoch": 6.086021505376344, + "grad_norm": 0.7908814549446106, + "learning_rate": 0.0002, + "loss": 0.3876, + "step": 2830 + }, + { + "epoch": 6.10752688172043, + "grad_norm": 0.8101639747619629, + "learning_rate": 0.0002, + "loss": 0.3884, + "step": 2840 + }, + { + "epoch": 6.129032258064516, + "grad_norm": 0.7882567048072815, + "learning_rate": 0.0002, + "loss": 0.3835, + "step": 2850 + }, + { + "epoch": 6.150537634408602, + "grad_norm": 1.0134103298187256, + "learning_rate": 0.0002, + "loss": 0.3827, + "step": 2860 + }, + { + "epoch": 6.172043010752688, + "grad_norm": 0.9240215420722961, + "learning_rate": 0.0002, + "loss": 0.3963, + "step": 2870 + }, + { + "epoch": 6.193548387096774, + "grad_norm": 0.8322992920875549, + "learning_rate": 0.0002, + "loss": 0.4049, + "step": 2880 + }, + { + "epoch": 6.21505376344086, + "grad_norm": 0.9238720536231995, + "learning_rate": 0.0002, + "loss": 0.381, + "step": 2890 + }, + { + "epoch": 6.236559139784946, + "grad_norm": 0.9361863732337952, + "learning_rate": 0.0002, + "loss": 0.3852, + "step": 2900 + }, + { + "epoch": 6.258064516129032, + "grad_norm": 0.9670863747596741, + "learning_rate": 0.0002, + "loss": 0.3917, + "step": 2910 + }, + { + "epoch": 6.279569892473118, + "grad_norm": 0.7724685668945312, + "learning_rate": 0.0002, + "loss": 0.3826, + "step": 2920 + }, + { + "epoch": 6.301075268817204, + "grad_norm": 0.8125540614128113, + "learning_rate": 0.0002, + "loss": 0.3988, + "step": 2930 + }, + { + "epoch": 6.32258064516129, + "grad_norm": 0.9483002424240112, + "learning_rate": 0.0002, + "loss": 0.3778, + "step": 2940 + }, + { + "epoch": 6.344086021505376, + "grad_norm": 1.098374843597412, + "learning_rate": 0.0002, + "loss": 0.3823, + "step": 2950 + }, + { + "epoch": 6.365591397849462, + "grad_norm": 1.0169378519058228, + "learning_rate": 0.0002, + "loss": 0.3886, + "step": 2960 + }, + { + "epoch": 6.387096774193548, + "grad_norm": 0.8594151139259338, + "learning_rate": 0.0002, + "loss": 0.3936, + "step": 2970 + }, + { + "epoch": 6.408602150537634, + "grad_norm": 0.9507288336753845, + "learning_rate": 0.0002, + "loss": 0.3871, + "step": 2980 + }, + { + "epoch": 6.43010752688172, + "grad_norm": 0.9212459325790405, + "learning_rate": 0.0002, + "loss": 0.3852, + "step": 2990 + }, + { + "epoch": 6.451612903225806, + "grad_norm": 0.9696952104568481, + "learning_rate": 0.0002, + "loss": 0.3929, + "step": 3000 + }, + { + "epoch": 6.473118279569892, + "grad_norm": 0.8872610330581665, + "learning_rate": 0.0002, + "loss": 0.3933, + "step": 3010 + }, + { + "epoch": 6.494623655913978, + "grad_norm": 0.9207532405853271, + "learning_rate": 0.0002, + "loss": 0.393, + "step": 3020 + }, + { + "epoch": 6.516129032258064, + "grad_norm": 0.9116262793540955, + "learning_rate": 0.0002, + "loss": 0.3848, + "step": 3030 + }, + { + "epoch": 6.53763440860215, + "grad_norm": 0.83391934633255, + "learning_rate": 0.0002, + "loss": 0.3964, + "step": 3040 + }, + { + "epoch": 6.559139784946236, + "grad_norm": 0.890931248664856, + "learning_rate": 0.0002, + "loss": 0.3758, + "step": 3050 + }, + { + "epoch": 6.580645161290323, + "grad_norm": 1.0100581645965576, + "learning_rate": 0.0002, + "loss": 0.3944, + "step": 3060 + }, + { + "epoch": 6.602150537634409, + "grad_norm": 0.783526599407196, + "learning_rate": 0.0002, + "loss": 0.3992, + "step": 3070 + }, + { + "epoch": 6.623655913978495, + "grad_norm": 1.324326515197754, + "learning_rate": 0.0002, + "loss": 0.4144, + "step": 3080 + }, + { + "epoch": 6.645161290322581, + "grad_norm": 0.9102319478988647, + "learning_rate": 0.0002, + "loss": 0.3986, + "step": 3090 + }, + { + "epoch": 6.666666666666667, + "grad_norm": 0.96951824426651, + "learning_rate": 0.0002, + "loss": 0.3873, + "step": 3100 + }, + { + "epoch": 6.688172043010753, + "grad_norm": 0.9786809086799622, + "learning_rate": 0.0002, + "loss": 0.3931, + "step": 3110 + }, + { + "epoch": 6.709677419354839, + "grad_norm": 1.0301238298416138, + "learning_rate": 0.0002, + "loss": 0.3714, + "step": 3120 + }, + { + "epoch": 6.731182795698925, + "grad_norm": 1.1690906286239624, + "learning_rate": 0.0002, + "loss": 0.3823, + "step": 3130 + }, + { + "epoch": 6.752688172043011, + "grad_norm": 0.963306725025177, + "learning_rate": 0.0002, + "loss": 0.3936, + "step": 3140 + }, + { + "epoch": 6.774193548387097, + "grad_norm": 0.8565770983695984, + "learning_rate": 0.0002, + "loss": 0.3975, + "step": 3150 + }, + { + "epoch": 6.795698924731183, + "grad_norm": 0.8887158632278442, + "learning_rate": 0.0002, + "loss": 0.3903, + "step": 3160 + }, + { + "epoch": 6.817204301075269, + "grad_norm": 0.8234561085700989, + "learning_rate": 0.0002, + "loss": 0.4098, + "step": 3170 + }, + { + "epoch": 6.838709677419355, + "grad_norm": 0.9000219702720642, + "learning_rate": 0.0002, + "loss": 0.4041, + "step": 3180 + }, + { + "epoch": 6.860215053763441, + "grad_norm": 1.1366009712219238, + "learning_rate": 0.0002, + "loss": 0.3933, + "step": 3190 + }, + { + "epoch": 6.881720430107527, + "grad_norm": 0.8747097849845886, + "learning_rate": 0.0002, + "loss": 0.3972, + "step": 3200 + }, + { + "epoch": 6.903225806451613, + "grad_norm": 0.8533893823623657, + "learning_rate": 0.0002, + "loss": 0.404, + "step": 3210 + }, + { + "epoch": 6.924731182795699, + "grad_norm": 0.8127949237823486, + "learning_rate": 0.0002, + "loss": 0.3906, + "step": 3220 + }, + { + "epoch": 6.946236559139785, + "grad_norm": 0.8872477412223816, + "learning_rate": 0.0002, + "loss": 0.3747, + "step": 3230 + }, + { + "epoch": 6.967741935483871, + "grad_norm": 0.8541608452796936, + "learning_rate": 0.0002, + "loss": 0.3817, + "step": 3240 + }, + { + "epoch": 6.989247311827957, + "grad_norm": 0.8390752673149109, + "learning_rate": 0.0002, + "loss": 0.3863, + "step": 3250 + }, + { + "epoch": 7.0, + "eval_loss": 0.48264747858047485, + "eval_runtime": 21.3942, + "eval_samples_per_second": 15.471, + "eval_steps_per_second": 1.963, + "step": 3255 + }, + { + "epoch": 7.010752688172043, + "grad_norm": 1.0476834774017334, + "learning_rate": 0.0002, + "loss": 0.391, + "step": 3260 + }, + { + "epoch": 7.032258064516129, + "grad_norm": 0.7501131296157837, + "learning_rate": 0.0002, + "loss": 0.3422, + "step": 3270 + }, + { + "epoch": 7.053763440860215, + "grad_norm": 0.9057435393333435, + "learning_rate": 0.0002, + "loss": 0.3542, + "step": 3280 + }, + { + "epoch": 7.075268817204301, + "grad_norm": 0.7058833241462708, + "learning_rate": 0.0002, + "loss": 0.3522, + "step": 3290 + }, + { + "epoch": 7.096774193548387, + "grad_norm": 0.9908691644668579, + "learning_rate": 0.0002, + "loss": 0.3575, + "step": 3300 + }, + { + "epoch": 7.118279569892473, + "grad_norm": 0.9515542984008789, + "learning_rate": 0.0002, + "loss": 0.3755, + "step": 3310 + }, + { + "epoch": 7.139784946236559, + "grad_norm": 0.997296154499054, + "learning_rate": 0.0002, + "loss": 0.3612, + "step": 3320 + }, + { + "epoch": 7.161290322580645, + "grad_norm": 0.9810499548912048, + "learning_rate": 0.0002, + "loss": 0.3616, + "step": 3330 + }, + { + "epoch": 7.182795698924731, + "grad_norm": 0.8133336901664734, + "learning_rate": 0.0002, + "loss": 0.3584, + "step": 3340 + }, + { + "epoch": 7.204301075268817, + "grad_norm": 1.0679855346679688, + "learning_rate": 0.0002, + "loss": 0.3644, + "step": 3350 + }, + { + "epoch": 7.225806451612903, + "grad_norm": 0.7656611204147339, + "learning_rate": 0.0002, + "loss": 0.358, + "step": 3360 + }, + { + "epoch": 7.247311827956989, + "grad_norm": 0.9478468298912048, + "learning_rate": 0.0002, + "loss": 0.3616, + "step": 3370 + }, + { + "epoch": 7.268817204301075, + "grad_norm": 0.8425832986831665, + "learning_rate": 0.0002, + "loss": 0.3631, + "step": 3380 + }, + { + "epoch": 7.290322580645161, + "grad_norm": 0.9573627710342407, + "learning_rate": 0.0002, + "loss": 0.3735, + "step": 3390 + }, + { + "epoch": 7.311827956989247, + "grad_norm": 0.9219972491264343, + "learning_rate": 0.0002, + "loss": 0.359, + "step": 3400 + }, + { + "epoch": 7.333333333333333, + "grad_norm": 0.876099705696106, + "learning_rate": 0.0002, + "loss": 0.3644, + "step": 3410 + }, + { + "epoch": 7.354838709677419, + "grad_norm": 1.0051969289779663, + "learning_rate": 0.0002, + "loss": 0.3747, + "step": 3420 + }, + { + "epoch": 7.376344086021505, + "grad_norm": 1.1347692012786865, + "learning_rate": 0.0002, + "loss": 0.3527, + "step": 3430 + }, + { + "epoch": 7.397849462365591, + "grad_norm": 0.9641520380973816, + "learning_rate": 0.0002, + "loss": 0.3644, + "step": 3440 + }, + { + "epoch": 7.419354838709677, + "grad_norm": 0.7777793407440186, + "learning_rate": 0.0002, + "loss": 0.3486, + "step": 3450 + }, + { + "epoch": 7.440860215053763, + "grad_norm": 0.9649308323860168, + "learning_rate": 0.0002, + "loss": 0.3593, + "step": 3460 + }, + { + "epoch": 7.462365591397849, + "grad_norm": 0.9245585203170776, + "learning_rate": 0.0002, + "loss": 0.3754, + "step": 3470 + }, + { + "epoch": 7.483870967741936, + "grad_norm": 0.8298666477203369, + "learning_rate": 0.0002, + "loss": 0.3732, + "step": 3480 + }, + { + "epoch": 7.505376344086022, + "grad_norm": 1.1579877138137817, + "learning_rate": 0.0002, + "loss": 0.3585, + "step": 3490 + }, + { + "epoch": 7.526881720430108, + "grad_norm": 0.8718803524971008, + "learning_rate": 0.0002, + "loss": 0.3505, + "step": 3500 + }, + { + "epoch": 7.548387096774194, + "grad_norm": 0.7785154581069946, + "learning_rate": 0.0002, + "loss": 0.3725, + "step": 3510 + }, + { + "epoch": 7.56989247311828, + "grad_norm": 0.753657877445221, + "learning_rate": 0.0002, + "loss": 0.3507, + "step": 3520 + }, + { + "epoch": 7.591397849462366, + "grad_norm": 0.834524929523468, + "learning_rate": 0.0002, + "loss": 0.3665, + "step": 3530 + }, + { + "epoch": 7.612903225806452, + "grad_norm": 0.9546446800231934, + "learning_rate": 0.0002, + "loss": 0.3686, + "step": 3540 + }, + { + "epoch": 7.634408602150538, + "grad_norm": 0.8275105357170105, + "learning_rate": 0.0002, + "loss": 0.3673, + "step": 3550 + }, + { + "epoch": 7.655913978494624, + "grad_norm": 0.9137991070747375, + "learning_rate": 0.0002, + "loss": 0.381, + "step": 3560 + }, + { + "epoch": 7.67741935483871, + "grad_norm": 0.993617057800293, + "learning_rate": 0.0002, + "loss": 0.3565, + "step": 3570 + }, + { + "epoch": 7.698924731182796, + "grad_norm": 1.0079665184020996, + "learning_rate": 0.0002, + "loss": 0.3701, + "step": 3580 + }, + { + "epoch": 7.720430107526882, + "grad_norm": 0.8295491337776184, + "learning_rate": 0.0002, + "loss": 0.3495, + "step": 3590 + }, + { + "epoch": 7.741935483870968, + "grad_norm": 0.814578115940094, + "learning_rate": 0.0002, + "loss": 0.374, + "step": 3600 + }, + { + "epoch": 7.763440860215054, + "grad_norm": 0.8422811031341553, + "learning_rate": 0.0002, + "loss": 0.3673, + "step": 3610 + }, + { + "epoch": 7.78494623655914, + "grad_norm": 1.0220918655395508, + "learning_rate": 0.0002, + "loss": 0.3452, + "step": 3620 + }, + { + "epoch": 7.806451612903226, + "grad_norm": 0.8065739870071411, + "learning_rate": 0.0002, + "loss": 0.3641, + "step": 3630 + }, + { + "epoch": 7.827956989247312, + "grad_norm": 0.8039169907569885, + "learning_rate": 0.0002, + "loss": 0.3699, + "step": 3640 + }, + { + "epoch": 7.849462365591398, + "grad_norm": 1.0766745805740356, + "learning_rate": 0.0002, + "loss": 0.3635, + "step": 3650 + }, + { + "epoch": 7.870967741935484, + "grad_norm": 1.0806103944778442, + "learning_rate": 0.0002, + "loss": 0.3633, + "step": 3660 + }, + { + "epoch": 7.89247311827957, + "grad_norm": 1.1005314588546753, + "learning_rate": 0.0002, + "loss": 0.3678, + "step": 3670 + }, + { + "epoch": 7.913978494623656, + "grad_norm": 0.9276911616325378, + "learning_rate": 0.0002, + "loss": 0.3583, + "step": 3680 + }, + { + "epoch": 7.935483870967742, + "grad_norm": 0.9914153814315796, + "learning_rate": 0.0002, + "loss": 0.374, + "step": 3690 + }, + { + "epoch": 7.956989247311828, + "grad_norm": 0.8128159046173096, + "learning_rate": 0.0002, + "loss": 0.3575, + "step": 3700 + }, + { + "epoch": 7.978494623655914, + "grad_norm": 0.8122950196266174, + "learning_rate": 0.0002, + "loss": 0.3491, + "step": 3710 + }, + { + "epoch": 8.0, + "grad_norm": 1.0291857719421387, + "learning_rate": 0.0002, + "loss": 0.3723, + "step": 3720 + }, + { + "epoch": 8.0, + "eval_loss": 0.46552130579948425, + "eval_runtime": 21.3919, + "eval_samples_per_second": 15.473, + "eval_steps_per_second": 1.963, + "step": 3720 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 4.546582636658688e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3720/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bbb58188844fc5715b13662e683c9bb2d19ebcc --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b25038edeef7fb2f5419a519ef6f7cecf0131ccd565c4322f9eda6c024a7da2 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..924994f56601d458aa90608b55379cefe79b5dea --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b577877b637e44e84199f49726c667f72ebecd540dd62885a1b3388352ff61a +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd9091c61ace4ca0a36fce7f975e446d2c6d7189 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab337f1f93edeb8702cf990e25b6fb77257b92ff4152411237615ef14d6f0160 +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d7f87ba141fde557bb0d944880e0de0a4939c4c --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1590542588edd3aa2e2d5dc727769ef980fc339b8db6475867c3b5914d8e1e +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..04ee84a8a42117f9bddadfe1e3751ec5e6219b59 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/trainer_state.json @@ -0,0 +1,363 @@ +{ + "best_metric": 0.7740864157676697, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465", + "epoch": 1.0, + "eval_steps": 10, + "global_step": 465, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5683228295823360.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/README.md b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/README.md new file mode 100644 index 0000000000000000000000000000000000000000..830a14f7db2734beb59f320973504e45a3fe87f5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e99bbcd43df1c19d98706c7e3be95c93844c5349 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_model.safetensors b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f940190d9795c50ad873dfd121e1a9ec33b167d5 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a08a9f9d94c71b4ae51ac490bf8084de74a3a512d3c57dbc41c48341ec460d59 +size 29500848 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/optimizer.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..54a4f305b9b2db64f4657e04730cd6b3560d51f1 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcdb8d97813ef09ab3f62cfcb1869ccc4eb3053b6d6147c01042dc6eb164df8b +size 15064314 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/rng_state.pth b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..90073eaaf42985bbbdf288ae3c79ab3b7a859049 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6cdf4123075a81389fb31c181195d1d3f878c02aba83d2601e972fdaf99df90 +size 14244 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/scheduler.pt b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ea512ee003a4a7388698db6b6ab095e5022a950 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf9812a2e2f6e5ccc013a71db2228ca4973e2e4618013a6759f1a4c9ad6ca514 +size 1064 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/trainer_state.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2024a1438f8d2f911191c5967d08712762fb118f --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/trainer_state.json @@ -0,0 +1,700 @@ +{ + "best_metric": 0.6885261535644531, + "best_model_checkpoint": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930", + "epoch": 2.0, + "eval_steps": 10, + "global_step": 930, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.021505376344086023, + "grad_norm": 0.9075053930282593, + "learning_rate": 0.0002, + "loss": 3.4172, + "step": 10 + }, + { + "epoch": 0.043010752688172046, + "grad_norm": 1.4321208000183105, + "learning_rate": 0.0002, + "loss": 2.5888, + "step": 20 + }, + { + "epoch": 0.06451612903225806, + "grad_norm": 1.7500602006912231, + "learning_rate": 0.0002, + "loss": 2.1195, + "step": 30 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 0.7606641054153442, + "learning_rate": 0.0002, + "loss": 1.9303, + "step": 40 + }, + { + "epoch": 0.10752688172043011, + "grad_norm": 1.2754929065704346, + "learning_rate": 0.0002, + "loss": 1.6112, + "step": 50 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.0936230421066284, + "learning_rate": 0.0002, + "loss": 1.4319, + "step": 60 + }, + { + "epoch": 0.15053763440860216, + "grad_norm": 1.144593596458435, + "learning_rate": 0.0002, + "loss": 1.3568, + "step": 70 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 1.2181956768035889, + "learning_rate": 0.0002, + "loss": 1.2028, + "step": 80 + }, + { + "epoch": 0.1935483870967742, + "grad_norm": 1.1260095834732056, + "learning_rate": 0.0002, + "loss": 1.1534, + "step": 90 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 1.1155284643173218, + "learning_rate": 0.0002, + "loss": 1.1089, + "step": 100 + }, + { + "epoch": 0.23655913978494625, + "grad_norm": 1.089565396308899, + "learning_rate": 0.0002, + "loss": 1.0883, + "step": 110 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 0.9833471775054932, + "learning_rate": 0.0002, + "loss": 1.0814, + "step": 120 + }, + { + "epoch": 0.27956989247311825, + "grad_norm": 1.0265629291534424, + "learning_rate": 0.0002, + "loss": 1.0239, + "step": 130 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 0.9344286322593689, + "learning_rate": 0.0002, + "loss": 0.9888, + "step": 140 + }, + { + "epoch": 0.3225806451612903, + "grad_norm": 0.9883386492729187, + "learning_rate": 0.0002, + "loss": 1.0043, + "step": 150 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 0.9299277067184448, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 160 + }, + { + "epoch": 0.3655913978494624, + "grad_norm": 1.390045404434204, + "learning_rate": 0.0002, + "loss": 0.9432, + "step": 170 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 1.0313078165054321, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 180 + }, + { + "epoch": 0.40860215053763443, + "grad_norm": 1.1792205572128296, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 190 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.049809217453003, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 200 + }, + { + "epoch": 0.45161290322580644, + "grad_norm": 0.990111768245697, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 210 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 0.9870412349700928, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 220 + }, + { + "epoch": 0.4946236559139785, + "grad_norm": 0.8557345867156982, + "learning_rate": 0.0002, + "loss": 0.9129, + "step": 230 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 0.9746861457824707, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 240 + }, + { + "epoch": 0.5376344086021505, + "grad_norm": 0.9010438323020935, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 250 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 0.9061082005500793, + "learning_rate": 0.0002, + "loss": 0.8241, + "step": 260 + }, + { + "epoch": 0.5806451612903226, + "grad_norm": 0.9311846494674683, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 270 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 0.9140254855155945, + "learning_rate": 0.0002, + "loss": 0.8256, + "step": 280 + }, + { + "epoch": 0.6236559139784946, + "grad_norm": 0.9722253084182739, + "learning_rate": 0.0002, + "loss": 0.8441, + "step": 290 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 0.8539168238639832, + "learning_rate": 0.0002, + "loss": 0.8314, + "step": 300 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 0.9053162932395935, + "learning_rate": 0.0002, + "loss": 0.8528, + "step": 310 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 0.8444252610206604, + "learning_rate": 0.0002, + "loss": 0.8209, + "step": 320 + }, + { + "epoch": 0.7096774193548387, + "grad_norm": 0.8127437829971313, + "learning_rate": 0.0002, + "loss": 0.8101, + "step": 330 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 0.886555016040802, + "learning_rate": 0.0002, + "loss": 0.8223, + "step": 340 + }, + { + "epoch": 0.7526881720430108, + "grad_norm": 0.8458548784255981, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 350 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 0.8683297634124756, + "learning_rate": 0.0002, + "loss": 0.8295, + "step": 360 + }, + { + "epoch": 0.7956989247311828, + "grad_norm": 0.8308405876159668, + "learning_rate": 0.0002, + "loss": 0.8232, + "step": 370 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 0.8305579423904419, + "learning_rate": 0.0002, + "loss": 0.7752, + "step": 380 + }, + { + "epoch": 0.8387096774193549, + "grad_norm": 0.8545567393302917, + "learning_rate": 0.0002, + "loss": 0.8267, + "step": 390 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 0.8486055731773376, + "learning_rate": 0.0002, + "loss": 0.8212, + "step": 400 + }, + { + "epoch": 0.8817204301075269, + "grad_norm": 0.8126763105392456, + "learning_rate": 0.0002, + "loss": 0.743, + "step": 410 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 0.8494045734405518, + "learning_rate": 0.0002, + "loss": 0.7993, + "step": 420 + }, + { + "epoch": 0.9247311827956989, + "grad_norm": 0.7639183402061462, + "learning_rate": 0.0002, + "loss": 0.8213, + "step": 430 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 0.858101487159729, + "learning_rate": 0.0002, + "loss": 0.8015, + "step": 440 + }, + { + "epoch": 0.967741935483871, + "grad_norm": 0.8141381740570068, + "learning_rate": 0.0002, + "loss": 0.7629, + "step": 450 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 0.8072513937950134, + "learning_rate": 0.0002, + "loss": 0.7357, + "step": 460 + }, + { + "epoch": 1.0, + "eval_loss": 0.7740864157676697, + "eval_runtime": 21.383, + "eval_samples_per_second": 15.48, + "eval_steps_per_second": 1.964, + "step": 465 + }, + { + "epoch": 1.010752688172043, + "grad_norm": 0.8269494771957397, + "learning_rate": 0.0002, + "loss": 0.7701, + "step": 470 + }, + { + "epoch": 1.032258064516129, + "grad_norm": 0.7814009189605713, + "learning_rate": 0.0002, + "loss": 0.7532, + "step": 480 + }, + { + "epoch": 1.053763440860215, + "grad_norm": 0.8183923363685608, + "learning_rate": 0.0002, + "loss": 0.7689, + "step": 490 + }, + { + "epoch": 1.075268817204301, + "grad_norm": 0.8146600723266602, + "learning_rate": 0.0002, + "loss": 0.765, + "step": 500 + }, + { + "epoch": 1.096774193548387, + "grad_norm": 0.8635126352310181, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 510 + }, + { + "epoch": 1.118279569892473, + "grad_norm": 0.8520359396934509, + "learning_rate": 0.0002, + "loss": 0.7302, + "step": 520 + }, + { + "epoch": 1.139784946236559, + "grad_norm": 0.8026443123817444, + "learning_rate": 0.0002, + "loss": 0.7492, + "step": 530 + }, + { + "epoch": 1.1612903225806452, + "grad_norm": 0.8157258629798889, + "learning_rate": 0.0002, + "loss": 0.7518, + "step": 540 + }, + { + "epoch": 1.1827956989247312, + "grad_norm": 0.9450796246528625, + "learning_rate": 0.0002, + "loss": 0.7461, + "step": 550 + }, + { + "epoch": 1.2043010752688172, + "grad_norm": 0.8859835863113403, + "learning_rate": 0.0002, + "loss": 0.7128, + "step": 560 + }, + { + "epoch": 1.2258064516129032, + "grad_norm": 0.7819921970367432, + "learning_rate": 0.0002, + "loss": 0.7067, + "step": 570 + }, + { + "epoch": 1.2473118279569892, + "grad_norm": 0.7823445796966553, + "learning_rate": 0.0002, + "loss": 0.7577, + "step": 580 + }, + { + "epoch": 1.2688172043010753, + "grad_norm": 0.7931883931159973, + "learning_rate": 0.0002, + "loss": 0.7358, + "step": 590 + }, + { + "epoch": 1.2903225806451613, + "grad_norm": 0.7495734095573425, + "learning_rate": 0.0002, + "loss": 0.723, + "step": 600 + }, + { + "epoch": 1.3118279569892473, + "grad_norm": 0.9272717237472534, + "learning_rate": 0.0002, + "loss": 0.7386, + "step": 610 + }, + { + "epoch": 1.3333333333333333, + "grad_norm": 0.7968398332595825, + "learning_rate": 0.0002, + "loss": 0.7498, + "step": 620 + }, + { + "epoch": 1.3548387096774195, + "grad_norm": 0.7813659310340881, + "learning_rate": 0.0002, + "loss": 0.7635, + "step": 630 + }, + { + "epoch": 1.3763440860215055, + "grad_norm": 0.730925977230072, + "learning_rate": 0.0002, + "loss": 0.6665, + "step": 640 + }, + { + "epoch": 1.3978494623655915, + "grad_norm": 0.8011482954025269, + "learning_rate": 0.0002, + "loss": 0.7037, + "step": 650 + }, + { + "epoch": 1.4193548387096775, + "grad_norm": 0.7770085334777832, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 660 + }, + { + "epoch": 1.4408602150537635, + "grad_norm": 0.7432682514190674, + "learning_rate": 0.0002, + "loss": 0.6949, + "step": 670 + }, + { + "epoch": 1.4623655913978495, + "grad_norm": 0.8820092678070068, + "learning_rate": 0.0002, + "loss": 0.7444, + "step": 680 + }, + { + "epoch": 1.4838709677419355, + "grad_norm": 0.7786208987236023, + "learning_rate": 0.0002, + "loss": 0.6758, + "step": 690 + }, + { + "epoch": 1.5053763440860215, + "grad_norm": 0.7467480301856995, + "learning_rate": 0.0002, + "loss": 0.6702, + "step": 700 + }, + { + "epoch": 1.5268817204301075, + "grad_norm": 0.8147122263908386, + "learning_rate": 0.0002, + "loss": 0.7107, + "step": 710 + }, + { + "epoch": 1.5483870967741935, + "grad_norm": 0.796030580997467, + "learning_rate": 0.0002, + "loss": 0.7144, + "step": 720 + }, + { + "epoch": 1.5698924731182795, + "grad_norm": 0.8776171207427979, + "learning_rate": 0.0002, + "loss": 0.6936, + "step": 730 + }, + { + "epoch": 1.5913978494623655, + "grad_norm": 0.8056126236915588, + "learning_rate": 0.0002, + "loss": 0.7101, + "step": 740 + }, + { + "epoch": 1.6129032258064515, + "grad_norm": 0.8141863346099854, + "learning_rate": 0.0002, + "loss": 0.7162, + "step": 750 + }, + { + "epoch": 1.6344086021505375, + "grad_norm": 0.8100557327270508, + "learning_rate": 0.0002, + "loss": 0.7088, + "step": 760 + }, + { + "epoch": 1.6559139784946235, + "grad_norm": 0.8283200860023499, + "learning_rate": 0.0002, + "loss": 0.7212, + "step": 770 + }, + { + "epoch": 1.6774193548387095, + "grad_norm": 0.800865113735199, + "learning_rate": 0.0002, + "loss": 0.694, + "step": 780 + }, + { + "epoch": 1.6989247311827957, + "grad_norm": 0.8052287697792053, + "learning_rate": 0.0002, + "loss": 0.7076, + "step": 790 + }, + { + "epoch": 1.7204301075268817, + "grad_norm": 0.8619674444198608, + "learning_rate": 0.0002, + "loss": 0.7257, + "step": 800 + }, + { + "epoch": 1.7419354838709677, + "grad_norm": 0.8907215595245361, + "learning_rate": 0.0002, + "loss": 0.7141, + "step": 810 + }, + { + "epoch": 1.7634408602150538, + "grad_norm": 0.6976316571235657, + "learning_rate": 0.0002, + "loss": 0.7035, + "step": 820 + }, + { + "epoch": 1.7849462365591398, + "grad_norm": 0.7533746957778931, + "learning_rate": 0.0002, + "loss": 0.6916, + "step": 830 + }, + { + "epoch": 1.8064516129032258, + "grad_norm": 0.7326804399490356, + "learning_rate": 0.0002, + "loss": 0.7094, + "step": 840 + }, + { + "epoch": 1.827956989247312, + "grad_norm": 0.7782683372497559, + "learning_rate": 0.0002, + "loss": 0.6891, + "step": 850 + }, + { + "epoch": 1.849462365591398, + "grad_norm": 0.7424806356430054, + "learning_rate": 0.0002, + "loss": 0.6931, + "step": 860 + }, + { + "epoch": 1.870967741935484, + "grad_norm": 1.172325611114502, + "learning_rate": 0.0002, + "loss": 0.7354, + "step": 870 + }, + { + "epoch": 1.89247311827957, + "grad_norm": 0.771058201789856, + "learning_rate": 0.0002, + "loss": 0.6866, + "step": 880 + }, + { + "epoch": 1.913978494623656, + "grad_norm": 0.8624904155731201, + "learning_rate": 0.0002, + "loss": 0.7296, + "step": 890 + }, + { + "epoch": 1.935483870967742, + "grad_norm": 0.7062820792198181, + "learning_rate": 0.0002, + "loss": 0.7233, + "step": 900 + }, + { + "epoch": 1.956989247311828, + "grad_norm": 0.7560103535652161, + "learning_rate": 0.0002, + "loss": 0.6966, + "step": 910 + }, + { + "epoch": 1.978494623655914, + "grad_norm": 0.788899838924408, + "learning_rate": 0.0002, + "loss": 0.69, + "step": 920 + }, + { + "epoch": 2.0, + "grad_norm": 0.6562113761901855, + "learning_rate": 0.0002, + "loss": 0.6505, + "step": 930 + }, + { + "epoch": 2.0, + "eval_loss": 0.6885261535644531, + "eval_runtime": 21.4291, + "eval_samples_per_second": 15.446, + "eval_steps_per_second": 1.96, + "step": 930 + } + ], + "logging_steps": 10, + "max_steps": 3720, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.136645659164672e+16, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/special_tokens_map.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..f58963a682665634ab180c28667e4faa8cf02ba2 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f559f2189f392b4555613965f089e7c4d300b41fbe080bf79da0d676e33ee7f0 +size 34356041 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.model b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer_config.json b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1adb4796c13b8d975555ecec45876ee75d1ae8b7 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/tokenizer_config.json @@ -0,0 +1,1757 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_args.bin b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..483b53d4fc1c568a5fc890fa850e3450f390b208 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e4c4fe2be590ed03492316230adb3a1edca3e4066c55f3716c0352d7134c564 +size 5560 diff --git a/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_log.jsonl b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..04e083afc9c7ee4335afa1c19d4eea4fe4df4714 --- /dev/null +++ b/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/training_log.jsonl @@ -0,0 +1,8 @@ +{"epoch": 1.0, "step": 465, "epoch_duration": 452.59338760375977, "total_accumulated_duration": 452.59338760375977, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 5628.7490234375}, "avg_memory_reserved": {"GPU_0": 6182.0}, "peak_memory_reserved": {"GPU_0": 6182.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}]} +{"epoch": 2.0, "step": 930, "epoch_duration": 451.6676411628723, "total_accumulated_duration": 904.2610287666321, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-465", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}]} +{"epoch": 3.0, "step": 1395, "epoch_duration": 448.89671635627747, "total_accumulated_duration": 1353.1577451229095, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-930", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}]} +{"epoch": 4.0, "step": 1860, "epoch_duration": 449.0518436431885, "total_accumulated_duration": 1802.209588766098, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1395", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}, {"eval_loss": 0.6257933378219604, "eval_runtime": 21.3692, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.965, "epoch": 3.0, "step": 1395}, {"loss": 0.5696, "grad_norm": 0.8650677800178528, "learning_rate": 0.0002, "epoch": 3.010752688172043, "step": 1400}, {"loss": 0.5483, "grad_norm": 0.8364197015762329, "learning_rate": 0.0002, "epoch": 3.032258064516129, "step": 1410}, {"loss": 0.5606, "grad_norm": 0.8278448581695557, "learning_rate": 0.0002, "epoch": 3.053763440860215, "step": 1420}, {"loss": 0.5572, "grad_norm": 0.8806642889976501, "learning_rate": 0.0002, "epoch": 3.075268817204301, "step": 1430}, {"loss": 0.585, "grad_norm": 0.8180029988288879, "learning_rate": 0.0002, "epoch": 3.096774193548387, "step": 1440}, {"loss": 0.5667, "grad_norm": 0.8561782836914062, "learning_rate": 0.0002, "epoch": 3.118279569892473, "step": 1450}, {"loss": 0.5246, "grad_norm": 0.8377029299736023, "learning_rate": 0.0002, "epoch": 3.139784946236559, "step": 1460}, {"loss": 0.5464, "grad_norm": 0.885779082775116, "learning_rate": 0.0002, "epoch": 3.161290322580645, "step": 1470}, {"loss": 0.541, "grad_norm": 0.9388518333435059, "learning_rate": 0.0002, "epoch": 3.182795698924731, "step": 1480}, {"loss": 0.5447, "grad_norm": 0.8816235661506653, "learning_rate": 0.0002, "epoch": 3.204301075268817, "step": 1490}, {"loss": 0.5466, "grad_norm": 0.9885783791542053, "learning_rate": 0.0002, "epoch": 3.225806451612903, "step": 1500}, {"loss": 0.5455, "grad_norm": 0.8635850548744202, "learning_rate": 0.0002, "epoch": 3.247311827956989, "step": 1510}, {"loss": 0.5419, "grad_norm": 0.829853355884552, "learning_rate": 0.0002, "epoch": 3.268817204301075, "step": 1520}, {"loss": 0.54, "grad_norm": 0.9037486910820007, "learning_rate": 0.0002, "epoch": 3.2903225806451615, "step": 1530}, {"loss": 0.5375, "grad_norm": 0.8173713684082031, "learning_rate": 0.0002, "epoch": 3.3118279569892475, "step": 1540}, {"loss": 0.5405, "grad_norm": 0.796953022480011, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1550}, {"loss": 0.5505, "grad_norm": 0.7894400358200073, "learning_rate": 0.0002, "epoch": 3.3548387096774195, "step": 1560}, {"loss": 0.5395, "grad_norm": 0.9434949159622192, "learning_rate": 0.0002, "epoch": 3.3763440860215055, "step": 1570}, {"loss": 0.5271, "grad_norm": 0.8666760325431824, "learning_rate": 0.0002, "epoch": 3.3978494623655915, "step": 1580}, {"loss": 0.5439, "grad_norm": 0.7782467007637024, "learning_rate": 0.0002, "epoch": 3.4193548387096775, "step": 1590}, {"loss": 0.5161, "grad_norm": 0.8849126696586609, "learning_rate": 0.0002, "epoch": 3.4408602150537635, "step": 1600}, {"loss": 0.5353, "grad_norm": 0.7863831520080566, "learning_rate": 0.0002, "epoch": 3.4623655913978495, "step": 1610}, {"loss": 0.5308, "grad_norm": 1.0403116941452026, "learning_rate": 0.0002, "epoch": 3.4838709677419355, "step": 1620}, {"loss": 0.5339, "grad_norm": 0.8307499289512634, "learning_rate": 0.0002, "epoch": 3.5053763440860215, "step": 1630}, {"loss": 0.5361, "grad_norm": 0.9132118821144104, "learning_rate": 0.0002, "epoch": 3.5268817204301075, "step": 1640}, {"loss": 0.5828, "grad_norm": 0.9322578310966492, "learning_rate": 0.0002, "epoch": 3.5483870967741935, "step": 1650}, {"loss": 0.546, "grad_norm": 0.9782460331916809, "learning_rate": 0.0002, "epoch": 3.5698924731182795, "step": 1660}, {"loss": 0.5424, "grad_norm": 0.7189919352531433, "learning_rate": 0.0002, "epoch": 3.5913978494623655, "step": 1670}, {"loss": 0.5514, "grad_norm": 0.9689221382141113, "learning_rate": 0.0002, "epoch": 3.6129032258064515, "step": 1680}, {"loss": 0.5379, "grad_norm": 0.9684675335884094, "learning_rate": 0.0002, "epoch": 3.6344086021505375, "step": 1690}, {"loss": 0.5748, "grad_norm": 0.8851472735404968, "learning_rate": 0.0002, "epoch": 3.6559139784946235, "step": 1700}, {"loss": 0.5412, "grad_norm": 0.7709833383560181, "learning_rate": 0.0002, "epoch": 3.6774193548387095, "step": 1710}, {"loss": 0.521, "grad_norm": 0.818236231803894, "learning_rate": 0.0002, "epoch": 3.698924731182796, "step": 1720}, {"loss": 0.5445, "grad_norm": 0.870642364025116, "learning_rate": 0.0002, "epoch": 3.720430107526882, "step": 1730}, {"loss": 0.5307, "grad_norm": 1.0245511531829834, "learning_rate": 0.0002, "epoch": 3.741935483870968, "step": 1740}, {"loss": 0.5593, "grad_norm": 0.8607558608055115, "learning_rate": 0.0002, "epoch": 3.763440860215054, "step": 1750}, {"loss": 0.536, "grad_norm": 0.8511829972267151, "learning_rate": 0.0002, "epoch": 3.78494623655914, "step": 1760}, {"loss": 0.5193, "grad_norm": 0.7969087362289429, "learning_rate": 0.0002, "epoch": 3.806451612903226, "step": 1770}, {"loss": 0.5578, "grad_norm": 0.8457245826721191, "learning_rate": 0.0002, "epoch": 3.827956989247312, "step": 1780}, {"loss": 0.5337, "grad_norm": 0.8893467783927917, "learning_rate": 0.0002, "epoch": 3.849462365591398, "step": 1790}, {"loss": 0.5024, "grad_norm": 0.8593819737434387, "learning_rate": 0.0002, "epoch": 3.870967741935484, "step": 1800}, {"loss": 0.5134, "grad_norm": 0.7574560642242432, "learning_rate": 0.0002, "epoch": 3.89247311827957, "step": 1810}, {"loss": 0.5263, "grad_norm": 0.8681567311286926, "learning_rate": 0.0002, "epoch": 3.913978494623656, "step": 1820}, {"loss": 0.532, "grad_norm": 0.9068132042884827, "learning_rate": 0.0002, "epoch": 3.935483870967742, "step": 1830}, {"loss": 0.5427, "grad_norm": 0.8668948411941528, "learning_rate": 0.0002, "epoch": 3.956989247311828, "step": 1840}, {"loss": 0.5349, "grad_norm": 1.046032428741455, "learning_rate": 0.0002, "epoch": 3.978494623655914, "step": 1850}, {"loss": 0.5087, "grad_norm": 0.904780387878418, "learning_rate": 0.0002, "epoch": 4.0, "step": 1860}]} +{"epoch": 5.0, "step": 2325, "epoch_duration": 451.0625660419464, "total_accumulated_duration": 2253.2721548080444, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-1860", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}, {"eval_loss": 0.6257933378219604, "eval_runtime": 21.3692, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.965, "epoch": 3.0, "step": 1395}, {"loss": 0.5696, "grad_norm": 0.8650677800178528, "learning_rate": 0.0002, "epoch": 3.010752688172043, "step": 1400}, {"loss": 0.5483, "grad_norm": 0.8364197015762329, "learning_rate": 0.0002, "epoch": 3.032258064516129, "step": 1410}, {"loss": 0.5606, "grad_norm": 0.8278448581695557, "learning_rate": 0.0002, "epoch": 3.053763440860215, "step": 1420}, {"loss": 0.5572, "grad_norm": 0.8806642889976501, "learning_rate": 0.0002, "epoch": 3.075268817204301, "step": 1430}, {"loss": 0.585, "grad_norm": 0.8180029988288879, "learning_rate": 0.0002, "epoch": 3.096774193548387, "step": 1440}, {"loss": 0.5667, "grad_norm": 0.8561782836914062, "learning_rate": 0.0002, "epoch": 3.118279569892473, "step": 1450}, {"loss": 0.5246, "grad_norm": 0.8377029299736023, "learning_rate": 0.0002, "epoch": 3.139784946236559, "step": 1460}, {"loss": 0.5464, "grad_norm": 0.885779082775116, "learning_rate": 0.0002, "epoch": 3.161290322580645, "step": 1470}, {"loss": 0.541, "grad_norm": 0.9388518333435059, "learning_rate": 0.0002, "epoch": 3.182795698924731, "step": 1480}, {"loss": 0.5447, "grad_norm": 0.8816235661506653, "learning_rate": 0.0002, "epoch": 3.204301075268817, "step": 1490}, {"loss": 0.5466, "grad_norm": 0.9885783791542053, "learning_rate": 0.0002, "epoch": 3.225806451612903, "step": 1500}, {"loss": 0.5455, "grad_norm": 0.8635850548744202, "learning_rate": 0.0002, "epoch": 3.247311827956989, "step": 1510}, {"loss": 0.5419, "grad_norm": 0.829853355884552, "learning_rate": 0.0002, "epoch": 3.268817204301075, "step": 1520}, {"loss": 0.54, "grad_norm": 0.9037486910820007, "learning_rate": 0.0002, "epoch": 3.2903225806451615, "step": 1530}, {"loss": 0.5375, "grad_norm": 0.8173713684082031, "learning_rate": 0.0002, "epoch": 3.3118279569892475, "step": 1540}, {"loss": 0.5405, "grad_norm": 0.796953022480011, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1550}, {"loss": 0.5505, "grad_norm": 0.7894400358200073, "learning_rate": 0.0002, "epoch": 3.3548387096774195, "step": 1560}, {"loss": 0.5395, "grad_norm": 0.9434949159622192, "learning_rate": 0.0002, "epoch": 3.3763440860215055, "step": 1570}, {"loss": 0.5271, "grad_norm": 0.8666760325431824, "learning_rate": 0.0002, "epoch": 3.3978494623655915, "step": 1580}, {"loss": 0.5439, "grad_norm": 0.7782467007637024, "learning_rate": 0.0002, "epoch": 3.4193548387096775, "step": 1590}, {"loss": 0.5161, "grad_norm": 0.8849126696586609, "learning_rate": 0.0002, "epoch": 3.4408602150537635, "step": 1600}, {"loss": 0.5353, "grad_norm": 0.7863831520080566, "learning_rate": 0.0002, "epoch": 3.4623655913978495, "step": 1610}, {"loss": 0.5308, "grad_norm": 1.0403116941452026, "learning_rate": 0.0002, "epoch": 3.4838709677419355, "step": 1620}, {"loss": 0.5339, "grad_norm": 0.8307499289512634, "learning_rate": 0.0002, "epoch": 3.5053763440860215, "step": 1630}, {"loss": 0.5361, "grad_norm": 0.9132118821144104, "learning_rate": 0.0002, "epoch": 3.5268817204301075, "step": 1640}, {"loss": 0.5828, "grad_norm": 0.9322578310966492, "learning_rate": 0.0002, "epoch": 3.5483870967741935, "step": 1650}, {"loss": 0.546, "grad_norm": 0.9782460331916809, "learning_rate": 0.0002, "epoch": 3.5698924731182795, "step": 1660}, {"loss": 0.5424, "grad_norm": 0.7189919352531433, "learning_rate": 0.0002, "epoch": 3.5913978494623655, "step": 1670}, {"loss": 0.5514, "grad_norm": 0.9689221382141113, "learning_rate": 0.0002, "epoch": 3.6129032258064515, "step": 1680}, {"loss": 0.5379, "grad_norm": 0.9684675335884094, "learning_rate": 0.0002, "epoch": 3.6344086021505375, "step": 1690}, {"loss": 0.5748, "grad_norm": 0.8851472735404968, "learning_rate": 0.0002, "epoch": 3.6559139784946235, "step": 1700}, {"loss": 0.5412, "grad_norm": 0.7709833383560181, "learning_rate": 0.0002, "epoch": 3.6774193548387095, "step": 1710}, {"loss": 0.521, "grad_norm": 0.818236231803894, "learning_rate": 0.0002, "epoch": 3.698924731182796, "step": 1720}, {"loss": 0.5445, "grad_norm": 0.870642364025116, "learning_rate": 0.0002, "epoch": 3.720430107526882, "step": 1730}, {"loss": 0.5307, "grad_norm": 1.0245511531829834, "learning_rate": 0.0002, "epoch": 3.741935483870968, "step": 1740}, {"loss": 0.5593, "grad_norm": 0.8607558608055115, "learning_rate": 0.0002, "epoch": 3.763440860215054, "step": 1750}, {"loss": 0.536, "grad_norm": 0.8511829972267151, "learning_rate": 0.0002, "epoch": 3.78494623655914, "step": 1760}, {"loss": 0.5193, "grad_norm": 0.7969087362289429, "learning_rate": 0.0002, "epoch": 3.806451612903226, "step": 1770}, {"loss": 0.5578, "grad_norm": 0.8457245826721191, "learning_rate": 0.0002, "epoch": 3.827956989247312, "step": 1780}, {"loss": 0.5337, "grad_norm": 0.8893467783927917, "learning_rate": 0.0002, "epoch": 3.849462365591398, "step": 1790}, {"loss": 0.5024, "grad_norm": 0.8593819737434387, "learning_rate": 0.0002, "epoch": 3.870967741935484, "step": 1800}, {"loss": 0.5134, "grad_norm": 0.7574560642242432, "learning_rate": 0.0002, "epoch": 3.89247311827957, "step": 1810}, {"loss": 0.5263, "grad_norm": 0.8681567311286926, "learning_rate": 0.0002, "epoch": 3.913978494623656, "step": 1820}, {"loss": 0.532, "grad_norm": 0.9068132042884827, "learning_rate": 0.0002, "epoch": 3.935483870967742, "step": 1830}, {"loss": 0.5427, "grad_norm": 0.8668948411941528, "learning_rate": 0.0002, "epoch": 3.956989247311828, "step": 1840}, {"loss": 0.5349, "grad_norm": 1.046032428741455, "learning_rate": 0.0002, "epoch": 3.978494623655914, "step": 1850}, {"loss": 0.5087, "grad_norm": 0.904780387878418, "learning_rate": 0.0002, "epoch": 4.0, "step": 1860}, {"eval_loss": 0.5737715363502502, "eval_runtime": 21.4915, "eval_samples_per_second": 15.401, "eval_steps_per_second": 1.954, "epoch": 4.0, "step": 1860}, {"loss": 0.4843, "grad_norm": 0.8611752986907959, "learning_rate": 0.0002, "epoch": 4.021505376344086, "step": 1870}, {"loss": 0.4814, "grad_norm": 0.838782548904419, "learning_rate": 0.0002, "epoch": 4.043010752688172, "step": 1880}, {"loss": 0.474, "grad_norm": 0.9119709134101868, "learning_rate": 0.0002, "epoch": 4.064516129032258, "step": 1890}, {"loss": 0.4951, "grad_norm": 0.8026251196861267, "learning_rate": 0.0002, "epoch": 4.086021505376344, "step": 1900}, {"loss": 0.491, "grad_norm": 0.8773705363273621, "learning_rate": 0.0002, "epoch": 4.10752688172043, "step": 1910}, {"loss": 0.474, "grad_norm": 0.8762255907058716, "learning_rate": 0.0002, "epoch": 4.129032258064516, "step": 1920}, {"loss": 0.4816, "grad_norm": 0.8371861577033997, "learning_rate": 0.0002, "epoch": 4.150537634408602, "step": 1930}, {"loss": 0.472, "grad_norm": 0.9703728556632996, "learning_rate": 0.0002, "epoch": 4.172043010752688, "step": 1940}, {"loss": 0.4772, "grad_norm": 0.8802874684333801, "learning_rate": 0.0002, "epoch": 4.193548387096774, "step": 1950}, {"loss": 0.5032, "grad_norm": 1.0103057622909546, "learning_rate": 0.0002, "epoch": 4.21505376344086, "step": 1960}, {"loss": 0.4945, "grad_norm": 0.9212995171546936, "learning_rate": 0.0002, "epoch": 4.236559139784946, "step": 1970}, {"loss": 0.4753, "grad_norm": 1.009544849395752, "learning_rate": 0.0002, "epoch": 4.258064516129032, "step": 1980}, {"loss": 0.4789, "grad_norm": 0.8535077571868896, "learning_rate": 0.0002, "epoch": 4.279569892473118, "step": 1990}, {"loss": 0.4782, "grad_norm": 0.8363022804260254, "learning_rate": 0.0002, "epoch": 4.301075268817204, "step": 2000}, {"loss": 0.4875, "grad_norm": 0.9041762948036194, "learning_rate": 0.0002, "epoch": 4.32258064516129, "step": 2010}, {"loss": 0.4779, "grad_norm": 0.960790753364563, "learning_rate": 0.0002, "epoch": 4.344086021505376, "step": 2020}, {"loss": 0.4626, "grad_norm": 0.8823095560073853, "learning_rate": 0.0002, "epoch": 4.365591397849462, "step": 2030}, {"loss": 0.4883, "grad_norm": 0.952100396156311, "learning_rate": 0.0002, "epoch": 4.387096774193548, "step": 2040}, {"loss": 0.4789, "grad_norm": 1.0793498754501343, "learning_rate": 0.0002, "epoch": 4.408602150537634, "step": 2050}, {"loss": 0.4827, "grad_norm": 0.8987208008766174, "learning_rate": 0.0002, "epoch": 4.43010752688172, "step": 2060}, {"loss": 0.4594, "grad_norm": 0.8539772033691406, "learning_rate": 0.0002, "epoch": 4.451612903225806, "step": 2070}, {"loss": 0.4752, "grad_norm": 0.9160863757133484, "learning_rate": 0.0002, "epoch": 4.473118279569892, "step": 2080}, {"loss": 0.5033, "grad_norm": 0.9946850538253784, "learning_rate": 0.0002, "epoch": 4.494623655913978, "step": 2090}, {"loss": 0.4842, "grad_norm": 0.908039391040802, "learning_rate": 0.0002, "epoch": 4.516129032258064, "step": 2100}, {"loss": 0.4861, "grad_norm": 1.1462254524230957, "learning_rate": 0.0002, "epoch": 4.53763440860215, "step": 2110}, {"loss": 0.4892, "grad_norm": 0.8392056226730347, "learning_rate": 0.0002, "epoch": 4.559139784946236, "step": 2120}, {"loss": 0.4824, "grad_norm": 0.9673896431922913, "learning_rate": 0.0002, "epoch": 4.580645161290323, "step": 2130}, {"loss": 0.4665, "grad_norm": 0.9047091603279114, "learning_rate": 0.0002, "epoch": 4.602150537634409, "step": 2140}, {"loss": 0.4714, "grad_norm": 0.9013425707817078, "learning_rate": 0.0002, "epoch": 4.623655913978495, "step": 2150}, {"loss": 0.472, "grad_norm": 0.8899165391921997, "learning_rate": 0.0002, "epoch": 4.645161290322581, "step": 2160}, {"loss": 0.4635, "grad_norm": 0.748602569103241, "learning_rate": 0.0002, "epoch": 4.666666666666667, "step": 2170}, {"loss": 0.4695, "grad_norm": 0.8694155216217041, "learning_rate": 0.0002, "epoch": 4.688172043010753, "step": 2180}, {"loss": 0.4929, "grad_norm": 0.9134316444396973, "learning_rate": 0.0002, "epoch": 4.709677419354839, "step": 2190}, {"loss": 0.4855, "grad_norm": 0.8504763245582581, "learning_rate": 0.0002, "epoch": 4.731182795698925, "step": 2200}, {"loss": 0.4517, "grad_norm": 1.0321544408798218, "learning_rate": 0.0002, "epoch": 4.752688172043011, "step": 2210}, {"loss": 0.4796, "grad_norm": 0.9368237257003784, "learning_rate": 0.0002, "epoch": 4.774193548387097, "step": 2220}, {"loss": 0.4837, "grad_norm": 0.9319947361946106, "learning_rate": 0.0002, "epoch": 4.795698924731183, "step": 2230}, {"loss": 0.4696, "grad_norm": 0.904333770275116, "learning_rate": 0.0002, "epoch": 4.817204301075269, "step": 2240}, {"loss": 0.4746, "grad_norm": 0.8097078204154968, "learning_rate": 0.0002, "epoch": 4.838709677419355, "step": 2250}, {"loss": 0.4438, "grad_norm": 0.9128859043121338, "learning_rate": 0.0002, "epoch": 4.860215053763441, "step": 2260}, {"loss": 0.4693, "grad_norm": 0.883129894733429, "learning_rate": 0.0002, "epoch": 4.881720430107527, "step": 2270}, {"loss": 0.4494, "grad_norm": 0.85712730884552, "learning_rate": 0.0002, "epoch": 4.903225806451613, "step": 2280}, {"loss": 0.4593, "grad_norm": 1.2101863622665405, "learning_rate": 0.0002, "epoch": 4.924731182795699, "step": 2290}, {"loss": 0.4779, "grad_norm": 0.917966902256012, "learning_rate": 0.0002, "epoch": 4.946236559139785, "step": 2300}, {"loss": 0.4666, "grad_norm": 0.7740724086761475, "learning_rate": 0.0002, "epoch": 4.967741935483871, "step": 2310}, {"loss": 0.4629, "grad_norm": 1.0199906826019287, "learning_rate": 0.0002, "epoch": 4.989247311827957, "step": 2320}]} +{"epoch": 6.0, "step": 2790, "epoch_duration": 449.90328192710876, "total_accumulated_duration": 2703.175436735153, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2325", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}, {"eval_loss": 0.6257933378219604, "eval_runtime": 21.3692, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.965, "epoch": 3.0, "step": 1395}, {"loss": 0.5696, "grad_norm": 0.8650677800178528, "learning_rate": 0.0002, "epoch": 3.010752688172043, "step": 1400}, {"loss": 0.5483, "grad_norm": 0.8364197015762329, "learning_rate": 0.0002, "epoch": 3.032258064516129, "step": 1410}, {"loss": 0.5606, "grad_norm": 0.8278448581695557, "learning_rate": 0.0002, "epoch": 3.053763440860215, "step": 1420}, {"loss": 0.5572, "grad_norm": 0.8806642889976501, "learning_rate": 0.0002, "epoch": 3.075268817204301, "step": 1430}, {"loss": 0.585, "grad_norm": 0.8180029988288879, "learning_rate": 0.0002, "epoch": 3.096774193548387, "step": 1440}, {"loss": 0.5667, "grad_norm": 0.8561782836914062, "learning_rate": 0.0002, "epoch": 3.118279569892473, "step": 1450}, {"loss": 0.5246, "grad_norm": 0.8377029299736023, "learning_rate": 0.0002, "epoch": 3.139784946236559, "step": 1460}, {"loss": 0.5464, "grad_norm": 0.885779082775116, "learning_rate": 0.0002, "epoch": 3.161290322580645, "step": 1470}, {"loss": 0.541, "grad_norm": 0.9388518333435059, "learning_rate": 0.0002, "epoch": 3.182795698924731, "step": 1480}, {"loss": 0.5447, "grad_norm": 0.8816235661506653, "learning_rate": 0.0002, "epoch": 3.204301075268817, "step": 1490}, {"loss": 0.5466, "grad_norm": 0.9885783791542053, "learning_rate": 0.0002, "epoch": 3.225806451612903, "step": 1500}, {"loss": 0.5455, "grad_norm": 0.8635850548744202, "learning_rate": 0.0002, "epoch": 3.247311827956989, "step": 1510}, {"loss": 0.5419, "grad_norm": 0.829853355884552, "learning_rate": 0.0002, "epoch": 3.268817204301075, "step": 1520}, {"loss": 0.54, "grad_norm": 0.9037486910820007, "learning_rate": 0.0002, "epoch": 3.2903225806451615, "step": 1530}, {"loss": 0.5375, "grad_norm": 0.8173713684082031, "learning_rate": 0.0002, "epoch": 3.3118279569892475, "step": 1540}, {"loss": 0.5405, "grad_norm": 0.796953022480011, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1550}, {"loss": 0.5505, "grad_norm": 0.7894400358200073, "learning_rate": 0.0002, "epoch": 3.3548387096774195, "step": 1560}, {"loss": 0.5395, "grad_norm": 0.9434949159622192, "learning_rate": 0.0002, "epoch": 3.3763440860215055, "step": 1570}, {"loss": 0.5271, "grad_norm": 0.8666760325431824, "learning_rate": 0.0002, "epoch": 3.3978494623655915, "step": 1580}, {"loss": 0.5439, "grad_norm": 0.7782467007637024, "learning_rate": 0.0002, "epoch": 3.4193548387096775, "step": 1590}, {"loss": 0.5161, "grad_norm": 0.8849126696586609, "learning_rate": 0.0002, "epoch": 3.4408602150537635, "step": 1600}, {"loss": 0.5353, "grad_norm": 0.7863831520080566, "learning_rate": 0.0002, "epoch": 3.4623655913978495, "step": 1610}, {"loss": 0.5308, "grad_norm": 1.0403116941452026, "learning_rate": 0.0002, "epoch": 3.4838709677419355, "step": 1620}, {"loss": 0.5339, "grad_norm": 0.8307499289512634, "learning_rate": 0.0002, "epoch": 3.5053763440860215, "step": 1630}, {"loss": 0.5361, "grad_norm": 0.9132118821144104, "learning_rate": 0.0002, "epoch": 3.5268817204301075, "step": 1640}, {"loss": 0.5828, "grad_norm": 0.9322578310966492, "learning_rate": 0.0002, "epoch": 3.5483870967741935, "step": 1650}, {"loss": 0.546, "grad_norm": 0.9782460331916809, "learning_rate": 0.0002, "epoch": 3.5698924731182795, "step": 1660}, {"loss": 0.5424, "grad_norm": 0.7189919352531433, "learning_rate": 0.0002, "epoch": 3.5913978494623655, "step": 1670}, {"loss": 0.5514, "grad_norm": 0.9689221382141113, "learning_rate": 0.0002, "epoch": 3.6129032258064515, "step": 1680}, {"loss": 0.5379, "grad_norm": 0.9684675335884094, "learning_rate": 0.0002, "epoch": 3.6344086021505375, "step": 1690}, {"loss": 0.5748, "grad_norm": 0.8851472735404968, "learning_rate": 0.0002, "epoch": 3.6559139784946235, "step": 1700}, {"loss": 0.5412, "grad_norm": 0.7709833383560181, "learning_rate": 0.0002, "epoch": 3.6774193548387095, "step": 1710}, {"loss": 0.521, "grad_norm": 0.818236231803894, "learning_rate": 0.0002, "epoch": 3.698924731182796, "step": 1720}, {"loss": 0.5445, "grad_norm": 0.870642364025116, "learning_rate": 0.0002, "epoch": 3.720430107526882, "step": 1730}, {"loss": 0.5307, "grad_norm": 1.0245511531829834, "learning_rate": 0.0002, "epoch": 3.741935483870968, "step": 1740}, {"loss": 0.5593, "grad_norm": 0.8607558608055115, "learning_rate": 0.0002, "epoch": 3.763440860215054, "step": 1750}, {"loss": 0.536, "grad_norm": 0.8511829972267151, "learning_rate": 0.0002, "epoch": 3.78494623655914, "step": 1760}, {"loss": 0.5193, "grad_norm": 0.7969087362289429, "learning_rate": 0.0002, "epoch": 3.806451612903226, "step": 1770}, {"loss": 0.5578, "grad_norm": 0.8457245826721191, "learning_rate": 0.0002, "epoch": 3.827956989247312, "step": 1780}, {"loss": 0.5337, "grad_norm": 0.8893467783927917, "learning_rate": 0.0002, "epoch": 3.849462365591398, "step": 1790}, {"loss": 0.5024, "grad_norm": 0.8593819737434387, "learning_rate": 0.0002, "epoch": 3.870967741935484, "step": 1800}, {"loss": 0.5134, "grad_norm": 0.7574560642242432, "learning_rate": 0.0002, "epoch": 3.89247311827957, "step": 1810}, {"loss": 0.5263, "grad_norm": 0.8681567311286926, "learning_rate": 0.0002, "epoch": 3.913978494623656, "step": 1820}, {"loss": 0.532, "grad_norm": 0.9068132042884827, "learning_rate": 0.0002, "epoch": 3.935483870967742, "step": 1830}, {"loss": 0.5427, "grad_norm": 0.8668948411941528, "learning_rate": 0.0002, "epoch": 3.956989247311828, "step": 1840}, {"loss": 0.5349, "grad_norm": 1.046032428741455, "learning_rate": 0.0002, "epoch": 3.978494623655914, "step": 1850}, {"loss": 0.5087, "grad_norm": 0.904780387878418, "learning_rate": 0.0002, "epoch": 4.0, "step": 1860}, {"eval_loss": 0.5737715363502502, "eval_runtime": 21.4915, "eval_samples_per_second": 15.401, "eval_steps_per_second": 1.954, "epoch": 4.0, "step": 1860}, {"loss": 0.4843, "grad_norm": 0.8611752986907959, "learning_rate": 0.0002, "epoch": 4.021505376344086, "step": 1870}, {"loss": 0.4814, "grad_norm": 0.838782548904419, "learning_rate": 0.0002, "epoch": 4.043010752688172, "step": 1880}, {"loss": 0.474, "grad_norm": 0.9119709134101868, "learning_rate": 0.0002, "epoch": 4.064516129032258, "step": 1890}, {"loss": 0.4951, "grad_norm": 0.8026251196861267, "learning_rate": 0.0002, "epoch": 4.086021505376344, "step": 1900}, {"loss": 0.491, "grad_norm": 0.8773705363273621, "learning_rate": 0.0002, "epoch": 4.10752688172043, "step": 1910}, {"loss": 0.474, "grad_norm": 0.8762255907058716, "learning_rate": 0.0002, "epoch": 4.129032258064516, "step": 1920}, {"loss": 0.4816, "grad_norm": 0.8371861577033997, "learning_rate": 0.0002, "epoch": 4.150537634408602, "step": 1930}, {"loss": 0.472, "grad_norm": 0.9703728556632996, "learning_rate": 0.0002, "epoch": 4.172043010752688, "step": 1940}, {"loss": 0.4772, "grad_norm": 0.8802874684333801, "learning_rate": 0.0002, "epoch": 4.193548387096774, "step": 1950}, {"loss": 0.5032, "grad_norm": 1.0103057622909546, "learning_rate": 0.0002, "epoch": 4.21505376344086, "step": 1960}, {"loss": 0.4945, "grad_norm": 0.9212995171546936, "learning_rate": 0.0002, "epoch": 4.236559139784946, "step": 1970}, {"loss": 0.4753, "grad_norm": 1.009544849395752, "learning_rate": 0.0002, "epoch": 4.258064516129032, "step": 1980}, {"loss": 0.4789, "grad_norm": 0.8535077571868896, "learning_rate": 0.0002, "epoch": 4.279569892473118, "step": 1990}, {"loss": 0.4782, "grad_norm": 0.8363022804260254, "learning_rate": 0.0002, "epoch": 4.301075268817204, "step": 2000}, {"loss": 0.4875, "grad_norm": 0.9041762948036194, "learning_rate": 0.0002, "epoch": 4.32258064516129, "step": 2010}, {"loss": 0.4779, "grad_norm": 0.960790753364563, "learning_rate": 0.0002, "epoch": 4.344086021505376, "step": 2020}, {"loss": 0.4626, "grad_norm": 0.8823095560073853, "learning_rate": 0.0002, "epoch": 4.365591397849462, "step": 2030}, {"loss": 0.4883, "grad_norm": 0.952100396156311, "learning_rate": 0.0002, "epoch": 4.387096774193548, "step": 2040}, {"loss": 0.4789, "grad_norm": 1.0793498754501343, "learning_rate": 0.0002, "epoch": 4.408602150537634, "step": 2050}, {"loss": 0.4827, "grad_norm": 0.8987208008766174, "learning_rate": 0.0002, "epoch": 4.43010752688172, "step": 2060}, {"loss": 0.4594, "grad_norm": 0.8539772033691406, "learning_rate": 0.0002, "epoch": 4.451612903225806, "step": 2070}, {"loss": 0.4752, "grad_norm": 0.9160863757133484, "learning_rate": 0.0002, "epoch": 4.473118279569892, "step": 2080}, {"loss": 0.5033, "grad_norm": 0.9946850538253784, "learning_rate": 0.0002, "epoch": 4.494623655913978, "step": 2090}, {"loss": 0.4842, "grad_norm": 0.908039391040802, "learning_rate": 0.0002, "epoch": 4.516129032258064, "step": 2100}, {"loss": 0.4861, "grad_norm": 1.1462254524230957, "learning_rate": 0.0002, "epoch": 4.53763440860215, "step": 2110}, {"loss": 0.4892, "grad_norm": 0.8392056226730347, "learning_rate": 0.0002, "epoch": 4.559139784946236, "step": 2120}, {"loss": 0.4824, "grad_norm": 0.9673896431922913, "learning_rate": 0.0002, "epoch": 4.580645161290323, "step": 2130}, {"loss": 0.4665, "grad_norm": 0.9047091603279114, "learning_rate": 0.0002, "epoch": 4.602150537634409, "step": 2140}, {"loss": 0.4714, "grad_norm": 0.9013425707817078, "learning_rate": 0.0002, "epoch": 4.623655913978495, "step": 2150}, {"loss": 0.472, "grad_norm": 0.8899165391921997, "learning_rate": 0.0002, "epoch": 4.645161290322581, "step": 2160}, {"loss": 0.4635, "grad_norm": 0.748602569103241, "learning_rate": 0.0002, "epoch": 4.666666666666667, "step": 2170}, {"loss": 0.4695, "grad_norm": 0.8694155216217041, "learning_rate": 0.0002, "epoch": 4.688172043010753, "step": 2180}, {"loss": 0.4929, "grad_norm": 0.9134316444396973, "learning_rate": 0.0002, "epoch": 4.709677419354839, "step": 2190}, {"loss": 0.4855, "grad_norm": 0.8504763245582581, "learning_rate": 0.0002, "epoch": 4.731182795698925, "step": 2200}, {"loss": 0.4517, "grad_norm": 1.0321544408798218, "learning_rate": 0.0002, "epoch": 4.752688172043011, "step": 2210}, {"loss": 0.4796, "grad_norm": 0.9368237257003784, "learning_rate": 0.0002, "epoch": 4.774193548387097, "step": 2220}, {"loss": 0.4837, "grad_norm": 0.9319947361946106, "learning_rate": 0.0002, "epoch": 4.795698924731183, "step": 2230}, {"loss": 0.4696, "grad_norm": 0.904333770275116, "learning_rate": 0.0002, "epoch": 4.817204301075269, "step": 2240}, {"loss": 0.4746, "grad_norm": 0.8097078204154968, "learning_rate": 0.0002, "epoch": 4.838709677419355, "step": 2250}, {"loss": 0.4438, "grad_norm": 0.9128859043121338, "learning_rate": 0.0002, "epoch": 4.860215053763441, "step": 2260}, {"loss": 0.4693, "grad_norm": 0.883129894733429, "learning_rate": 0.0002, "epoch": 4.881720430107527, "step": 2270}, {"loss": 0.4494, "grad_norm": 0.85712730884552, "learning_rate": 0.0002, "epoch": 4.903225806451613, "step": 2280}, {"loss": 0.4593, "grad_norm": 1.2101863622665405, "learning_rate": 0.0002, "epoch": 4.924731182795699, "step": 2290}, {"loss": 0.4779, "grad_norm": 0.917966902256012, "learning_rate": 0.0002, "epoch": 4.946236559139785, "step": 2300}, {"loss": 0.4666, "grad_norm": 0.7740724086761475, "learning_rate": 0.0002, "epoch": 4.967741935483871, "step": 2310}, {"loss": 0.4629, "grad_norm": 1.0199906826019287, "learning_rate": 0.0002, "epoch": 4.989247311827957, "step": 2320}, {"eval_loss": 0.5363914370536804, "eval_runtime": 21.3941, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.963, "epoch": 5.0, "step": 2325}, {"loss": 0.4543, "grad_norm": 0.8580502271652222, "learning_rate": 0.0002, "epoch": 5.010752688172043, "step": 2330}, {"loss": 0.404, "grad_norm": 0.7702704668045044, "learning_rate": 0.0002, "epoch": 5.032258064516129, "step": 2340}, {"loss": 0.4408, "grad_norm": 0.9417401552200317, "learning_rate": 0.0002, "epoch": 5.053763440860215, "step": 2350}, {"loss": 0.4306, "grad_norm": 0.9461463689804077, "learning_rate": 0.0002, "epoch": 5.075268817204301, "step": 2360}, {"loss": 0.4251, "grad_norm": 0.8931282162666321, "learning_rate": 0.0002, "epoch": 5.096774193548387, "step": 2370}, {"loss": 0.4249, "grad_norm": 1.000909447669983, "learning_rate": 0.0002, "epoch": 5.118279569892473, "step": 2380}, {"loss": 0.4231, "grad_norm": 0.8640249967575073, "learning_rate": 0.0002, "epoch": 5.139784946236559, "step": 2390}, {"loss": 0.4272, "grad_norm": 1.0451020002365112, "learning_rate": 0.0002, "epoch": 5.161290322580645, "step": 2400}, {"loss": 0.4177, "grad_norm": 0.7896912097930908, "learning_rate": 0.0002, "epoch": 5.182795698924731, "step": 2410}, {"loss": 0.4116, "grad_norm": 0.8424463272094727, "learning_rate": 0.0002, "epoch": 5.204301075268817, "step": 2420}, {"loss": 0.4225, "grad_norm": 1.0852105617523193, "learning_rate": 0.0002, "epoch": 5.225806451612903, "step": 2430}, {"loss": 0.4352, "grad_norm": 0.9285983443260193, "learning_rate": 0.0002, "epoch": 5.247311827956989, "step": 2440}, {"loss": 0.4262, "grad_norm": 0.9119299054145813, "learning_rate": 0.0002, "epoch": 5.268817204301075, "step": 2450}, {"loss": 0.4494, "grad_norm": 0.8790456056594849, "learning_rate": 0.0002, "epoch": 5.290322580645161, "step": 2460}, {"loss": 0.4421, "grad_norm": 0.8726504445075989, "learning_rate": 0.0002, "epoch": 5.311827956989247, "step": 2470}, {"loss": 0.4372, "grad_norm": 0.9415227770805359, "learning_rate": 0.0002, "epoch": 5.333333333333333, "step": 2480}, {"loss": 0.4223, "grad_norm": 0.9133324027061462, "learning_rate": 0.0002, "epoch": 5.354838709677419, "step": 2490}, {"loss": 0.4401, "grad_norm": 0.9567879438400269, "learning_rate": 0.0002, "epoch": 5.376344086021505, "step": 2500}, {"loss": 0.4094, "grad_norm": 0.9239469766616821, "learning_rate": 0.0002, "epoch": 5.397849462365591, "step": 2510}, {"loss": 0.4416, "grad_norm": 1.0293527841567993, "learning_rate": 0.0002, "epoch": 5.419354838709677, "step": 2520}, {"loss": 0.4311, "grad_norm": 0.8618718981742859, "learning_rate": 0.0002, "epoch": 5.440860215053763, "step": 2530}, {"loss": 0.462, "grad_norm": 0.740166187286377, "learning_rate": 0.0002, "epoch": 5.462365591397849, "step": 2540}, {"loss": 0.4172, "grad_norm": 0.901566743850708, "learning_rate": 0.0002, "epoch": 5.483870967741936, "step": 2550}, {"loss": 0.4315, "grad_norm": 0.7957597970962524, "learning_rate": 0.0002, "epoch": 5.505376344086022, "step": 2560}, {"loss": 0.4263, "grad_norm": 1.1139343976974487, "learning_rate": 0.0002, "epoch": 5.526881720430108, "step": 2570}, {"loss": 0.4056, "grad_norm": 0.989765465259552, "learning_rate": 0.0002, "epoch": 5.548387096774194, "step": 2580}, {"loss": 0.4311, "grad_norm": 0.9416969418525696, "learning_rate": 0.0002, "epoch": 5.56989247311828, "step": 2590}, {"loss": 0.4363, "grad_norm": 0.9184830784797668, "learning_rate": 0.0002, "epoch": 5.591397849462366, "step": 2600}, {"loss": 0.432, "grad_norm": 1.0512700080871582, "learning_rate": 0.0002, "epoch": 5.612903225806452, "step": 2610}, {"loss": 0.4227, "grad_norm": 0.901462197303772, "learning_rate": 0.0002, "epoch": 5.634408602150538, "step": 2620}, {"loss": 0.4332, "grad_norm": 0.9732566475868225, "learning_rate": 0.0002, "epoch": 5.655913978494624, "step": 2630}, {"loss": 0.4223, "grad_norm": 0.8180275559425354, "learning_rate": 0.0002, "epoch": 5.67741935483871, "step": 2640}, {"loss": 0.4311, "grad_norm": 1.1354765892028809, "learning_rate": 0.0002, "epoch": 5.698924731182796, "step": 2650}, {"loss": 0.4409, "grad_norm": 0.9161503314971924, "learning_rate": 0.0002, "epoch": 5.720430107526882, "step": 2660}, {"loss": 0.4394, "grad_norm": 1.0561772584915161, "learning_rate": 0.0002, "epoch": 5.741935483870968, "step": 2670}, {"loss": 0.424, "grad_norm": 0.7712787389755249, "learning_rate": 0.0002, "epoch": 5.763440860215054, "step": 2680}, {"loss": 0.4326, "grad_norm": 0.9674550294876099, "learning_rate": 0.0002, "epoch": 5.78494623655914, "step": 2690}, {"loss": 0.4459, "grad_norm": 0.7531843781471252, "learning_rate": 0.0002, "epoch": 5.806451612903226, "step": 2700}, {"loss": 0.4276, "grad_norm": 1.1332131624221802, "learning_rate": 0.0002, "epoch": 5.827956989247312, "step": 2710}, {"loss": 0.4113, "grad_norm": 0.9367414116859436, "learning_rate": 0.0002, "epoch": 5.849462365591398, "step": 2720}, {"loss": 0.4227, "grad_norm": 0.8267706632614136, "learning_rate": 0.0002, "epoch": 5.870967741935484, "step": 2730}, {"loss": 0.4218, "grad_norm": 1.1040657758712769, "learning_rate": 0.0002, "epoch": 5.89247311827957, "step": 2740}, {"loss": 0.4129, "grad_norm": 0.8879582285881042, "learning_rate": 0.0002, "epoch": 5.913978494623656, "step": 2750}, {"loss": 0.4241, "grad_norm": 0.9264667630195618, "learning_rate": 0.0002, "epoch": 5.935483870967742, "step": 2760}, {"loss": 0.4318, "grad_norm": 0.9373905658721924, "learning_rate": 0.0002, "epoch": 5.956989247311828, "step": 2770}, {"loss": 0.423, "grad_norm": 1.0063740015029907, "learning_rate": 0.0002, "epoch": 5.978494623655914, "step": 2780}, {"loss": 0.4382, "grad_norm": 0.8291367292404175, "learning_rate": 0.0002, "epoch": 6.0, "step": 2790}]} +{"epoch": 7.0, "step": 3255, "epoch_duration": 449.4360945224762, "total_accumulated_duration": 3152.6115312576294, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-2790", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}, {"eval_loss": 0.6257933378219604, "eval_runtime": 21.3692, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.965, "epoch": 3.0, "step": 1395}, {"loss": 0.5696, "grad_norm": 0.8650677800178528, "learning_rate": 0.0002, "epoch": 3.010752688172043, "step": 1400}, {"loss": 0.5483, "grad_norm": 0.8364197015762329, "learning_rate": 0.0002, "epoch": 3.032258064516129, "step": 1410}, {"loss": 0.5606, "grad_norm": 0.8278448581695557, "learning_rate": 0.0002, "epoch": 3.053763440860215, "step": 1420}, {"loss": 0.5572, "grad_norm": 0.8806642889976501, "learning_rate": 0.0002, "epoch": 3.075268817204301, "step": 1430}, {"loss": 0.585, "grad_norm": 0.8180029988288879, "learning_rate": 0.0002, "epoch": 3.096774193548387, "step": 1440}, {"loss": 0.5667, "grad_norm": 0.8561782836914062, "learning_rate": 0.0002, "epoch": 3.118279569892473, "step": 1450}, {"loss": 0.5246, "grad_norm": 0.8377029299736023, "learning_rate": 0.0002, "epoch": 3.139784946236559, "step": 1460}, {"loss": 0.5464, "grad_norm": 0.885779082775116, "learning_rate": 0.0002, "epoch": 3.161290322580645, "step": 1470}, {"loss": 0.541, "grad_norm": 0.9388518333435059, "learning_rate": 0.0002, "epoch": 3.182795698924731, "step": 1480}, {"loss": 0.5447, "grad_norm": 0.8816235661506653, "learning_rate": 0.0002, "epoch": 3.204301075268817, "step": 1490}, {"loss": 0.5466, "grad_norm": 0.9885783791542053, "learning_rate": 0.0002, "epoch": 3.225806451612903, "step": 1500}, {"loss": 0.5455, "grad_norm": 0.8635850548744202, "learning_rate": 0.0002, "epoch": 3.247311827956989, "step": 1510}, {"loss": 0.5419, "grad_norm": 0.829853355884552, "learning_rate": 0.0002, "epoch": 3.268817204301075, "step": 1520}, {"loss": 0.54, "grad_norm": 0.9037486910820007, "learning_rate": 0.0002, "epoch": 3.2903225806451615, "step": 1530}, {"loss": 0.5375, "grad_norm": 0.8173713684082031, "learning_rate": 0.0002, "epoch": 3.3118279569892475, "step": 1540}, {"loss": 0.5405, "grad_norm": 0.796953022480011, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1550}, {"loss": 0.5505, "grad_norm": 0.7894400358200073, "learning_rate": 0.0002, "epoch": 3.3548387096774195, "step": 1560}, {"loss": 0.5395, "grad_norm": 0.9434949159622192, "learning_rate": 0.0002, "epoch": 3.3763440860215055, "step": 1570}, {"loss": 0.5271, "grad_norm": 0.8666760325431824, "learning_rate": 0.0002, "epoch": 3.3978494623655915, "step": 1580}, {"loss": 0.5439, "grad_norm": 0.7782467007637024, "learning_rate": 0.0002, "epoch": 3.4193548387096775, "step": 1590}, {"loss": 0.5161, "grad_norm": 0.8849126696586609, "learning_rate": 0.0002, "epoch": 3.4408602150537635, "step": 1600}, {"loss": 0.5353, "grad_norm": 0.7863831520080566, "learning_rate": 0.0002, "epoch": 3.4623655913978495, "step": 1610}, {"loss": 0.5308, "grad_norm": 1.0403116941452026, "learning_rate": 0.0002, "epoch": 3.4838709677419355, "step": 1620}, {"loss": 0.5339, "grad_norm": 0.8307499289512634, "learning_rate": 0.0002, "epoch": 3.5053763440860215, "step": 1630}, {"loss": 0.5361, "grad_norm": 0.9132118821144104, "learning_rate": 0.0002, "epoch": 3.5268817204301075, "step": 1640}, {"loss": 0.5828, "grad_norm": 0.9322578310966492, "learning_rate": 0.0002, "epoch": 3.5483870967741935, "step": 1650}, {"loss": 0.546, "grad_norm": 0.9782460331916809, "learning_rate": 0.0002, "epoch": 3.5698924731182795, "step": 1660}, {"loss": 0.5424, "grad_norm": 0.7189919352531433, "learning_rate": 0.0002, "epoch": 3.5913978494623655, "step": 1670}, {"loss": 0.5514, "grad_norm": 0.9689221382141113, "learning_rate": 0.0002, "epoch": 3.6129032258064515, "step": 1680}, {"loss": 0.5379, "grad_norm": 0.9684675335884094, "learning_rate": 0.0002, "epoch": 3.6344086021505375, "step": 1690}, {"loss": 0.5748, "grad_norm": 0.8851472735404968, "learning_rate": 0.0002, "epoch": 3.6559139784946235, "step": 1700}, {"loss": 0.5412, "grad_norm": 0.7709833383560181, "learning_rate": 0.0002, "epoch": 3.6774193548387095, "step": 1710}, {"loss": 0.521, "grad_norm": 0.818236231803894, "learning_rate": 0.0002, "epoch": 3.698924731182796, "step": 1720}, {"loss": 0.5445, "grad_norm": 0.870642364025116, "learning_rate": 0.0002, "epoch": 3.720430107526882, "step": 1730}, {"loss": 0.5307, "grad_norm": 1.0245511531829834, "learning_rate": 0.0002, "epoch": 3.741935483870968, "step": 1740}, {"loss": 0.5593, "grad_norm": 0.8607558608055115, "learning_rate": 0.0002, "epoch": 3.763440860215054, "step": 1750}, {"loss": 0.536, "grad_norm": 0.8511829972267151, "learning_rate": 0.0002, "epoch": 3.78494623655914, "step": 1760}, {"loss": 0.5193, "grad_norm": 0.7969087362289429, "learning_rate": 0.0002, "epoch": 3.806451612903226, "step": 1770}, {"loss": 0.5578, "grad_norm": 0.8457245826721191, "learning_rate": 0.0002, "epoch": 3.827956989247312, "step": 1780}, {"loss": 0.5337, "grad_norm": 0.8893467783927917, "learning_rate": 0.0002, "epoch": 3.849462365591398, "step": 1790}, {"loss": 0.5024, "grad_norm": 0.8593819737434387, "learning_rate": 0.0002, "epoch": 3.870967741935484, "step": 1800}, {"loss": 0.5134, "grad_norm": 0.7574560642242432, "learning_rate": 0.0002, "epoch": 3.89247311827957, "step": 1810}, {"loss": 0.5263, "grad_norm": 0.8681567311286926, "learning_rate": 0.0002, "epoch": 3.913978494623656, "step": 1820}, {"loss": 0.532, "grad_norm": 0.9068132042884827, "learning_rate": 0.0002, "epoch": 3.935483870967742, "step": 1830}, {"loss": 0.5427, "grad_norm": 0.8668948411941528, "learning_rate": 0.0002, "epoch": 3.956989247311828, "step": 1840}, {"loss": 0.5349, "grad_norm": 1.046032428741455, "learning_rate": 0.0002, "epoch": 3.978494623655914, "step": 1850}, {"loss": 0.5087, "grad_norm": 0.904780387878418, "learning_rate": 0.0002, "epoch": 4.0, "step": 1860}, {"eval_loss": 0.5737715363502502, "eval_runtime": 21.4915, "eval_samples_per_second": 15.401, "eval_steps_per_second": 1.954, "epoch": 4.0, "step": 1860}, {"loss": 0.4843, "grad_norm": 0.8611752986907959, "learning_rate": 0.0002, "epoch": 4.021505376344086, "step": 1870}, {"loss": 0.4814, "grad_norm": 0.838782548904419, "learning_rate": 0.0002, "epoch": 4.043010752688172, "step": 1880}, {"loss": 0.474, "grad_norm": 0.9119709134101868, "learning_rate": 0.0002, "epoch": 4.064516129032258, "step": 1890}, {"loss": 0.4951, "grad_norm": 0.8026251196861267, "learning_rate": 0.0002, "epoch": 4.086021505376344, "step": 1900}, {"loss": 0.491, "grad_norm": 0.8773705363273621, "learning_rate": 0.0002, "epoch": 4.10752688172043, "step": 1910}, {"loss": 0.474, "grad_norm": 0.8762255907058716, "learning_rate": 0.0002, "epoch": 4.129032258064516, "step": 1920}, {"loss": 0.4816, "grad_norm": 0.8371861577033997, "learning_rate": 0.0002, "epoch": 4.150537634408602, "step": 1930}, {"loss": 0.472, "grad_norm": 0.9703728556632996, "learning_rate": 0.0002, "epoch": 4.172043010752688, "step": 1940}, {"loss": 0.4772, "grad_norm": 0.8802874684333801, "learning_rate": 0.0002, "epoch": 4.193548387096774, "step": 1950}, {"loss": 0.5032, "grad_norm": 1.0103057622909546, "learning_rate": 0.0002, "epoch": 4.21505376344086, "step": 1960}, {"loss": 0.4945, "grad_norm": 0.9212995171546936, "learning_rate": 0.0002, "epoch": 4.236559139784946, "step": 1970}, {"loss": 0.4753, "grad_norm": 1.009544849395752, "learning_rate": 0.0002, "epoch": 4.258064516129032, "step": 1980}, {"loss": 0.4789, "grad_norm": 0.8535077571868896, "learning_rate": 0.0002, "epoch": 4.279569892473118, "step": 1990}, {"loss": 0.4782, "grad_norm": 0.8363022804260254, "learning_rate": 0.0002, "epoch": 4.301075268817204, "step": 2000}, {"loss": 0.4875, "grad_norm": 0.9041762948036194, "learning_rate": 0.0002, "epoch": 4.32258064516129, "step": 2010}, {"loss": 0.4779, "grad_norm": 0.960790753364563, "learning_rate": 0.0002, "epoch": 4.344086021505376, "step": 2020}, {"loss": 0.4626, "grad_norm": 0.8823095560073853, "learning_rate": 0.0002, "epoch": 4.365591397849462, "step": 2030}, {"loss": 0.4883, "grad_norm": 0.952100396156311, "learning_rate": 0.0002, "epoch": 4.387096774193548, "step": 2040}, {"loss": 0.4789, "grad_norm": 1.0793498754501343, "learning_rate": 0.0002, "epoch": 4.408602150537634, "step": 2050}, {"loss": 0.4827, "grad_norm": 0.8987208008766174, "learning_rate": 0.0002, "epoch": 4.43010752688172, "step": 2060}, {"loss": 0.4594, "grad_norm": 0.8539772033691406, "learning_rate": 0.0002, "epoch": 4.451612903225806, "step": 2070}, {"loss": 0.4752, "grad_norm": 0.9160863757133484, "learning_rate": 0.0002, "epoch": 4.473118279569892, "step": 2080}, {"loss": 0.5033, "grad_norm": 0.9946850538253784, "learning_rate": 0.0002, "epoch": 4.494623655913978, "step": 2090}, {"loss": 0.4842, "grad_norm": 0.908039391040802, "learning_rate": 0.0002, "epoch": 4.516129032258064, "step": 2100}, {"loss": 0.4861, "grad_norm": 1.1462254524230957, "learning_rate": 0.0002, "epoch": 4.53763440860215, "step": 2110}, {"loss": 0.4892, "grad_norm": 0.8392056226730347, "learning_rate": 0.0002, "epoch": 4.559139784946236, "step": 2120}, {"loss": 0.4824, "grad_norm": 0.9673896431922913, "learning_rate": 0.0002, "epoch": 4.580645161290323, "step": 2130}, {"loss": 0.4665, "grad_norm": 0.9047091603279114, "learning_rate": 0.0002, "epoch": 4.602150537634409, "step": 2140}, {"loss": 0.4714, "grad_norm": 0.9013425707817078, "learning_rate": 0.0002, "epoch": 4.623655913978495, "step": 2150}, {"loss": 0.472, "grad_norm": 0.8899165391921997, "learning_rate": 0.0002, "epoch": 4.645161290322581, "step": 2160}, {"loss": 0.4635, "grad_norm": 0.748602569103241, "learning_rate": 0.0002, "epoch": 4.666666666666667, "step": 2170}, {"loss": 0.4695, "grad_norm": 0.8694155216217041, "learning_rate": 0.0002, "epoch": 4.688172043010753, "step": 2180}, {"loss": 0.4929, "grad_norm": 0.9134316444396973, "learning_rate": 0.0002, "epoch": 4.709677419354839, "step": 2190}, {"loss": 0.4855, "grad_norm": 0.8504763245582581, "learning_rate": 0.0002, "epoch": 4.731182795698925, "step": 2200}, {"loss": 0.4517, "grad_norm": 1.0321544408798218, "learning_rate": 0.0002, "epoch": 4.752688172043011, "step": 2210}, {"loss": 0.4796, "grad_norm": 0.9368237257003784, "learning_rate": 0.0002, "epoch": 4.774193548387097, "step": 2220}, {"loss": 0.4837, "grad_norm": 0.9319947361946106, "learning_rate": 0.0002, "epoch": 4.795698924731183, "step": 2230}, {"loss": 0.4696, "grad_norm": 0.904333770275116, "learning_rate": 0.0002, "epoch": 4.817204301075269, "step": 2240}, {"loss": 0.4746, "grad_norm": 0.8097078204154968, "learning_rate": 0.0002, "epoch": 4.838709677419355, "step": 2250}, {"loss": 0.4438, "grad_norm": 0.9128859043121338, "learning_rate": 0.0002, "epoch": 4.860215053763441, "step": 2260}, {"loss": 0.4693, "grad_norm": 0.883129894733429, "learning_rate": 0.0002, "epoch": 4.881720430107527, "step": 2270}, {"loss": 0.4494, "grad_norm": 0.85712730884552, "learning_rate": 0.0002, "epoch": 4.903225806451613, "step": 2280}, {"loss": 0.4593, "grad_norm": 1.2101863622665405, "learning_rate": 0.0002, "epoch": 4.924731182795699, "step": 2290}, {"loss": 0.4779, "grad_norm": 0.917966902256012, "learning_rate": 0.0002, "epoch": 4.946236559139785, "step": 2300}, {"loss": 0.4666, "grad_norm": 0.7740724086761475, "learning_rate": 0.0002, "epoch": 4.967741935483871, "step": 2310}, {"loss": 0.4629, "grad_norm": 1.0199906826019287, "learning_rate": 0.0002, "epoch": 4.989247311827957, "step": 2320}, {"eval_loss": 0.5363914370536804, "eval_runtime": 21.3941, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.963, "epoch": 5.0, "step": 2325}, {"loss": 0.4543, "grad_norm": 0.8580502271652222, "learning_rate": 0.0002, "epoch": 5.010752688172043, "step": 2330}, {"loss": 0.404, "grad_norm": 0.7702704668045044, "learning_rate": 0.0002, "epoch": 5.032258064516129, "step": 2340}, {"loss": 0.4408, "grad_norm": 0.9417401552200317, "learning_rate": 0.0002, "epoch": 5.053763440860215, "step": 2350}, {"loss": 0.4306, "grad_norm": 0.9461463689804077, "learning_rate": 0.0002, "epoch": 5.075268817204301, "step": 2360}, {"loss": 0.4251, "grad_norm": 0.8931282162666321, "learning_rate": 0.0002, "epoch": 5.096774193548387, "step": 2370}, {"loss": 0.4249, "grad_norm": 1.000909447669983, "learning_rate": 0.0002, "epoch": 5.118279569892473, "step": 2380}, {"loss": 0.4231, "grad_norm": 0.8640249967575073, "learning_rate": 0.0002, "epoch": 5.139784946236559, "step": 2390}, {"loss": 0.4272, "grad_norm": 1.0451020002365112, "learning_rate": 0.0002, "epoch": 5.161290322580645, "step": 2400}, {"loss": 0.4177, "grad_norm": 0.7896912097930908, "learning_rate": 0.0002, "epoch": 5.182795698924731, "step": 2410}, {"loss": 0.4116, "grad_norm": 0.8424463272094727, "learning_rate": 0.0002, "epoch": 5.204301075268817, "step": 2420}, {"loss": 0.4225, "grad_norm": 1.0852105617523193, "learning_rate": 0.0002, "epoch": 5.225806451612903, "step": 2430}, {"loss": 0.4352, "grad_norm": 0.9285983443260193, "learning_rate": 0.0002, "epoch": 5.247311827956989, "step": 2440}, {"loss": 0.4262, "grad_norm": 0.9119299054145813, "learning_rate": 0.0002, "epoch": 5.268817204301075, "step": 2450}, {"loss": 0.4494, "grad_norm": 0.8790456056594849, "learning_rate": 0.0002, "epoch": 5.290322580645161, "step": 2460}, {"loss": 0.4421, "grad_norm": 0.8726504445075989, "learning_rate": 0.0002, "epoch": 5.311827956989247, "step": 2470}, {"loss": 0.4372, "grad_norm": 0.9415227770805359, "learning_rate": 0.0002, "epoch": 5.333333333333333, "step": 2480}, {"loss": 0.4223, "grad_norm": 0.9133324027061462, "learning_rate": 0.0002, "epoch": 5.354838709677419, "step": 2490}, {"loss": 0.4401, "grad_norm": 0.9567879438400269, "learning_rate": 0.0002, "epoch": 5.376344086021505, "step": 2500}, {"loss": 0.4094, "grad_norm": 0.9239469766616821, "learning_rate": 0.0002, "epoch": 5.397849462365591, "step": 2510}, {"loss": 0.4416, "grad_norm": 1.0293527841567993, "learning_rate": 0.0002, "epoch": 5.419354838709677, "step": 2520}, {"loss": 0.4311, "grad_norm": 0.8618718981742859, "learning_rate": 0.0002, "epoch": 5.440860215053763, "step": 2530}, {"loss": 0.462, "grad_norm": 0.740166187286377, "learning_rate": 0.0002, "epoch": 5.462365591397849, "step": 2540}, {"loss": 0.4172, "grad_norm": 0.901566743850708, "learning_rate": 0.0002, "epoch": 5.483870967741936, "step": 2550}, {"loss": 0.4315, "grad_norm": 0.7957597970962524, "learning_rate": 0.0002, "epoch": 5.505376344086022, "step": 2560}, {"loss": 0.4263, "grad_norm": 1.1139343976974487, "learning_rate": 0.0002, "epoch": 5.526881720430108, "step": 2570}, {"loss": 0.4056, "grad_norm": 0.989765465259552, "learning_rate": 0.0002, "epoch": 5.548387096774194, "step": 2580}, {"loss": 0.4311, "grad_norm": 0.9416969418525696, "learning_rate": 0.0002, "epoch": 5.56989247311828, "step": 2590}, {"loss": 0.4363, "grad_norm": 0.9184830784797668, "learning_rate": 0.0002, "epoch": 5.591397849462366, "step": 2600}, {"loss": 0.432, "grad_norm": 1.0512700080871582, "learning_rate": 0.0002, "epoch": 5.612903225806452, "step": 2610}, {"loss": 0.4227, "grad_norm": 0.901462197303772, "learning_rate": 0.0002, "epoch": 5.634408602150538, "step": 2620}, {"loss": 0.4332, "grad_norm": 0.9732566475868225, "learning_rate": 0.0002, "epoch": 5.655913978494624, "step": 2630}, {"loss": 0.4223, "grad_norm": 0.8180275559425354, "learning_rate": 0.0002, "epoch": 5.67741935483871, "step": 2640}, {"loss": 0.4311, "grad_norm": 1.1354765892028809, "learning_rate": 0.0002, "epoch": 5.698924731182796, "step": 2650}, {"loss": 0.4409, "grad_norm": 0.9161503314971924, "learning_rate": 0.0002, "epoch": 5.720430107526882, "step": 2660}, {"loss": 0.4394, "grad_norm": 1.0561772584915161, "learning_rate": 0.0002, "epoch": 5.741935483870968, "step": 2670}, {"loss": 0.424, "grad_norm": 0.7712787389755249, "learning_rate": 0.0002, "epoch": 5.763440860215054, "step": 2680}, {"loss": 0.4326, "grad_norm": 0.9674550294876099, "learning_rate": 0.0002, "epoch": 5.78494623655914, "step": 2690}, {"loss": 0.4459, "grad_norm": 0.7531843781471252, "learning_rate": 0.0002, "epoch": 5.806451612903226, "step": 2700}, {"loss": 0.4276, "grad_norm": 1.1332131624221802, "learning_rate": 0.0002, "epoch": 5.827956989247312, "step": 2710}, {"loss": 0.4113, "grad_norm": 0.9367414116859436, "learning_rate": 0.0002, "epoch": 5.849462365591398, "step": 2720}, {"loss": 0.4227, "grad_norm": 0.8267706632614136, "learning_rate": 0.0002, "epoch": 5.870967741935484, "step": 2730}, {"loss": 0.4218, "grad_norm": 1.1040657758712769, "learning_rate": 0.0002, "epoch": 5.89247311827957, "step": 2740}, {"loss": 0.4129, "grad_norm": 0.8879582285881042, "learning_rate": 0.0002, "epoch": 5.913978494623656, "step": 2750}, {"loss": 0.4241, "grad_norm": 0.9264667630195618, "learning_rate": 0.0002, "epoch": 5.935483870967742, "step": 2760}, {"loss": 0.4318, "grad_norm": 0.9373905658721924, "learning_rate": 0.0002, "epoch": 5.956989247311828, "step": 2770}, {"loss": 0.423, "grad_norm": 1.0063740015029907, "learning_rate": 0.0002, "epoch": 5.978494623655914, "step": 2780}, {"loss": 0.4382, "grad_norm": 0.8291367292404175, "learning_rate": 0.0002, "epoch": 6.0, "step": 2790}, {"eval_loss": 0.5057176947593689, "eval_runtime": 21.3206, "eval_samples_per_second": 15.525, "eval_steps_per_second": 1.97, "epoch": 6.0, "step": 2790}, {"loss": 0.3907, "grad_norm": 1.0137434005737305, "learning_rate": 0.0002, "epoch": 6.021505376344086, "step": 2800}, {"loss": 0.3793, "grad_norm": 0.7550579905509949, "learning_rate": 0.0002, "epoch": 6.043010752688172, "step": 2810}, {"loss": 0.4003, "grad_norm": 1.0664116144180298, "learning_rate": 0.0002, "epoch": 6.064516129032258, "step": 2820}, {"loss": 0.3876, "grad_norm": 0.7908814549446106, "learning_rate": 0.0002, "epoch": 6.086021505376344, "step": 2830}, {"loss": 0.3884, "grad_norm": 0.8101639747619629, "learning_rate": 0.0002, "epoch": 6.10752688172043, "step": 2840}, {"loss": 0.3835, "grad_norm": 0.7882567048072815, "learning_rate": 0.0002, "epoch": 6.129032258064516, "step": 2850}, {"loss": 0.3827, "grad_norm": 1.0134103298187256, "learning_rate": 0.0002, "epoch": 6.150537634408602, "step": 2860}, {"loss": 0.3963, "grad_norm": 0.9240215420722961, "learning_rate": 0.0002, "epoch": 6.172043010752688, "step": 2870}, {"loss": 0.4049, "grad_norm": 0.8322992920875549, "learning_rate": 0.0002, "epoch": 6.193548387096774, "step": 2880}, {"loss": 0.381, "grad_norm": 0.9238720536231995, "learning_rate": 0.0002, "epoch": 6.21505376344086, "step": 2890}, {"loss": 0.3852, "grad_norm": 0.9361863732337952, "learning_rate": 0.0002, "epoch": 6.236559139784946, "step": 2900}, {"loss": 0.3917, "grad_norm": 0.9670863747596741, "learning_rate": 0.0002, "epoch": 6.258064516129032, "step": 2910}, {"loss": 0.3826, "grad_norm": 0.7724685668945312, "learning_rate": 0.0002, "epoch": 6.279569892473118, "step": 2920}, {"loss": 0.3988, "grad_norm": 0.8125540614128113, "learning_rate": 0.0002, "epoch": 6.301075268817204, "step": 2930}, {"loss": 0.3778, "grad_norm": 0.9483002424240112, "learning_rate": 0.0002, "epoch": 6.32258064516129, "step": 2940}, {"loss": 0.3823, "grad_norm": 1.098374843597412, "learning_rate": 0.0002, "epoch": 6.344086021505376, "step": 2950}, {"loss": 0.3886, "grad_norm": 1.0169378519058228, "learning_rate": 0.0002, "epoch": 6.365591397849462, "step": 2960}, {"loss": 0.3936, "grad_norm": 0.8594151139259338, "learning_rate": 0.0002, "epoch": 6.387096774193548, "step": 2970}, {"loss": 0.3871, "grad_norm": 0.9507288336753845, "learning_rate": 0.0002, "epoch": 6.408602150537634, "step": 2980}, {"loss": 0.3852, "grad_norm": 0.9212459325790405, "learning_rate": 0.0002, "epoch": 6.43010752688172, "step": 2990}, {"loss": 0.3929, "grad_norm": 0.9696952104568481, "learning_rate": 0.0002, "epoch": 6.451612903225806, "step": 3000}, {"loss": 0.3933, "grad_norm": 0.8872610330581665, "learning_rate": 0.0002, "epoch": 6.473118279569892, "step": 3010}, {"loss": 0.393, "grad_norm": 0.9207532405853271, "learning_rate": 0.0002, "epoch": 6.494623655913978, "step": 3020}, {"loss": 0.3848, "grad_norm": 0.9116262793540955, "learning_rate": 0.0002, "epoch": 6.516129032258064, "step": 3030}, {"loss": 0.3964, "grad_norm": 0.83391934633255, "learning_rate": 0.0002, "epoch": 6.53763440860215, "step": 3040}, {"loss": 0.3758, "grad_norm": 0.890931248664856, "learning_rate": 0.0002, "epoch": 6.559139784946236, "step": 3050}, {"loss": 0.3944, "grad_norm": 1.0100581645965576, "learning_rate": 0.0002, "epoch": 6.580645161290323, "step": 3060}, {"loss": 0.3992, "grad_norm": 0.783526599407196, "learning_rate": 0.0002, "epoch": 6.602150537634409, "step": 3070}, {"loss": 0.4144, "grad_norm": 1.324326515197754, "learning_rate": 0.0002, "epoch": 6.623655913978495, "step": 3080}, {"loss": 0.3986, "grad_norm": 0.9102319478988647, "learning_rate": 0.0002, "epoch": 6.645161290322581, "step": 3090}, {"loss": 0.3873, "grad_norm": 0.96951824426651, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 3100}, {"loss": 0.3931, "grad_norm": 0.9786809086799622, "learning_rate": 0.0002, "epoch": 6.688172043010753, "step": 3110}, {"loss": 0.3714, "grad_norm": 1.0301238298416138, "learning_rate": 0.0002, "epoch": 6.709677419354839, "step": 3120}, {"loss": 0.3823, "grad_norm": 1.1690906286239624, "learning_rate": 0.0002, "epoch": 6.731182795698925, "step": 3130}, {"loss": 0.3936, "grad_norm": 0.963306725025177, "learning_rate": 0.0002, "epoch": 6.752688172043011, "step": 3140}, {"loss": 0.3975, "grad_norm": 0.8565770983695984, "learning_rate": 0.0002, "epoch": 6.774193548387097, "step": 3150}, {"loss": 0.3903, "grad_norm": 0.8887158632278442, "learning_rate": 0.0002, "epoch": 6.795698924731183, "step": 3160}, {"loss": 0.4098, "grad_norm": 0.8234561085700989, "learning_rate": 0.0002, "epoch": 6.817204301075269, "step": 3170}, {"loss": 0.4041, "grad_norm": 0.9000219702720642, "learning_rate": 0.0002, "epoch": 6.838709677419355, "step": 3180}, {"loss": 0.3933, "grad_norm": 1.1366009712219238, "learning_rate": 0.0002, "epoch": 6.860215053763441, "step": 3190}, {"loss": 0.3972, "grad_norm": 0.8747097849845886, "learning_rate": 0.0002, "epoch": 6.881720430107527, "step": 3200}, {"loss": 0.404, "grad_norm": 0.8533893823623657, "learning_rate": 0.0002, "epoch": 6.903225806451613, "step": 3210}, {"loss": 0.3906, "grad_norm": 0.8127949237823486, "learning_rate": 0.0002, "epoch": 6.924731182795699, "step": 3220}, {"loss": 0.3747, "grad_norm": 0.8872477412223816, "learning_rate": 0.0002, "epoch": 6.946236559139785, "step": 3230}, {"loss": 0.3817, "grad_norm": 0.8541608452796936, "learning_rate": 0.0002, "epoch": 6.967741935483871, "step": 3240}, {"loss": 0.3863, "grad_norm": 0.8390752673149109, "learning_rate": 0.0002, "epoch": 6.989247311827957, "step": 3250}]} +{"epoch": 8.0, "step": 3720, "epoch_duration": 451.2651696205139, "total_accumulated_duration": 3603.8767008781433, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 3020.60888671875}, "peak_memory_usage": {"GPU_0": 15051.17431640625}, "avg_memory_reserved": {"GPU_0": 20170.0}, "peak_memory_reserved": {"GPU_0": 20170.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2b-it_int4_flare-headlines_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.4-num-8628-sd-4/checkpoint-3255", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 3.4172, "grad_norm": 0.9075053930282593, "learning_rate": 0.0002, "epoch": 0.021505376344086023, "step": 10}, {"loss": 2.5888, "grad_norm": 1.4321208000183105, "learning_rate": 0.0002, "epoch": 0.043010752688172046, "step": 20}, {"loss": 2.1195, "grad_norm": 1.7500602006912231, "learning_rate": 0.0002, "epoch": 0.06451612903225806, "step": 30}, {"loss": 1.9303, "grad_norm": 0.7606641054153442, "learning_rate": 0.0002, "epoch": 0.08602150537634409, "step": 40}, {"loss": 1.6112, "grad_norm": 1.2754929065704346, "learning_rate": 0.0002, "epoch": 0.10752688172043011, "step": 50}, {"loss": 1.4319, "grad_norm": 1.0936230421066284, "learning_rate": 0.0002, "epoch": 0.12903225806451613, "step": 60}, {"loss": 1.3568, "grad_norm": 1.144593596458435, "learning_rate": 0.0002, "epoch": 0.15053763440860216, "step": 70}, {"loss": 1.2028, "grad_norm": 1.2181956768035889, "learning_rate": 0.0002, "epoch": 0.17204301075268819, "step": 80}, {"loss": 1.1534, "grad_norm": 1.1260095834732056, "learning_rate": 0.0002, "epoch": 0.1935483870967742, "step": 90}, {"loss": 1.1089, "grad_norm": 1.1155284643173218, "learning_rate": 0.0002, "epoch": 0.21505376344086022, "step": 100}, {"loss": 1.0883, "grad_norm": 1.089565396308899, "learning_rate": 0.0002, "epoch": 0.23655913978494625, "step": 110}, {"loss": 1.0814, "grad_norm": 0.9833471775054932, "learning_rate": 0.0002, "epoch": 0.25806451612903225, "step": 120}, {"loss": 1.0239, "grad_norm": 1.0265629291534424, "learning_rate": 0.0002, "epoch": 0.27956989247311825, "step": 130}, {"loss": 0.9888, "grad_norm": 0.9344286322593689, "learning_rate": 0.0002, "epoch": 0.3010752688172043, "step": 140}, {"loss": 1.0043, "grad_norm": 0.9883386492729187, "learning_rate": 0.0002, "epoch": 0.3225806451612903, "step": 150}, {"loss": 0.9338, "grad_norm": 0.9299277067184448, "learning_rate": 0.0002, "epoch": 0.34408602150537637, "step": 160}, {"loss": 0.9432, "grad_norm": 1.390045404434204, "learning_rate": 0.0002, "epoch": 0.3655913978494624, "step": 170}, {"loss": 0.9008, "grad_norm": 1.0313078165054321, "learning_rate": 0.0002, "epoch": 0.3870967741935484, "step": 180}, {"loss": 0.9434, "grad_norm": 1.1792205572128296, "learning_rate": 0.0002, "epoch": 0.40860215053763443, "step": 190}, {"loss": 0.8761, "grad_norm": 1.049809217453003, "learning_rate": 0.0002, "epoch": 0.43010752688172044, "step": 200}, {"loss": 0.8709, "grad_norm": 0.990111768245697, "learning_rate": 0.0002, "epoch": 0.45161290322580644, "step": 210}, {"loss": 0.905, "grad_norm": 0.9870412349700928, "learning_rate": 0.0002, "epoch": 0.4731182795698925, "step": 220}, {"loss": 0.9129, "grad_norm": 0.8557345867156982, "learning_rate": 0.0002, "epoch": 0.4946236559139785, "step": 230}, {"loss": 0.8836, "grad_norm": 0.9746861457824707, "learning_rate": 0.0002, "epoch": 0.5161290322580645, "step": 240}, {"loss": 0.873, "grad_norm": 0.9010438323020935, "learning_rate": 0.0002, "epoch": 0.5376344086021505, "step": 250}, {"loss": 0.8241, "grad_norm": 0.9061082005500793, "learning_rate": 0.0002, "epoch": 0.5591397849462365, "step": 260}, {"loss": 0.8652, "grad_norm": 0.9311846494674683, "learning_rate": 0.0002, "epoch": 0.5806451612903226, "step": 270}, {"loss": 0.8256, "grad_norm": 0.9140254855155945, "learning_rate": 0.0002, "epoch": 0.6021505376344086, "step": 280}, {"loss": 0.8441, "grad_norm": 0.9722253084182739, "learning_rate": 0.0002, "epoch": 0.6236559139784946, "step": 290}, {"loss": 0.8314, "grad_norm": 0.8539168238639832, "learning_rate": 0.0002, "epoch": 0.6451612903225806, "step": 300}, {"loss": 0.8528, "grad_norm": 0.9053162932395935, "learning_rate": 0.0002, "epoch": 0.6666666666666666, "step": 310}, {"loss": 0.8209, "grad_norm": 0.8444252610206604, "learning_rate": 0.0002, "epoch": 0.6881720430107527, "step": 320}, {"loss": 0.8101, "grad_norm": 0.8127437829971313, "learning_rate": 0.0002, "epoch": 0.7096774193548387, "step": 330}, {"loss": 0.8223, "grad_norm": 0.886555016040802, "learning_rate": 0.0002, "epoch": 0.7311827956989247, "step": 340}, {"loss": 0.8368, "grad_norm": 0.8458548784255981, "learning_rate": 0.0002, "epoch": 0.7526881720430108, "step": 350}, {"loss": 0.8295, "grad_norm": 0.8683297634124756, "learning_rate": 0.0002, "epoch": 0.7741935483870968, "step": 360}, {"loss": 0.8232, "grad_norm": 0.8308405876159668, "learning_rate": 0.0002, "epoch": 0.7956989247311828, "step": 370}, {"loss": 0.7752, "grad_norm": 0.8305579423904419, "learning_rate": 0.0002, "epoch": 0.8172043010752689, "step": 380}, {"loss": 0.8267, "grad_norm": 0.8545567393302917, "learning_rate": 0.0002, "epoch": 0.8387096774193549, "step": 390}, {"loss": 0.8212, "grad_norm": 0.8486055731773376, "learning_rate": 0.0002, "epoch": 0.8602150537634409, "step": 400}, {"loss": 0.743, "grad_norm": 0.8126763105392456, "learning_rate": 0.0002, "epoch": 0.8817204301075269, "step": 410}, {"loss": 0.7993, "grad_norm": 0.8494045734405518, "learning_rate": 0.0002, "epoch": 0.9032258064516129, "step": 420}, {"loss": 0.8213, "grad_norm": 0.7639183402061462, "learning_rate": 0.0002, "epoch": 0.9247311827956989, "step": 430}, {"loss": 0.8015, "grad_norm": 0.858101487159729, "learning_rate": 0.0002, "epoch": 0.946236559139785, "step": 440}, {"loss": 0.7629, "grad_norm": 0.8141381740570068, "learning_rate": 0.0002, "epoch": 0.967741935483871, "step": 450}, {"loss": 0.7357, "grad_norm": 0.8072513937950134, "learning_rate": 0.0002, "epoch": 0.989247311827957, "step": 460}, {"eval_loss": 0.7740864157676697, "eval_runtime": 21.383, "eval_samples_per_second": 15.48, "eval_steps_per_second": 1.964, "epoch": 1.0, "step": 465}, {"loss": 0.7701, "grad_norm": 0.8269494771957397, "learning_rate": 0.0002, "epoch": 1.010752688172043, "step": 470}, {"loss": 0.7532, "grad_norm": 0.7814009189605713, "learning_rate": 0.0002, "epoch": 1.032258064516129, "step": 480}, {"loss": 0.7689, "grad_norm": 0.8183923363685608, "learning_rate": 0.0002, "epoch": 1.053763440860215, "step": 490}, {"loss": 0.765, "grad_norm": 0.8146600723266602, "learning_rate": 0.0002, "epoch": 1.075268817204301, "step": 500}, {"loss": 0.7358, "grad_norm": 0.8635126352310181, "learning_rate": 0.0002, "epoch": 1.096774193548387, "step": 510}, {"loss": 0.7302, "grad_norm": 0.8520359396934509, "learning_rate": 0.0002, "epoch": 1.118279569892473, "step": 520}, {"loss": 0.7492, "grad_norm": 0.8026443123817444, "learning_rate": 0.0002, "epoch": 1.139784946236559, "step": 530}, {"loss": 0.7518, "grad_norm": 0.8157258629798889, "learning_rate": 0.0002, "epoch": 1.1612903225806452, "step": 540}, {"loss": 0.7461, "grad_norm": 0.9450796246528625, "learning_rate": 0.0002, "epoch": 1.1827956989247312, "step": 550}, {"loss": 0.7128, "grad_norm": 0.8859835863113403, "learning_rate": 0.0002, "epoch": 1.2043010752688172, "step": 560}, {"loss": 0.7067, "grad_norm": 0.7819921970367432, "learning_rate": 0.0002, "epoch": 1.2258064516129032, "step": 570}, {"loss": 0.7577, "grad_norm": 0.7823445796966553, "learning_rate": 0.0002, "epoch": 1.2473118279569892, "step": 580}, {"loss": 0.7358, "grad_norm": 0.7931883931159973, "learning_rate": 0.0002, "epoch": 1.2688172043010753, "step": 590}, {"loss": 0.723, "grad_norm": 0.7495734095573425, "learning_rate": 0.0002, "epoch": 1.2903225806451613, "step": 600}, {"loss": 0.7386, "grad_norm": 0.9272717237472534, "learning_rate": 0.0002, "epoch": 1.3118279569892473, "step": 610}, {"loss": 0.7498, "grad_norm": 0.7968398332595825, "learning_rate": 0.0002, "epoch": 1.3333333333333333, "step": 620}, {"loss": 0.7635, "grad_norm": 0.7813659310340881, "learning_rate": 0.0002, "epoch": 1.3548387096774195, "step": 630}, {"loss": 0.6665, "grad_norm": 0.730925977230072, "learning_rate": 0.0002, "epoch": 1.3763440860215055, "step": 640}, {"loss": 0.7037, "grad_norm": 0.8011482954025269, "learning_rate": 0.0002, "epoch": 1.3978494623655915, "step": 650}, {"loss": 0.6931, "grad_norm": 0.7770085334777832, "learning_rate": 0.0002, "epoch": 1.4193548387096775, "step": 660}, {"loss": 0.6949, "grad_norm": 0.7432682514190674, "learning_rate": 0.0002, "epoch": 1.4408602150537635, "step": 670}, {"loss": 0.7444, "grad_norm": 0.8820092678070068, "learning_rate": 0.0002, "epoch": 1.4623655913978495, "step": 680}, {"loss": 0.6758, "grad_norm": 0.7786208987236023, "learning_rate": 0.0002, "epoch": 1.4838709677419355, "step": 690}, {"loss": 0.6702, "grad_norm": 0.7467480301856995, "learning_rate": 0.0002, "epoch": 1.5053763440860215, "step": 700}, {"loss": 0.7107, "grad_norm": 0.8147122263908386, "learning_rate": 0.0002, "epoch": 1.5268817204301075, "step": 710}, {"loss": 0.7144, "grad_norm": 0.796030580997467, "learning_rate": 0.0002, "epoch": 1.5483870967741935, "step": 720}, {"loss": 0.6936, "grad_norm": 0.8776171207427979, "learning_rate": 0.0002, "epoch": 1.5698924731182795, "step": 730}, {"loss": 0.7101, "grad_norm": 0.8056126236915588, "learning_rate": 0.0002, "epoch": 1.5913978494623655, "step": 740}, {"loss": 0.7162, "grad_norm": 0.8141863346099854, "learning_rate": 0.0002, "epoch": 1.6129032258064515, "step": 750}, {"loss": 0.7088, "grad_norm": 0.8100557327270508, "learning_rate": 0.0002, "epoch": 1.6344086021505375, "step": 760}, {"loss": 0.7212, "grad_norm": 0.8283200860023499, "learning_rate": 0.0002, "epoch": 1.6559139784946235, "step": 770}, {"loss": 0.694, "grad_norm": 0.800865113735199, "learning_rate": 0.0002, "epoch": 1.6774193548387095, "step": 780}, {"loss": 0.7076, "grad_norm": 0.8052287697792053, "learning_rate": 0.0002, "epoch": 1.6989247311827957, "step": 790}, {"loss": 0.7257, "grad_norm": 0.8619674444198608, "learning_rate": 0.0002, "epoch": 1.7204301075268817, "step": 800}, {"loss": 0.7141, "grad_norm": 0.8907215595245361, "learning_rate": 0.0002, "epoch": 1.7419354838709677, "step": 810}, {"loss": 0.7035, "grad_norm": 0.6976316571235657, "learning_rate": 0.0002, "epoch": 1.7634408602150538, "step": 820}, {"loss": 0.6916, "grad_norm": 0.7533746957778931, "learning_rate": 0.0002, "epoch": 1.7849462365591398, "step": 830}, {"loss": 0.7094, "grad_norm": 0.7326804399490356, "learning_rate": 0.0002, "epoch": 1.8064516129032258, "step": 840}, {"loss": 0.6891, "grad_norm": 0.7782683372497559, "learning_rate": 0.0002, "epoch": 1.827956989247312, "step": 850}, {"loss": 0.6931, "grad_norm": 0.7424806356430054, "learning_rate": 0.0002, "epoch": 1.849462365591398, "step": 860}, {"loss": 0.7354, "grad_norm": 1.172325611114502, "learning_rate": 0.0002, "epoch": 1.870967741935484, "step": 870}, {"loss": 0.6866, "grad_norm": 0.771058201789856, "learning_rate": 0.0002, "epoch": 1.89247311827957, "step": 880}, {"loss": 0.7296, "grad_norm": 0.8624904155731201, "learning_rate": 0.0002, "epoch": 1.913978494623656, "step": 890}, {"loss": 0.7233, "grad_norm": 0.7062820792198181, "learning_rate": 0.0002, "epoch": 1.935483870967742, "step": 900}, {"loss": 0.6966, "grad_norm": 0.7560103535652161, "learning_rate": 0.0002, "epoch": 1.956989247311828, "step": 910}, {"loss": 0.69, "grad_norm": 0.788899838924408, "learning_rate": 0.0002, "epoch": 1.978494623655914, "step": 920}, {"loss": 0.6505, "grad_norm": 0.6562113761901855, "learning_rate": 0.0002, "epoch": 2.0, "step": 930}, {"eval_loss": 0.6885261535644531, "eval_runtime": 21.4291, "eval_samples_per_second": 15.446, "eval_steps_per_second": 1.96, "epoch": 2.0, "step": 930}, {"loss": 0.6625, "grad_norm": 0.8216531872749329, "learning_rate": 0.0002, "epoch": 2.021505376344086, "step": 940}, {"loss": 0.6398, "grad_norm": 0.8317142724990845, "learning_rate": 0.0002, "epoch": 2.043010752688172, "step": 950}, {"loss": 0.649, "grad_norm": 0.8446708917617798, "learning_rate": 0.0002, "epoch": 2.064516129032258, "step": 960}, {"loss": 0.657, "grad_norm": 0.735055148601532, "learning_rate": 0.0002, "epoch": 2.086021505376344, "step": 970}, {"loss": 0.649, "grad_norm": 0.7487243413925171, "learning_rate": 0.0002, "epoch": 2.10752688172043, "step": 980}, {"loss": 0.6419, "grad_norm": 0.8573887944221497, "learning_rate": 0.0002, "epoch": 2.129032258064516, "step": 990}, {"loss": 0.6431, "grad_norm": 0.6284521818161011, "learning_rate": 0.0002, "epoch": 2.150537634408602, "step": 1000}, {"loss": 0.6128, "grad_norm": 0.754183292388916, "learning_rate": 0.0002, "epoch": 2.172043010752688, "step": 1010}, {"loss": 0.6253, "grad_norm": 0.9445359110832214, "learning_rate": 0.0002, "epoch": 2.193548387096774, "step": 1020}, {"loss": 0.605, "grad_norm": 0.808508038520813, "learning_rate": 0.0002, "epoch": 2.21505376344086, "step": 1030}, {"loss": 0.6786, "grad_norm": 0.9394679665565491, "learning_rate": 0.0002, "epoch": 2.236559139784946, "step": 1040}, {"loss": 0.6176, "grad_norm": 0.8151357769966125, "learning_rate": 0.0002, "epoch": 2.258064516129032, "step": 1050}, {"loss": 0.66, "grad_norm": 0.7909848093986511, "learning_rate": 0.0002, "epoch": 2.279569892473118, "step": 1060}, {"loss": 0.6254, "grad_norm": 0.7506507039070129, "learning_rate": 0.0002, "epoch": 2.3010752688172045, "step": 1070}, {"loss": 0.6608, "grad_norm": 0.8240520358085632, "learning_rate": 0.0002, "epoch": 2.3225806451612905, "step": 1080}, {"loss": 0.6207, "grad_norm": 0.9342400431632996, "learning_rate": 0.0002, "epoch": 2.3440860215053765, "step": 1090}, {"loss": 0.6029, "grad_norm": 1.0598735809326172, "learning_rate": 0.0002, "epoch": 2.3655913978494625, "step": 1100}, {"loss": 0.6035, "grad_norm": 0.7907650470733643, "learning_rate": 0.0002, "epoch": 2.3870967741935485, "step": 1110}, {"loss": 0.6237, "grad_norm": 0.9388798475265503, "learning_rate": 0.0002, "epoch": 2.4086021505376345, "step": 1120}, {"loss": 0.6207, "grad_norm": 0.8985419869422913, "learning_rate": 0.0002, "epoch": 2.4301075268817205, "step": 1130}, {"loss": 0.5902, "grad_norm": 0.7471932768821716, "learning_rate": 0.0002, "epoch": 2.4516129032258065, "step": 1140}, {"loss": 0.6446, "grad_norm": 0.761131763458252, "learning_rate": 0.0002, "epoch": 2.4731182795698925, "step": 1150}, {"loss": 0.6088, "grad_norm": 0.7901819348335266, "learning_rate": 0.0002, "epoch": 2.4946236559139785, "step": 1160}, {"loss": 0.6142, "grad_norm": 0.9932922720909119, "learning_rate": 0.0002, "epoch": 2.5161290322580645, "step": 1170}, {"loss": 0.6407, "grad_norm": 0.7414287328720093, "learning_rate": 0.0002, "epoch": 2.5376344086021505, "step": 1180}, {"loss": 0.6161, "grad_norm": 0.8111771941184998, "learning_rate": 0.0002, "epoch": 2.5591397849462365, "step": 1190}, {"loss": 0.6006, "grad_norm": 0.7520156502723694, "learning_rate": 0.0002, "epoch": 2.5806451612903225, "step": 1200}, {"loss": 0.615, "grad_norm": 0.9022907018661499, "learning_rate": 0.0002, "epoch": 2.6021505376344085, "step": 1210}, {"loss": 0.6211, "grad_norm": 0.7746260166168213, "learning_rate": 0.0002, "epoch": 2.6236559139784945, "step": 1220}, {"loss": 0.616, "grad_norm": 0.8482862114906311, "learning_rate": 0.0002, "epoch": 2.6451612903225805, "step": 1230}, {"loss": 0.6417, "grad_norm": 0.7925458550453186, "learning_rate": 0.0002, "epoch": 2.6666666666666665, "step": 1240}, {"loss": 0.6187, "grad_norm": 0.8369929194450378, "learning_rate": 0.0002, "epoch": 2.688172043010753, "step": 1250}, {"loss": 0.6138, "grad_norm": 0.8311542868614197, "learning_rate": 0.0002, "epoch": 2.709677419354839, "step": 1260}, {"loss": 0.5894, "grad_norm": 0.7204853296279907, "learning_rate": 0.0002, "epoch": 2.731182795698925, "step": 1270}, {"loss": 0.6325, "grad_norm": 0.8447284698486328, "learning_rate": 0.0002, "epoch": 2.752688172043011, "step": 1280}, {"loss": 0.5946, "grad_norm": 0.7738404273986816, "learning_rate": 0.0002, "epoch": 2.774193548387097, "step": 1290}, {"loss": 0.5678, "grad_norm": 0.8393287062644958, "learning_rate": 0.0002, "epoch": 2.795698924731183, "step": 1300}, {"loss": 0.6092, "grad_norm": 0.79121994972229, "learning_rate": 0.0002, "epoch": 2.817204301075269, "step": 1310}, {"loss": 0.5889, "grad_norm": 0.7331557869911194, "learning_rate": 0.0002, "epoch": 2.838709677419355, "step": 1320}, {"loss": 0.6048, "grad_norm": 0.9593998193740845, "learning_rate": 0.0002, "epoch": 2.860215053763441, "step": 1330}, {"loss": 0.6108, "grad_norm": 0.7215158343315125, "learning_rate": 0.0002, "epoch": 2.881720430107527, "step": 1340}, {"loss": 0.5897, "grad_norm": 0.840404212474823, "learning_rate": 0.0002, "epoch": 2.903225806451613, "step": 1350}, {"loss": 0.6056, "grad_norm": 0.870659351348877, "learning_rate": 0.0002, "epoch": 2.924731182795699, "step": 1360}, {"loss": 0.6205, "grad_norm": 0.8744975328445435, "learning_rate": 0.0002, "epoch": 2.946236559139785, "step": 1370}, {"loss": 0.5966, "grad_norm": 0.8030612468719482, "learning_rate": 0.0002, "epoch": 2.967741935483871, "step": 1380}, {"loss": 0.6004, "grad_norm": 0.825814962387085, "learning_rate": 0.0002, "epoch": 2.989247311827957, "step": 1390}, {"eval_loss": 0.6257933378219604, "eval_runtime": 21.3692, "eval_samples_per_second": 15.49, "eval_steps_per_second": 1.965, "epoch": 3.0, "step": 1395}, {"loss": 0.5696, "grad_norm": 0.8650677800178528, "learning_rate": 0.0002, "epoch": 3.010752688172043, "step": 1400}, {"loss": 0.5483, "grad_norm": 0.8364197015762329, "learning_rate": 0.0002, "epoch": 3.032258064516129, "step": 1410}, {"loss": 0.5606, "grad_norm": 0.8278448581695557, "learning_rate": 0.0002, "epoch": 3.053763440860215, "step": 1420}, {"loss": 0.5572, "grad_norm": 0.8806642889976501, "learning_rate": 0.0002, "epoch": 3.075268817204301, "step": 1430}, {"loss": 0.585, "grad_norm": 0.8180029988288879, "learning_rate": 0.0002, "epoch": 3.096774193548387, "step": 1440}, {"loss": 0.5667, "grad_norm": 0.8561782836914062, "learning_rate": 0.0002, "epoch": 3.118279569892473, "step": 1450}, {"loss": 0.5246, "grad_norm": 0.8377029299736023, "learning_rate": 0.0002, "epoch": 3.139784946236559, "step": 1460}, {"loss": 0.5464, "grad_norm": 0.885779082775116, "learning_rate": 0.0002, "epoch": 3.161290322580645, "step": 1470}, {"loss": 0.541, "grad_norm": 0.9388518333435059, "learning_rate": 0.0002, "epoch": 3.182795698924731, "step": 1480}, {"loss": 0.5447, "grad_norm": 0.8816235661506653, "learning_rate": 0.0002, "epoch": 3.204301075268817, "step": 1490}, {"loss": 0.5466, "grad_norm": 0.9885783791542053, "learning_rate": 0.0002, "epoch": 3.225806451612903, "step": 1500}, {"loss": 0.5455, "grad_norm": 0.8635850548744202, "learning_rate": 0.0002, "epoch": 3.247311827956989, "step": 1510}, {"loss": 0.5419, "grad_norm": 0.829853355884552, "learning_rate": 0.0002, "epoch": 3.268817204301075, "step": 1520}, {"loss": 0.54, "grad_norm": 0.9037486910820007, "learning_rate": 0.0002, "epoch": 3.2903225806451615, "step": 1530}, {"loss": 0.5375, "grad_norm": 0.8173713684082031, "learning_rate": 0.0002, "epoch": 3.3118279569892475, "step": 1540}, {"loss": 0.5405, "grad_norm": 0.796953022480011, "learning_rate": 0.0002, "epoch": 3.3333333333333335, "step": 1550}, {"loss": 0.5505, "grad_norm": 0.7894400358200073, "learning_rate": 0.0002, "epoch": 3.3548387096774195, "step": 1560}, {"loss": 0.5395, "grad_norm": 0.9434949159622192, "learning_rate": 0.0002, "epoch": 3.3763440860215055, "step": 1570}, {"loss": 0.5271, "grad_norm": 0.8666760325431824, "learning_rate": 0.0002, "epoch": 3.3978494623655915, "step": 1580}, {"loss": 0.5439, "grad_norm": 0.7782467007637024, "learning_rate": 0.0002, "epoch": 3.4193548387096775, "step": 1590}, {"loss": 0.5161, "grad_norm": 0.8849126696586609, "learning_rate": 0.0002, "epoch": 3.4408602150537635, "step": 1600}, {"loss": 0.5353, "grad_norm": 0.7863831520080566, "learning_rate": 0.0002, "epoch": 3.4623655913978495, "step": 1610}, {"loss": 0.5308, "grad_norm": 1.0403116941452026, "learning_rate": 0.0002, "epoch": 3.4838709677419355, "step": 1620}, {"loss": 0.5339, "grad_norm": 0.8307499289512634, "learning_rate": 0.0002, "epoch": 3.5053763440860215, "step": 1630}, {"loss": 0.5361, "grad_norm": 0.9132118821144104, "learning_rate": 0.0002, "epoch": 3.5268817204301075, "step": 1640}, {"loss": 0.5828, "grad_norm": 0.9322578310966492, "learning_rate": 0.0002, "epoch": 3.5483870967741935, "step": 1650}, {"loss": 0.546, "grad_norm": 0.9782460331916809, "learning_rate": 0.0002, "epoch": 3.5698924731182795, "step": 1660}, {"loss": 0.5424, "grad_norm": 0.7189919352531433, "learning_rate": 0.0002, "epoch": 3.5913978494623655, "step": 1670}, {"loss": 0.5514, "grad_norm": 0.9689221382141113, "learning_rate": 0.0002, "epoch": 3.6129032258064515, "step": 1680}, {"loss": 0.5379, "grad_norm": 0.9684675335884094, "learning_rate": 0.0002, "epoch": 3.6344086021505375, "step": 1690}, {"loss": 0.5748, "grad_norm": 0.8851472735404968, "learning_rate": 0.0002, "epoch": 3.6559139784946235, "step": 1700}, {"loss": 0.5412, "grad_norm": 0.7709833383560181, "learning_rate": 0.0002, "epoch": 3.6774193548387095, "step": 1710}, {"loss": 0.521, "grad_norm": 0.818236231803894, "learning_rate": 0.0002, "epoch": 3.698924731182796, "step": 1720}, {"loss": 0.5445, "grad_norm": 0.870642364025116, "learning_rate": 0.0002, "epoch": 3.720430107526882, "step": 1730}, {"loss": 0.5307, "grad_norm": 1.0245511531829834, "learning_rate": 0.0002, "epoch": 3.741935483870968, "step": 1740}, {"loss": 0.5593, "grad_norm": 0.8607558608055115, "learning_rate": 0.0002, "epoch": 3.763440860215054, "step": 1750}, {"loss": 0.536, "grad_norm": 0.8511829972267151, "learning_rate": 0.0002, "epoch": 3.78494623655914, "step": 1760}, {"loss": 0.5193, "grad_norm": 0.7969087362289429, "learning_rate": 0.0002, "epoch": 3.806451612903226, "step": 1770}, {"loss": 0.5578, "grad_norm": 0.8457245826721191, "learning_rate": 0.0002, "epoch": 3.827956989247312, "step": 1780}, {"loss": 0.5337, "grad_norm": 0.8893467783927917, "learning_rate": 0.0002, "epoch": 3.849462365591398, "step": 1790}, {"loss": 0.5024, "grad_norm": 0.8593819737434387, "learning_rate": 0.0002, "epoch": 3.870967741935484, "step": 1800}, {"loss": 0.5134, "grad_norm": 0.7574560642242432, "learning_rate": 0.0002, "epoch": 3.89247311827957, "step": 1810}, {"loss": 0.5263, "grad_norm": 0.8681567311286926, "learning_rate": 0.0002, "epoch": 3.913978494623656, "step": 1820}, {"loss": 0.532, "grad_norm": 0.9068132042884827, "learning_rate": 0.0002, "epoch": 3.935483870967742, "step": 1830}, {"loss": 0.5427, "grad_norm": 0.8668948411941528, "learning_rate": 0.0002, "epoch": 3.956989247311828, "step": 1840}, {"loss": 0.5349, "grad_norm": 1.046032428741455, "learning_rate": 0.0002, "epoch": 3.978494623655914, "step": 1850}, {"loss": 0.5087, "grad_norm": 0.904780387878418, "learning_rate": 0.0002, "epoch": 4.0, "step": 1860}, {"eval_loss": 0.5737715363502502, "eval_runtime": 21.4915, "eval_samples_per_second": 15.401, "eval_steps_per_second": 1.954, "epoch": 4.0, "step": 1860}, {"loss": 0.4843, "grad_norm": 0.8611752986907959, "learning_rate": 0.0002, "epoch": 4.021505376344086, "step": 1870}, {"loss": 0.4814, "grad_norm": 0.838782548904419, "learning_rate": 0.0002, "epoch": 4.043010752688172, "step": 1880}, {"loss": 0.474, "grad_norm": 0.9119709134101868, "learning_rate": 0.0002, "epoch": 4.064516129032258, "step": 1890}, {"loss": 0.4951, "grad_norm": 0.8026251196861267, "learning_rate": 0.0002, "epoch": 4.086021505376344, "step": 1900}, {"loss": 0.491, "grad_norm": 0.8773705363273621, "learning_rate": 0.0002, "epoch": 4.10752688172043, "step": 1910}, {"loss": 0.474, "grad_norm": 0.8762255907058716, "learning_rate": 0.0002, "epoch": 4.129032258064516, "step": 1920}, {"loss": 0.4816, "grad_norm": 0.8371861577033997, "learning_rate": 0.0002, "epoch": 4.150537634408602, "step": 1930}, {"loss": 0.472, "grad_norm": 0.9703728556632996, "learning_rate": 0.0002, "epoch": 4.172043010752688, "step": 1940}, {"loss": 0.4772, "grad_norm": 0.8802874684333801, "learning_rate": 0.0002, "epoch": 4.193548387096774, "step": 1950}, {"loss": 0.5032, "grad_norm": 1.0103057622909546, "learning_rate": 0.0002, "epoch": 4.21505376344086, "step": 1960}, {"loss": 0.4945, "grad_norm": 0.9212995171546936, "learning_rate": 0.0002, "epoch": 4.236559139784946, "step": 1970}, {"loss": 0.4753, "grad_norm": 1.009544849395752, "learning_rate": 0.0002, "epoch": 4.258064516129032, "step": 1980}, {"loss": 0.4789, "grad_norm": 0.8535077571868896, "learning_rate": 0.0002, "epoch": 4.279569892473118, "step": 1990}, {"loss": 0.4782, "grad_norm": 0.8363022804260254, "learning_rate": 0.0002, "epoch": 4.301075268817204, "step": 2000}, {"loss": 0.4875, "grad_norm": 0.9041762948036194, "learning_rate": 0.0002, "epoch": 4.32258064516129, "step": 2010}, {"loss": 0.4779, "grad_norm": 0.960790753364563, "learning_rate": 0.0002, "epoch": 4.344086021505376, "step": 2020}, {"loss": 0.4626, "grad_norm": 0.8823095560073853, "learning_rate": 0.0002, "epoch": 4.365591397849462, "step": 2030}, {"loss": 0.4883, "grad_norm": 0.952100396156311, "learning_rate": 0.0002, "epoch": 4.387096774193548, "step": 2040}, {"loss": 0.4789, "grad_norm": 1.0793498754501343, "learning_rate": 0.0002, "epoch": 4.408602150537634, "step": 2050}, {"loss": 0.4827, "grad_norm": 0.8987208008766174, "learning_rate": 0.0002, "epoch": 4.43010752688172, "step": 2060}, {"loss": 0.4594, "grad_norm": 0.8539772033691406, "learning_rate": 0.0002, "epoch": 4.451612903225806, "step": 2070}, {"loss": 0.4752, "grad_norm": 0.9160863757133484, "learning_rate": 0.0002, "epoch": 4.473118279569892, "step": 2080}, {"loss": 0.5033, "grad_norm": 0.9946850538253784, "learning_rate": 0.0002, "epoch": 4.494623655913978, "step": 2090}, {"loss": 0.4842, "grad_norm": 0.908039391040802, "learning_rate": 0.0002, "epoch": 4.516129032258064, "step": 2100}, {"loss": 0.4861, "grad_norm": 1.1462254524230957, "learning_rate": 0.0002, "epoch": 4.53763440860215, "step": 2110}, {"loss": 0.4892, "grad_norm": 0.8392056226730347, "learning_rate": 0.0002, "epoch": 4.559139784946236, "step": 2120}, {"loss": 0.4824, "grad_norm": 0.9673896431922913, "learning_rate": 0.0002, "epoch": 4.580645161290323, "step": 2130}, {"loss": 0.4665, "grad_norm": 0.9047091603279114, "learning_rate": 0.0002, "epoch": 4.602150537634409, "step": 2140}, {"loss": 0.4714, "grad_norm": 0.9013425707817078, "learning_rate": 0.0002, "epoch": 4.623655913978495, "step": 2150}, {"loss": 0.472, "grad_norm": 0.8899165391921997, "learning_rate": 0.0002, "epoch": 4.645161290322581, "step": 2160}, {"loss": 0.4635, "grad_norm": 0.748602569103241, "learning_rate": 0.0002, "epoch": 4.666666666666667, "step": 2170}, {"loss": 0.4695, "grad_norm": 0.8694155216217041, "learning_rate": 0.0002, "epoch": 4.688172043010753, "step": 2180}, {"loss": 0.4929, "grad_norm": 0.9134316444396973, "learning_rate": 0.0002, "epoch": 4.709677419354839, "step": 2190}, {"loss": 0.4855, "grad_norm": 0.8504763245582581, "learning_rate": 0.0002, "epoch": 4.731182795698925, "step": 2200}, {"loss": 0.4517, "grad_norm": 1.0321544408798218, "learning_rate": 0.0002, "epoch": 4.752688172043011, "step": 2210}, {"loss": 0.4796, "grad_norm": 0.9368237257003784, "learning_rate": 0.0002, "epoch": 4.774193548387097, "step": 2220}, {"loss": 0.4837, "grad_norm": 0.9319947361946106, "learning_rate": 0.0002, "epoch": 4.795698924731183, "step": 2230}, {"loss": 0.4696, "grad_norm": 0.904333770275116, "learning_rate": 0.0002, "epoch": 4.817204301075269, "step": 2240}, {"loss": 0.4746, "grad_norm": 0.8097078204154968, "learning_rate": 0.0002, "epoch": 4.838709677419355, "step": 2250}, {"loss": 0.4438, "grad_norm": 0.9128859043121338, "learning_rate": 0.0002, "epoch": 4.860215053763441, "step": 2260}, {"loss": 0.4693, "grad_norm": 0.883129894733429, "learning_rate": 0.0002, "epoch": 4.881720430107527, "step": 2270}, {"loss": 0.4494, "grad_norm": 0.85712730884552, "learning_rate": 0.0002, "epoch": 4.903225806451613, "step": 2280}, {"loss": 0.4593, "grad_norm": 1.2101863622665405, "learning_rate": 0.0002, "epoch": 4.924731182795699, "step": 2290}, {"loss": 0.4779, "grad_norm": 0.917966902256012, "learning_rate": 0.0002, "epoch": 4.946236559139785, "step": 2300}, {"loss": 0.4666, "grad_norm": 0.7740724086761475, "learning_rate": 0.0002, "epoch": 4.967741935483871, "step": 2310}, {"loss": 0.4629, "grad_norm": 1.0199906826019287, "learning_rate": 0.0002, "epoch": 4.989247311827957, "step": 2320}, {"eval_loss": 0.5363914370536804, "eval_runtime": 21.3941, "eval_samples_per_second": 15.472, "eval_steps_per_second": 1.963, "epoch": 5.0, "step": 2325}, {"loss": 0.4543, "grad_norm": 0.8580502271652222, "learning_rate": 0.0002, "epoch": 5.010752688172043, "step": 2330}, {"loss": 0.404, "grad_norm": 0.7702704668045044, "learning_rate": 0.0002, "epoch": 5.032258064516129, "step": 2340}, {"loss": 0.4408, "grad_norm": 0.9417401552200317, "learning_rate": 0.0002, "epoch": 5.053763440860215, "step": 2350}, {"loss": 0.4306, "grad_norm": 0.9461463689804077, "learning_rate": 0.0002, "epoch": 5.075268817204301, "step": 2360}, {"loss": 0.4251, "grad_norm": 0.8931282162666321, "learning_rate": 0.0002, "epoch": 5.096774193548387, "step": 2370}, {"loss": 0.4249, "grad_norm": 1.000909447669983, "learning_rate": 0.0002, "epoch": 5.118279569892473, "step": 2380}, {"loss": 0.4231, "grad_norm": 0.8640249967575073, "learning_rate": 0.0002, "epoch": 5.139784946236559, "step": 2390}, {"loss": 0.4272, "grad_norm": 1.0451020002365112, "learning_rate": 0.0002, "epoch": 5.161290322580645, "step": 2400}, {"loss": 0.4177, "grad_norm": 0.7896912097930908, "learning_rate": 0.0002, "epoch": 5.182795698924731, "step": 2410}, {"loss": 0.4116, "grad_norm": 0.8424463272094727, "learning_rate": 0.0002, "epoch": 5.204301075268817, "step": 2420}, {"loss": 0.4225, "grad_norm": 1.0852105617523193, "learning_rate": 0.0002, "epoch": 5.225806451612903, "step": 2430}, {"loss": 0.4352, "grad_norm": 0.9285983443260193, "learning_rate": 0.0002, "epoch": 5.247311827956989, "step": 2440}, {"loss": 0.4262, "grad_norm": 0.9119299054145813, "learning_rate": 0.0002, "epoch": 5.268817204301075, "step": 2450}, {"loss": 0.4494, "grad_norm": 0.8790456056594849, "learning_rate": 0.0002, "epoch": 5.290322580645161, "step": 2460}, {"loss": 0.4421, "grad_norm": 0.8726504445075989, "learning_rate": 0.0002, "epoch": 5.311827956989247, "step": 2470}, {"loss": 0.4372, "grad_norm": 0.9415227770805359, "learning_rate": 0.0002, "epoch": 5.333333333333333, "step": 2480}, {"loss": 0.4223, "grad_norm": 0.9133324027061462, "learning_rate": 0.0002, "epoch": 5.354838709677419, "step": 2490}, {"loss": 0.4401, "grad_norm": 0.9567879438400269, "learning_rate": 0.0002, "epoch": 5.376344086021505, "step": 2500}, {"loss": 0.4094, "grad_norm": 0.9239469766616821, "learning_rate": 0.0002, "epoch": 5.397849462365591, "step": 2510}, {"loss": 0.4416, "grad_norm": 1.0293527841567993, "learning_rate": 0.0002, "epoch": 5.419354838709677, "step": 2520}, {"loss": 0.4311, "grad_norm": 0.8618718981742859, "learning_rate": 0.0002, "epoch": 5.440860215053763, "step": 2530}, {"loss": 0.462, "grad_norm": 0.740166187286377, "learning_rate": 0.0002, "epoch": 5.462365591397849, "step": 2540}, {"loss": 0.4172, "grad_norm": 0.901566743850708, "learning_rate": 0.0002, "epoch": 5.483870967741936, "step": 2550}, {"loss": 0.4315, "grad_norm": 0.7957597970962524, "learning_rate": 0.0002, "epoch": 5.505376344086022, "step": 2560}, {"loss": 0.4263, "grad_norm": 1.1139343976974487, "learning_rate": 0.0002, "epoch": 5.526881720430108, "step": 2570}, {"loss": 0.4056, "grad_norm": 0.989765465259552, "learning_rate": 0.0002, "epoch": 5.548387096774194, "step": 2580}, {"loss": 0.4311, "grad_norm": 0.9416969418525696, "learning_rate": 0.0002, "epoch": 5.56989247311828, "step": 2590}, {"loss": 0.4363, "grad_norm": 0.9184830784797668, "learning_rate": 0.0002, "epoch": 5.591397849462366, "step": 2600}, {"loss": 0.432, "grad_norm": 1.0512700080871582, "learning_rate": 0.0002, "epoch": 5.612903225806452, "step": 2610}, {"loss": 0.4227, "grad_norm": 0.901462197303772, "learning_rate": 0.0002, "epoch": 5.634408602150538, "step": 2620}, {"loss": 0.4332, "grad_norm": 0.9732566475868225, "learning_rate": 0.0002, "epoch": 5.655913978494624, "step": 2630}, {"loss": 0.4223, "grad_norm": 0.8180275559425354, "learning_rate": 0.0002, "epoch": 5.67741935483871, "step": 2640}, {"loss": 0.4311, "grad_norm": 1.1354765892028809, "learning_rate": 0.0002, "epoch": 5.698924731182796, "step": 2650}, {"loss": 0.4409, "grad_norm": 0.9161503314971924, "learning_rate": 0.0002, "epoch": 5.720430107526882, "step": 2660}, {"loss": 0.4394, "grad_norm": 1.0561772584915161, "learning_rate": 0.0002, "epoch": 5.741935483870968, "step": 2670}, {"loss": 0.424, "grad_norm": 0.7712787389755249, "learning_rate": 0.0002, "epoch": 5.763440860215054, "step": 2680}, {"loss": 0.4326, "grad_norm": 0.9674550294876099, "learning_rate": 0.0002, "epoch": 5.78494623655914, "step": 2690}, {"loss": 0.4459, "grad_norm": 0.7531843781471252, "learning_rate": 0.0002, "epoch": 5.806451612903226, "step": 2700}, {"loss": 0.4276, "grad_norm": 1.1332131624221802, "learning_rate": 0.0002, "epoch": 5.827956989247312, "step": 2710}, {"loss": 0.4113, "grad_norm": 0.9367414116859436, "learning_rate": 0.0002, "epoch": 5.849462365591398, "step": 2720}, {"loss": 0.4227, "grad_norm": 0.8267706632614136, "learning_rate": 0.0002, "epoch": 5.870967741935484, "step": 2730}, {"loss": 0.4218, "grad_norm": 1.1040657758712769, "learning_rate": 0.0002, "epoch": 5.89247311827957, "step": 2740}, {"loss": 0.4129, "grad_norm": 0.8879582285881042, "learning_rate": 0.0002, "epoch": 5.913978494623656, "step": 2750}, {"loss": 0.4241, "grad_norm": 0.9264667630195618, "learning_rate": 0.0002, "epoch": 5.935483870967742, "step": 2760}, {"loss": 0.4318, "grad_norm": 0.9373905658721924, "learning_rate": 0.0002, "epoch": 5.956989247311828, "step": 2770}, {"loss": 0.423, "grad_norm": 1.0063740015029907, "learning_rate": 0.0002, "epoch": 5.978494623655914, "step": 2780}, {"loss": 0.4382, "grad_norm": 0.8291367292404175, "learning_rate": 0.0002, "epoch": 6.0, "step": 2790}, {"eval_loss": 0.5057176947593689, "eval_runtime": 21.3206, "eval_samples_per_second": 15.525, "eval_steps_per_second": 1.97, "epoch": 6.0, "step": 2790}, {"loss": 0.3907, "grad_norm": 1.0137434005737305, "learning_rate": 0.0002, "epoch": 6.021505376344086, "step": 2800}, {"loss": 0.3793, "grad_norm": 0.7550579905509949, "learning_rate": 0.0002, "epoch": 6.043010752688172, "step": 2810}, {"loss": 0.4003, "grad_norm": 1.0664116144180298, "learning_rate": 0.0002, "epoch": 6.064516129032258, "step": 2820}, {"loss": 0.3876, "grad_norm": 0.7908814549446106, "learning_rate": 0.0002, "epoch": 6.086021505376344, "step": 2830}, {"loss": 0.3884, "grad_norm": 0.8101639747619629, "learning_rate": 0.0002, "epoch": 6.10752688172043, "step": 2840}, {"loss": 0.3835, "grad_norm": 0.7882567048072815, "learning_rate": 0.0002, "epoch": 6.129032258064516, "step": 2850}, {"loss": 0.3827, "grad_norm": 1.0134103298187256, "learning_rate": 0.0002, "epoch": 6.150537634408602, "step": 2860}, {"loss": 0.3963, "grad_norm": 0.9240215420722961, "learning_rate": 0.0002, "epoch": 6.172043010752688, "step": 2870}, {"loss": 0.4049, "grad_norm": 0.8322992920875549, "learning_rate": 0.0002, "epoch": 6.193548387096774, "step": 2880}, {"loss": 0.381, "grad_norm": 0.9238720536231995, "learning_rate": 0.0002, "epoch": 6.21505376344086, "step": 2890}, {"loss": 0.3852, "grad_norm": 0.9361863732337952, "learning_rate": 0.0002, "epoch": 6.236559139784946, "step": 2900}, {"loss": 0.3917, "grad_norm": 0.9670863747596741, "learning_rate": 0.0002, "epoch": 6.258064516129032, "step": 2910}, {"loss": 0.3826, "grad_norm": 0.7724685668945312, "learning_rate": 0.0002, "epoch": 6.279569892473118, "step": 2920}, {"loss": 0.3988, "grad_norm": 0.8125540614128113, "learning_rate": 0.0002, "epoch": 6.301075268817204, "step": 2930}, {"loss": 0.3778, "grad_norm": 0.9483002424240112, "learning_rate": 0.0002, "epoch": 6.32258064516129, "step": 2940}, {"loss": 0.3823, "grad_norm": 1.098374843597412, "learning_rate": 0.0002, "epoch": 6.344086021505376, "step": 2950}, {"loss": 0.3886, "grad_norm": 1.0169378519058228, "learning_rate": 0.0002, "epoch": 6.365591397849462, "step": 2960}, {"loss": 0.3936, "grad_norm": 0.8594151139259338, "learning_rate": 0.0002, "epoch": 6.387096774193548, "step": 2970}, {"loss": 0.3871, "grad_norm": 0.9507288336753845, "learning_rate": 0.0002, "epoch": 6.408602150537634, "step": 2980}, {"loss": 0.3852, "grad_norm": 0.9212459325790405, "learning_rate": 0.0002, "epoch": 6.43010752688172, "step": 2990}, {"loss": 0.3929, "grad_norm": 0.9696952104568481, "learning_rate": 0.0002, "epoch": 6.451612903225806, "step": 3000}, {"loss": 0.3933, "grad_norm": 0.8872610330581665, "learning_rate": 0.0002, "epoch": 6.473118279569892, "step": 3010}, {"loss": 0.393, "grad_norm": 0.9207532405853271, "learning_rate": 0.0002, "epoch": 6.494623655913978, "step": 3020}, {"loss": 0.3848, "grad_norm": 0.9116262793540955, "learning_rate": 0.0002, "epoch": 6.516129032258064, "step": 3030}, {"loss": 0.3964, "grad_norm": 0.83391934633255, "learning_rate": 0.0002, "epoch": 6.53763440860215, "step": 3040}, {"loss": 0.3758, "grad_norm": 0.890931248664856, "learning_rate": 0.0002, "epoch": 6.559139784946236, "step": 3050}, {"loss": 0.3944, "grad_norm": 1.0100581645965576, "learning_rate": 0.0002, "epoch": 6.580645161290323, "step": 3060}, {"loss": 0.3992, "grad_norm": 0.783526599407196, "learning_rate": 0.0002, "epoch": 6.602150537634409, "step": 3070}, {"loss": 0.4144, "grad_norm": 1.324326515197754, "learning_rate": 0.0002, "epoch": 6.623655913978495, "step": 3080}, {"loss": 0.3986, "grad_norm": 0.9102319478988647, "learning_rate": 0.0002, "epoch": 6.645161290322581, "step": 3090}, {"loss": 0.3873, "grad_norm": 0.96951824426651, "learning_rate": 0.0002, "epoch": 6.666666666666667, "step": 3100}, {"loss": 0.3931, "grad_norm": 0.9786809086799622, "learning_rate": 0.0002, "epoch": 6.688172043010753, "step": 3110}, {"loss": 0.3714, "grad_norm": 1.0301238298416138, "learning_rate": 0.0002, "epoch": 6.709677419354839, "step": 3120}, {"loss": 0.3823, "grad_norm": 1.1690906286239624, "learning_rate": 0.0002, "epoch": 6.731182795698925, "step": 3130}, {"loss": 0.3936, "grad_norm": 0.963306725025177, "learning_rate": 0.0002, "epoch": 6.752688172043011, "step": 3140}, {"loss": 0.3975, "grad_norm": 0.8565770983695984, "learning_rate": 0.0002, "epoch": 6.774193548387097, "step": 3150}, {"loss": 0.3903, "grad_norm": 0.8887158632278442, "learning_rate": 0.0002, "epoch": 6.795698924731183, "step": 3160}, {"loss": 0.4098, "grad_norm": 0.8234561085700989, "learning_rate": 0.0002, "epoch": 6.817204301075269, "step": 3170}, {"loss": 0.4041, "grad_norm": 0.9000219702720642, "learning_rate": 0.0002, "epoch": 6.838709677419355, "step": 3180}, {"loss": 0.3933, "grad_norm": 1.1366009712219238, "learning_rate": 0.0002, "epoch": 6.860215053763441, "step": 3190}, {"loss": 0.3972, "grad_norm": 0.8747097849845886, "learning_rate": 0.0002, "epoch": 6.881720430107527, "step": 3200}, {"loss": 0.404, "grad_norm": 0.8533893823623657, "learning_rate": 0.0002, "epoch": 6.903225806451613, "step": 3210}, {"loss": 0.3906, "grad_norm": 0.8127949237823486, "learning_rate": 0.0002, "epoch": 6.924731182795699, "step": 3220}, {"loss": 0.3747, "grad_norm": 0.8872477412223816, "learning_rate": 0.0002, "epoch": 6.946236559139785, "step": 3230}, {"loss": 0.3817, "grad_norm": 0.8541608452796936, "learning_rate": 0.0002, "epoch": 6.967741935483871, "step": 3240}, {"loss": 0.3863, "grad_norm": 0.8390752673149109, "learning_rate": 0.0002, "epoch": 6.989247311827957, "step": 3250}, {"eval_loss": 0.48264747858047485, "eval_runtime": 21.3942, "eval_samples_per_second": 15.471, "eval_steps_per_second": 1.963, "epoch": 7.0, "step": 3255}, {"loss": 0.391, "grad_norm": 1.0476834774017334, "learning_rate": 0.0002, "epoch": 7.010752688172043, "step": 3260}, {"loss": 0.3422, "grad_norm": 0.7501131296157837, "learning_rate": 0.0002, "epoch": 7.032258064516129, "step": 3270}, {"loss": 0.3542, "grad_norm": 0.9057435393333435, "learning_rate": 0.0002, "epoch": 7.053763440860215, "step": 3280}, {"loss": 0.3522, "grad_norm": 0.7058833241462708, "learning_rate": 0.0002, "epoch": 7.075268817204301, "step": 3290}, {"loss": 0.3575, "grad_norm": 0.9908691644668579, "learning_rate": 0.0002, "epoch": 7.096774193548387, "step": 3300}, {"loss": 0.3755, "grad_norm": 0.9515542984008789, "learning_rate": 0.0002, "epoch": 7.118279569892473, "step": 3310}, {"loss": 0.3612, "grad_norm": 0.997296154499054, "learning_rate": 0.0002, "epoch": 7.139784946236559, "step": 3320}, {"loss": 0.3616, "grad_norm": 0.9810499548912048, "learning_rate": 0.0002, "epoch": 7.161290322580645, "step": 3330}, {"loss": 0.3584, "grad_norm": 0.8133336901664734, "learning_rate": 0.0002, "epoch": 7.182795698924731, "step": 3340}, {"loss": 0.3644, "grad_norm": 1.0679855346679688, "learning_rate": 0.0002, "epoch": 7.204301075268817, "step": 3350}, {"loss": 0.358, "grad_norm": 0.7656611204147339, "learning_rate": 0.0002, "epoch": 7.225806451612903, "step": 3360}, {"loss": 0.3616, "grad_norm": 0.9478468298912048, "learning_rate": 0.0002, "epoch": 7.247311827956989, "step": 3370}, {"loss": 0.3631, "grad_norm": 0.8425832986831665, "learning_rate": 0.0002, "epoch": 7.268817204301075, "step": 3380}, {"loss": 0.3735, "grad_norm": 0.9573627710342407, "learning_rate": 0.0002, "epoch": 7.290322580645161, "step": 3390}, {"loss": 0.359, "grad_norm": 0.9219972491264343, "learning_rate": 0.0002, "epoch": 7.311827956989247, "step": 3400}, {"loss": 0.3644, "grad_norm": 0.876099705696106, "learning_rate": 0.0002, "epoch": 7.333333333333333, "step": 3410}, {"loss": 0.3747, "grad_norm": 1.0051969289779663, "learning_rate": 0.0002, "epoch": 7.354838709677419, "step": 3420}, {"loss": 0.3527, "grad_norm": 1.1347692012786865, "learning_rate": 0.0002, "epoch": 7.376344086021505, "step": 3430}, {"loss": 0.3644, "grad_norm": 0.9641520380973816, "learning_rate": 0.0002, "epoch": 7.397849462365591, "step": 3440}, {"loss": 0.3486, "grad_norm": 0.7777793407440186, "learning_rate": 0.0002, "epoch": 7.419354838709677, "step": 3450}, {"loss": 0.3593, "grad_norm": 0.9649308323860168, "learning_rate": 0.0002, "epoch": 7.440860215053763, "step": 3460}, {"loss": 0.3754, "grad_norm": 0.9245585203170776, "learning_rate": 0.0002, "epoch": 7.462365591397849, "step": 3470}, {"loss": 0.3732, "grad_norm": 0.8298666477203369, "learning_rate": 0.0002, "epoch": 7.483870967741936, "step": 3480}, {"loss": 0.3585, "grad_norm": 1.1579877138137817, "learning_rate": 0.0002, "epoch": 7.505376344086022, "step": 3490}, {"loss": 0.3505, "grad_norm": 0.8718803524971008, "learning_rate": 0.0002, "epoch": 7.526881720430108, "step": 3500}, {"loss": 0.3725, "grad_norm": 0.7785154581069946, "learning_rate": 0.0002, "epoch": 7.548387096774194, "step": 3510}, {"loss": 0.3507, "grad_norm": 0.753657877445221, "learning_rate": 0.0002, "epoch": 7.56989247311828, "step": 3520}, {"loss": 0.3665, "grad_norm": 0.834524929523468, "learning_rate": 0.0002, "epoch": 7.591397849462366, "step": 3530}, {"loss": 0.3686, "grad_norm": 0.9546446800231934, "learning_rate": 0.0002, "epoch": 7.612903225806452, "step": 3540}, {"loss": 0.3673, "grad_norm": 0.8275105357170105, "learning_rate": 0.0002, "epoch": 7.634408602150538, "step": 3550}, {"loss": 0.381, "grad_norm": 0.9137991070747375, "learning_rate": 0.0002, "epoch": 7.655913978494624, "step": 3560}, {"loss": 0.3565, "grad_norm": 0.993617057800293, "learning_rate": 0.0002, "epoch": 7.67741935483871, "step": 3570}, {"loss": 0.3701, "grad_norm": 1.0079665184020996, "learning_rate": 0.0002, "epoch": 7.698924731182796, "step": 3580}, {"loss": 0.3495, "grad_norm": 0.8295491337776184, "learning_rate": 0.0002, "epoch": 7.720430107526882, "step": 3590}, {"loss": 0.374, "grad_norm": 0.814578115940094, "learning_rate": 0.0002, "epoch": 7.741935483870968, "step": 3600}, {"loss": 0.3673, "grad_norm": 0.8422811031341553, "learning_rate": 0.0002, "epoch": 7.763440860215054, "step": 3610}, {"loss": 0.3452, "grad_norm": 1.0220918655395508, "learning_rate": 0.0002, "epoch": 7.78494623655914, "step": 3620}, {"loss": 0.3641, "grad_norm": 0.8065739870071411, "learning_rate": 0.0002, "epoch": 7.806451612903226, "step": 3630}, {"loss": 0.3699, "grad_norm": 0.8039169907569885, "learning_rate": 0.0002, "epoch": 7.827956989247312, "step": 3640}, {"loss": 0.3635, "grad_norm": 1.0766745805740356, "learning_rate": 0.0002, "epoch": 7.849462365591398, "step": 3650}, {"loss": 0.3633, "grad_norm": 1.0806103944778442, "learning_rate": 0.0002, "epoch": 7.870967741935484, "step": 3660}, {"loss": 0.3678, "grad_norm": 1.1005314588546753, "learning_rate": 0.0002, "epoch": 7.89247311827957, "step": 3670}, {"loss": 0.3583, "grad_norm": 0.9276911616325378, "learning_rate": 0.0002, "epoch": 7.913978494623656, "step": 3680}, {"loss": 0.374, "grad_norm": 0.9914153814315796, "learning_rate": 0.0002, "epoch": 7.935483870967742, "step": 3690}, {"loss": 0.3575, "grad_norm": 0.8128159046173096, "learning_rate": 0.0002, "epoch": 7.956989247311828, "step": 3700}, {"loss": 0.3491, "grad_norm": 0.8122950196266174, "learning_rate": 0.0002, "epoch": 7.978494623655914, "step": 3710}, {"loss": 0.3723, "grad_norm": 1.0291857719421387, "learning_rate": 0.0002, "epoch": 8.0, "step": 3720}]}