diff --git a/.gitattributes b/.gitattributes index 79281f5320e1b5f3e2102893cf10358bc788d823..581fd0b47aaa654231ad67bf0d6021398bcd8418 100644 --- a/.gitattributes +++ b/.gitattributes @@ -6126,3 +6126,4 @@ Meta-Llama-3-8B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d Meta-Llama-3-8B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.5-num-63987-sd-1/checkpoint-61896/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.5-num-63987-sd-1/checkpoint-7737/tokenizer.json filter=lfs diff=lfs merge=lfs -text Meta-Llama-3-8B-Instruct_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-0.5-num-63987-sd-1/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/README.md b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/README.md new file mode 100644 index 0000000000000000000000000000000000000000..503a34a03e25483aa99213835fd87bfc8289a3fe --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/README.md @@ -0,0 +1,202 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.13.1 \ No newline at end of file diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_config.json b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e04e4441e0c1b29f69c16b26c142944e440b8076 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_config.json @@ -0,0 +1,29 @@ +{ + "alpha_pattern": {}, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM", + "use_dora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_model.safetensors b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..065df76beeda65cc8491975fec6c64dd3f06d0d0 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce8cb66e633086937bb05740fe402ff6f354a42a289c3e622ec9962c9635518 +size 143153376 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/optimizer.pt b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd262e5c7ec5877477ad0f41ed57eab86200fd35 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d20de08f145be75ce39b608265eae9bf482f5cb1153ec08a93d12df4e26db2e +size 72886650 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/rng_state.pth b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4d8335ba08df4c241e490d04a6d08b8bab497f8 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c58304ec5a15a645bb6b486370c0ffec8bf48e6cbd63951b0994a6c8b2733e1d +size 14244 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/scheduler.pt b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3e74997fdf0af6da45d90796dc099ed122f3a94e --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f752719d085c8860528c71d6c6b05296b0a818986a455e29d557b40ee05ef810 +size 1064 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/special_tokens_map.json b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..0acb52c84d6ea33178bee426ec6706bfba8ba637 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/special_tokens_map.json @@ -0,0 +1,28 @@ +{ + "additional_special_tokens": [ + "", + "" + ], + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.json b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..a4a305d1de4d8f47c0252b4d7fe65a10dd8e2c22 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f7eee611703c5ce5d1eee32d9cdcfe465647b8aff0c1dfb3bed7ad7dbb05060 +size 34362873 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.model b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..796efe9ab515c15e146ce7588e6d7b9b8134dbf8 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a7b147390c64585d6c3543dd6fc636906c9af3865a5548f27f31aee1d4c8e2 +size 4241003 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer_config.json b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1aa249f4dc9f84e87ad8983458e7800ae5bf5454 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/tokenizer_config.json @@ -0,0 +1,2013 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "5": { + "content": "<2mass>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "6": { + "content": "[@BOS@]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "7": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "8": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "9": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "10": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "13": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "14": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "15": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "16": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "17": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "18": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "19": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "20": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "21": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "22": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "23": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "24": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "25": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "26": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "27": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "28": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "29": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "30": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "31": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "32": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "33": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "34": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "35": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "36": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "37": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "38": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "39": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "40": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "41": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "42": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "43": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "44": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "45": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "46": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "47": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "48": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "49": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "50": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "51": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "52": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "53": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "54": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "55": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "56": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "57": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "58": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "59": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "60": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "61": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "62": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "63": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "64": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "65": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "66": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "67": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "68": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "69": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "70": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "71": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "72": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "73": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "74": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "75": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "76": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "77": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "78": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "79": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "80": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "81": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "82": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "83": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "84": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "85": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "86": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "87": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "88": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "89": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "90": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "91": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "92": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "93": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "94": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "95": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "96": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "97": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "98": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "99": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "100": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "101": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "102": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "103": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "104": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "105": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "106": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "109": { + "content": "\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "110": { + "content": "\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "111": { + "content": "\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "112": { + "content": "\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "113": { + "content": "\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "114": { + "content": "\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "115": { + "content": "\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "116": { + "content": "\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "117": { + "content": "\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "118": { + "content": "\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "119": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "120": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "121": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "122": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "123": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "124": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "125": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "126": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "127": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "128": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "129": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "130": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "131": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "132": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "133": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "134": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "135": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "136": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "137": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "138": { + "content": "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "139": { + "content": "▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "140": { + "content": "▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "141": { + "content": "▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "142": { + "content": "▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "143": { + "content": "▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "144": { + "content": "▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "145": { + "content": "▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "146": { + "content": "▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "147": { + "content": "▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "148": { + "content": "▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "149": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "150": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "152": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "153": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "154": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "155": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "156": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "157": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "158": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "159": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "160": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "161": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "162": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "163": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "164": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "165": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "166": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "167": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "168": { + "content": "▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "169": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "170": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "172": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "173": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "174": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "175": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "171": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "176": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "177": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "178": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "179": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "180": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "181": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "182": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "183": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "184": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "185": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "186": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "187": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "188": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "189": { + "content": "

", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "190": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "191": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "192": { + "content": "
", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "193": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "194": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "195": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "196": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "197": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "198": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "199": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "200": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "201": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "202": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "203": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "204": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "205": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "206": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "207": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "208": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "209": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "210": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "211": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "212": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "213": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "214": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "215": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "216": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255968": { + "content": "[toxicity=0]", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255969": { + "content": "\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255970": { + "content": "\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255971": { + "content": "\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255972": { + "content": "\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255973": { + "content": "\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255974": { + "content": "\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255975": { + "content": "\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255976": { + "content": "\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255977": { + "content": "\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255978": { + "content": "\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255979": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255980": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255981": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255982": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255983": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255984": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255985": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255986": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255987": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255988": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255989": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255990": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255991": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255992": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255993": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255994": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255995": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255996": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255997": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255998": { + "content": "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "255999": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "", + "" + ], + "bos_token": "", + "chat_template": "{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + '\n' + message['content'] | trim + '\n' }}{% endfor %}{% if add_generation_prompt %}{{'model\n'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/trainer_state.json b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7bb4b4c8ebf2efe24240c827d48cf9fc192e6375 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/trainer_state.json @@ -0,0 +1,10702 @@ +{ + "best_metric": 1.055190086364746, + "best_model_checkpoint": "outputs-001/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237", + "epoch": 0.9999671862182117, + "eval_steps": 10, + "global_step": 15237, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0006562756357670221, + "grad_norm": 1.0711857080459595, + "learning_rate": 0.0002, + "loss": 2.6059, + "step": 10 + }, + { + "epoch": 0.0013125512715340443, + "grad_norm": 0.935492992401123, + "learning_rate": 0.0002, + "loss": 1.7643, + "step": 20 + }, + { + "epoch": 0.0019688269073010667, + "grad_norm": 0.908809244632721, + "learning_rate": 0.0002, + "loss": 1.2573, + "step": 30 + }, + { + "epoch": 0.0026251025430680886, + "grad_norm": 0.3497907221317291, + "learning_rate": 0.0002, + "loss": 1.1011, + "step": 40 + }, + { + "epoch": 0.003281378178835111, + "grad_norm": 0.32830339670181274, + "learning_rate": 0.0002, + "loss": 1.0866, + "step": 50 + }, + { + "epoch": 0.003937653814602133, + "grad_norm": 0.2850394546985626, + "learning_rate": 0.0002, + "loss": 1.0104, + "step": 60 + }, + { + "epoch": 0.004593929450369155, + "grad_norm": 0.3804827928543091, + "learning_rate": 0.0002, + "loss": 1.082, + "step": 70 + }, + { + "epoch": 0.005250205086136177, + "grad_norm": 0.23506930470466614, + "learning_rate": 0.0002, + "loss": 1.052, + "step": 80 + }, + { + "epoch": 0.005906480721903199, + "grad_norm": 0.2644859552383423, + "learning_rate": 0.0002, + "loss": 1.0593, + "step": 90 + }, + { + "epoch": 0.006562756357670222, + "grad_norm": 0.36523646116256714, + "learning_rate": 0.0002, + "loss": 0.9978, + "step": 100 + }, + { + "epoch": 0.007219031993437244, + "grad_norm": 0.3195570111274719, + "learning_rate": 0.0002, + "loss": 0.9958, + "step": 110 + }, + { + "epoch": 0.007875307629204267, + "grad_norm": 0.2886694371700287, + "learning_rate": 0.0002, + "loss": 1.0041, + "step": 120 + }, + { + "epoch": 0.008531583264971289, + "grad_norm": 0.3913154900074005, + "learning_rate": 0.0002, + "loss": 1.0623, + "step": 130 + }, + { + "epoch": 0.00918785890073831, + "grad_norm": 0.3181937336921692, + "learning_rate": 0.0002, + "loss": 1.1128, + "step": 140 + }, + { + "epoch": 0.009844134536505332, + "grad_norm": 0.2629619538784027, + "learning_rate": 0.0002, + "loss": 0.9989, + "step": 150 + }, + { + "epoch": 0.010500410172272354, + "grad_norm": 0.30438563227653503, + "learning_rate": 0.0002, + "loss": 1.0226, + "step": 160 + }, + { + "epoch": 0.011156685808039376, + "grad_norm": 0.2876931130886078, + "learning_rate": 0.0002, + "loss": 1.0321, + "step": 170 + }, + { + "epoch": 0.011812961443806398, + "grad_norm": 0.29188141226768494, + "learning_rate": 0.0002, + "loss": 0.9824, + "step": 180 + }, + { + "epoch": 0.01246923707957342, + "grad_norm": 0.2645126283168793, + "learning_rate": 0.0002, + "loss": 0.9439, + "step": 190 + }, + { + "epoch": 0.013125512715340444, + "grad_norm": 0.26031428575515747, + "learning_rate": 0.0002, + "loss": 1.0273, + "step": 200 + }, + { + "epoch": 0.013781788351107466, + "grad_norm": 0.25812748074531555, + "learning_rate": 0.0002, + "loss": 1.0518, + "step": 210 + }, + { + "epoch": 0.014438063986874488, + "grad_norm": 0.24913132190704346, + "learning_rate": 0.0002, + "loss": 0.9502, + "step": 220 + }, + { + "epoch": 0.01509433962264151, + "grad_norm": 0.30332663655281067, + "learning_rate": 0.0002, + "loss": 1.0131, + "step": 230 + }, + { + "epoch": 0.015750615258408533, + "grad_norm": 0.25207284092903137, + "learning_rate": 0.0002, + "loss": 1.0459, + "step": 240 + }, + { + "epoch": 0.016406890894175553, + "grad_norm": 0.26384010910987854, + "learning_rate": 0.0002, + "loss": 0.9798, + "step": 250 + }, + { + "epoch": 0.017063166529942577, + "grad_norm": 0.28651612997055054, + "learning_rate": 0.0002, + "loss": 1.061, + "step": 260 + }, + { + "epoch": 0.017719442165709597, + "grad_norm": 0.2879799008369446, + "learning_rate": 0.0002, + "loss": 0.991, + "step": 270 + }, + { + "epoch": 0.01837571780147662, + "grad_norm": 0.28661131858825684, + "learning_rate": 0.0002, + "loss": 0.9643, + "step": 280 + }, + { + "epoch": 0.01903199343724364, + "grad_norm": 0.265348345041275, + "learning_rate": 0.0002, + "loss": 0.9903, + "step": 290 + }, + { + "epoch": 0.019688269073010665, + "grad_norm": 0.24186863005161285, + "learning_rate": 0.0002, + "loss": 1.011, + "step": 300 + }, + { + "epoch": 0.020344544708777685, + "grad_norm": 0.25996068120002747, + "learning_rate": 0.0002, + "loss": 1.0245, + "step": 310 + }, + { + "epoch": 0.02100082034454471, + "grad_norm": 0.29615098237991333, + "learning_rate": 0.0002, + "loss": 0.9987, + "step": 320 + }, + { + "epoch": 0.021657095980311732, + "grad_norm": 0.2429388016462326, + "learning_rate": 0.0002, + "loss": 1.04, + "step": 330 + }, + { + "epoch": 0.022313371616078752, + "grad_norm": 0.26882505416870117, + "learning_rate": 0.0002, + "loss": 0.9703, + "step": 340 + }, + { + "epoch": 0.022969647251845776, + "grad_norm": 0.283328652381897, + "learning_rate": 0.0002, + "loss": 0.9686, + "step": 350 + }, + { + "epoch": 0.023625922887612796, + "grad_norm": 0.3115910589694977, + "learning_rate": 0.0002, + "loss": 0.952, + "step": 360 + }, + { + "epoch": 0.02428219852337982, + "grad_norm": 0.27969497442245483, + "learning_rate": 0.0002, + "loss": 1.0112, + "step": 370 + }, + { + "epoch": 0.02493847415914684, + "grad_norm": 0.30471885204315186, + "learning_rate": 0.0002, + "loss": 1.0618, + "step": 380 + }, + { + "epoch": 0.025594749794913864, + "grad_norm": 0.3183926045894623, + "learning_rate": 0.0002, + "loss": 1.0189, + "step": 390 + }, + { + "epoch": 0.026251025430680888, + "grad_norm": 0.27311646938323975, + "learning_rate": 0.0002, + "loss": 1.0148, + "step": 400 + }, + { + "epoch": 0.026907301066447908, + "grad_norm": 0.29148945212364197, + "learning_rate": 0.0002, + "loss": 0.9672, + "step": 410 + }, + { + "epoch": 0.02756357670221493, + "grad_norm": 0.2386617809534073, + "learning_rate": 0.0002, + "loss": 0.9473, + "step": 420 + }, + { + "epoch": 0.02821985233798195, + "grad_norm": 0.2546529471874237, + "learning_rate": 0.0002, + "loss": 1.0722, + "step": 430 + }, + { + "epoch": 0.028876127973748975, + "grad_norm": 0.27932611107826233, + "learning_rate": 0.0002, + "loss": 1.0017, + "step": 440 + }, + { + "epoch": 0.029532403609515995, + "grad_norm": 0.31259334087371826, + "learning_rate": 0.0002, + "loss": 0.9988, + "step": 450 + }, + { + "epoch": 0.03018867924528302, + "grad_norm": 0.2675893008708954, + "learning_rate": 0.0002, + "loss": 1.0404, + "step": 460 + }, + { + "epoch": 0.03084495488105004, + "grad_norm": 0.24047039449214935, + "learning_rate": 0.0002, + "loss": 1.0018, + "step": 470 + }, + { + "epoch": 0.031501230516817066, + "grad_norm": 0.2637856900691986, + "learning_rate": 0.0002, + "loss": 1.0685, + "step": 480 + }, + { + "epoch": 0.03215750615258409, + "grad_norm": 0.3064589500427246, + "learning_rate": 0.0002, + "loss": 0.9879, + "step": 490 + }, + { + "epoch": 0.03281378178835111, + "grad_norm": 0.25345391035079956, + "learning_rate": 0.0002, + "loss": 0.9657, + "step": 500 + }, + { + "epoch": 0.03347005742411813, + "grad_norm": 0.3100789785385132, + "learning_rate": 0.0002, + "loss": 0.9518, + "step": 510 + }, + { + "epoch": 0.034126333059885154, + "grad_norm": 0.3312002420425415, + "learning_rate": 0.0002, + "loss": 1.0108, + "step": 520 + }, + { + "epoch": 0.034782608695652174, + "grad_norm": 0.23432421684265137, + "learning_rate": 0.0002, + "loss": 1.0157, + "step": 530 + }, + { + "epoch": 0.035438884331419195, + "grad_norm": 0.3079119622707367, + "learning_rate": 0.0002, + "loss": 1.0093, + "step": 540 + }, + { + "epoch": 0.03609515996718622, + "grad_norm": 0.2555035650730133, + "learning_rate": 0.0002, + "loss": 0.9534, + "step": 550 + }, + { + "epoch": 0.03675143560295324, + "grad_norm": 0.3530837893486023, + "learning_rate": 0.0002, + "loss": 0.9648, + "step": 560 + }, + { + "epoch": 0.03740771123872026, + "grad_norm": 0.26614823937416077, + "learning_rate": 0.0002, + "loss": 1.0432, + "step": 570 + }, + { + "epoch": 0.03806398687448728, + "grad_norm": 0.2618412971496582, + "learning_rate": 0.0002, + "loss": 1.0108, + "step": 580 + }, + { + "epoch": 0.03872026251025431, + "grad_norm": 0.26110127568244934, + "learning_rate": 0.0002, + "loss": 0.9834, + "step": 590 + }, + { + "epoch": 0.03937653814602133, + "grad_norm": 0.3050612211227417, + "learning_rate": 0.0002, + "loss": 0.9766, + "step": 600 + }, + { + "epoch": 0.04003281378178835, + "grad_norm": 0.27394089102745056, + "learning_rate": 0.0002, + "loss": 1.0195, + "step": 610 + }, + { + "epoch": 0.04068908941755537, + "grad_norm": 0.25288277864456177, + "learning_rate": 0.0002, + "loss": 0.9776, + "step": 620 + }, + { + "epoch": 0.0413453650533224, + "grad_norm": 0.2835882902145386, + "learning_rate": 0.0002, + "loss": 0.9199, + "step": 630 + }, + { + "epoch": 0.04200164068908942, + "grad_norm": 0.2621902823448181, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 640 + }, + { + "epoch": 0.04265791632485644, + "grad_norm": 0.23218439519405365, + "learning_rate": 0.0002, + "loss": 0.9494, + "step": 650 + }, + { + "epoch": 0.043314191960623465, + "grad_norm": 0.26911118626594543, + "learning_rate": 0.0002, + "loss": 0.994, + "step": 660 + }, + { + "epoch": 0.043970467596390485, + "grad_norm": 0.25920751690864563, + "learning_rate": 0.0002, + "loss": 0.972, + "step": 670 + }, + { + "epoch": 0.044626743232157505, + "grad_norm": 0.2772065997123718, + "learning_rate": 0.0002, + "loss": 0.9774, + "step": 680 + }, + { + "epoch": 0.045283018867924525, + "grad_norm": 0.275421142578125, + "learning_rate": 0.0002, + "loss": 0.9114, + "step": 690 + }, + { + "epoch": 0.04593929450369155, + "grad_norm": 0.23931777477264404, + "learning_rate": 0.0002, + "loss": 1.028, + "step": 700 + }, + { + "epoch": 0.04659557013945857, + "grad_norm": 0.3031066060066223, + "learning_rate": 0.0002, + "loss": 0.9309, + "step": 710 + }, + { + "epoch": 0.04725184577522559, + "grad_norm": 0.2655068039894104, + "learning_rate": 0.0002, + "loss": 0.9511, + "step": 720 + }, + { + "epoch": 0.04790812141099262, + "grad_norm": 0.26064610481262207, + "learning_rate": 0.0002, + "loss": 0.9012, + "step": 730 + }, + { + "epoch": 0.04856439704675964, + "grad_norm": 0.2934698760509491, + "learning_rate": 0.0002, + "loss": 0.9809, + "step": 740 + }, + { + "epoch": 0.04922067268252666, + "grad_norm": 0.2672717571258545, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 750 + }, + { + "epoch": 0.04987694831829368, + "grad_norm": 0.27246803045272827, + "learning_rate": 0.0002, + "loss": 1.0251, + "step": 760 + }, + { + "epoch": 0.05053322395406071, + "grad_norm": 0.27560853958129883, + "learning_rate": 0.0002, + "loss": 0.977, + "step": 770 + }, + { + "epoch": 0.05118949958982773, + "grad_norm": 0.2449599653482437, + "learning_rate": 0.0002, + "loss": 0.9735, + "step": 780 + }, + { + "epoch": 0.05184577522559475, + "grad_norm": 0.2873939871788025, + "learning_rate": 0.0002, + "loss": 0.9765, + "step": 790 + }, + { + "epoch": 0.052502050861361775, + "grad_norm": 0.2875595688819885, + "learning_rate": 0.0002, + "loss": 0.9647, + "step": 800 + }, + { + "epoch": 0.053158326497128795, + "grad_norm": 0.4079909324645996, + "learning_rate": 0.0002, + "loss": 1.0379, + "step": 810 + }, + { + "epoch": 0.053814602132895815, + "grad_norm": 0.2733079195022583, + "learning_rate": 0.0002, + "loss": 0.9782, + "step": 820 + }, + { + "epoch": 0.054470877768662836, + "grad_norm": 0.2624184489250183, + "learning_rate": 0.0002, + "loss": 0.98, + "step": 830 + }, + { + "epoch": 0.05512715340442986, + "grad_norm": 0.279851496219635, + "learning_rate": 0.0002, + "loss": 1.0212, + "step": 840 + }, + { + "epoch": 0.05578342904019688, + "grad_norm": 0.2573111951351166, + "learning_rate": 0.0002, + "loss": 0.8994, + "step": 850 + }, + { + "epoch": 0.0564397046759639, + "grad_norm": 0.28704535961151123, + "learning_rate": 0.0002, + "loss": 0.9823, + "step": 860 + }, + { + "epoch": 0.05709598031173093, + "grad_norm": 0.2664150297641754, + "learning_rate": 0.0002, + "loss": 0.9744, + "step": 870 + }, + { + "epoch": 0.05775225594749795, + "grad_norm": 0.2858041822910309, + "learning_rate": 0.0002, + "loss": 0.9379, + "step": 880 + }, + { + "epoch": 0.05840853158326497, + "grad_norm": 0.2394150048494339, + "learning_rate": 0.0002, + "loss": 0.9453, + "step": 890 + }, + { + "epoch": 0.05906480721903199, + "grad_norm": 0.30714845657348633, + "learning_rate": 0.0002, + "loss": 0.9372, + "step": 900 + }, + { + "epoch": 0.05972108285479902, + "grad_norm": 0.2740330994129181, + "learning_rate": 0.0002, + "loss": 0.9677, + "step": 910 + }, + { + "epoch": 0.06037735849056604, + "grad_norm": 0.33505478501319885, + "learning_rate": 0.0002, + "loss": 1.0112, + "step": 920 + }, + { + "epoch": 0.06103363412633306, + "grad_norm": 0.2945438325405121, + "learning_rate": 0.0002, + "loss": 0.986, + "step": 930 + }, + { + "epoch": 0.06168990976210008, + "grad_norm": 0.24749146401882172, + "learning_rate": 0.0002, + "loss": 0.9498, + "step": 940 + }, + { + "epoch": 0.062346185397867106, + "grad_norm": 0.3315669894218445, + "learning_rate": 0.0002, + "loss": 1.0357, + "step": 950 + }, + { + "epoch": 0.06300246103363413, + "grad_norm": 0.27253520488739014, + "learning_rate": 0.0002, + "loss": 0.9698, + "step": 960 + }, + { + "epoch": 0.06365873666940115, + "grad_norm": 0.26884031295776367, + "learning_rate": 0.0002, + "loss": 0.9544, + "step": 970 + }, + { + "epoch": 0.06431501230516817, + "grad_norm": 0.286920964717865, + "learning_rate": 0.0002, + "loss": 0.9697, + "step": 980 + }, + { + "epoch": 0.0649712879409352, + "grad_norm": 0.28334400057792664, + "learning_rate": 0.0002, + "loss": 0.9704, + "step": 990 + }, + { + "epoch": 0.06562756357670221, + "grad_norm": 0.2672102749347687, + "learning_rate": 0.0002, + "loss": 0.917, + "step": 1000 + }, + { + "epoch": 0.06628383921246923, + "grad_norm": 0.3247123062610626, + "learning_rate": 0.0002, + "loss": 1.0062, + "step": 1010 + }, + { + "epoch": 0.06694011484823625, + "grad_norm": 0.259440541267395, + "learning_rate": 0.0002, + "loss": 0.9716, + "step": 1020 + }, + { + "epoch": 0.06759639048400329, + "grad_norm": 0.2795625329017639, + "learning_rate": 0.0002, + "loss": 0.9569, + "step": 1030 + }, + { + "epoch": 0.06825266611977031, + "grad_norm": 0.2784935534000397, + "learning_rate": 0.0002, + "loss": 0.9842, + "step": 1040 + }, + { + "epoch": 0.06890894175553733, + "grad_norm": 0.24605989456176758, + "learning_rate": 0.0002, + "loss": 0.9171, + "step": 1050 + }, + { + "epoch": 0.06956521739130435, + "grad_norm": 0.25421491265296936, + "learning_rate": 0.0002, + "loss": 0.9538, + "step": 1060 + }, + { + "epoch": 0.07022149302707137, + "grad_norm": 0.2693536579608917, + "learning_rate": 0.0002, + "loss": 1.0142, + "step": 1070 + }, + { + "epoch": 0.07087776866283839, + "grad_norm": 0.28166458010673523, + "learning_rate": 0.0002, + "loss": 0.9436, + "step": 1080 + }, + { + "epoch": 0.07153404429860541, + "grad_norm": 0.2752484679222107, + "learning_rate": 0.0002, + "loss": 0.9187, + "step": 1090 + }, + { + "epoch": 0.07219031993437244, + "grad_norm": 0.24141381680965424, + "learning_rate": 0.0002, + "loss": 1.0069, + "step": 1100 + }, + { + "epoch": 0.07284659557013946, + "grad_norm": 0.2347770482301712, + "learning_rate": 0.0002, + "loss": 0.9282, + "step": 1110 + }, + { + "epoch": 0.07350287120590648, + "grad_norm": 0.29999610781669617, + "learning_rate": 0.0002, + "loss": 0.978, + "step": 1120 + }, + { + "epoch": 0.0741591468416735, + "grad_norm": 0.2811068892478943, + "learning_rate": 0.0002, + "loss": 0.9763, + "step": 1130 + }, + { + "epoch": 0.07481542247744052, + "grad_norm": 0.24790801107883453, + "learning_rate": 0.0002, + "loss": 0.978, + "step": 1140 + }, + { + "epoch": 0.07547169811320754, + "grad_norm": 0.3251914978027344, + "learning_rate": 0.0002, + "loss": 1.001, + "step": 1150 + }, + { + "epoch": 0.07612797374897456, + "grad_norm": 0.2616347074508667, + "learning_rate": 0.0002, + "loss": 1.0407, + "step": 1160 + }, + { + "epoch": 0.07678424938474158, + "grad_norm": 0.2568797171115875, + "learning_rate": 0.0002, + "loss": 0.978, + "step": 1170 + }, + { + "epoch": 0.07744052502050862, + "grad_norm": 0.2693248689174652, + "learning_rate": 0.0002, + "loss": 0.9851, + "step": 1180 + }, + { + "epoch": 0.07809680065627564, + "grad_norm": 0.3270677924156189, + "learning_rate": 0.0002, + "loss": 0.996, + "step": 1190 + }, + { + "epoch": 0.07875307629204266, + "grad_norm": 0.2481861710548401, + "learning_rate": 0.0002, + "loss": 0.9751, + "step": 1200 + }, + { + "epoch": 0.07940935192780968, + "grad_norm": 0.2612398564815521, + "learning_rate": 0.0002, + "loss": 1.0047, + "step": 1210 + }, + { + "epoch": 0.0800656275635767, + "grad_norm": 0.26566916704177856, + "learning_rate": 0.0002, + "loss": 0.9789, + "step": 1220 + }, + { + "epoch": 0.08072190319934372, + "grad_norm": 0.28026407957077026, + "learning_rate": 0.0002, + "loss": 1.0468, + "step": 1230 + }, + { + "epoch": 0.08137817883511074, + "grad_norm": 0.25609225034713745, + "learning_rate": 0.0002, + "loss": 0.9651, + "step": 1240 + }, + { + "epoch": 0.08203445447087777, + "grad_norm": 0.29063138365745544, + "learning_rate": 0.0002, + "loss": 0.9648, + "step": 1250 + }, + { + "epoch": 0.0826907301066448, + "grad_norm": 0.25998231768608093, + "learning_rate": 0.0002, + "loss": 0.9302, + "step": 1260 + }, + { + "epoch": 0.08334700574241181, + "grad_norm": 0.295261412858963, + "learning_rate": 0.0002, + "loss": 0.9733, + "step": 1270 + }, + { + "epoch": 0.08400328137817883, + "grad_norm": 0.2479529082775116, + "learning_rate": 0.0002, + "loss": 0.9775, + "step": 1280 + }, + { + "epoch": 0.08465955701394585, + "grad_norm": 0.25226080417633057, + "learning_rate": 0.0002, + "loss": 0.9891, + "step": 1290 + }, + { + "epoch": 0.08531583264971287, + "grad_norm": 0.290462851524353, + "learning_rate": 0.0002, + "loss": 0.9693, + "step": 1300 + }, + { + "epoch": 0.0859721082854799, + "grad_norm": 0.2832583785057068, + "learning_rate": 0.0002, + "loss": 1.0319, + "step": 1310 + }, + { + "epoch": 0.08662838392124693, + "grad_norm": 0.2515616714954376, + "learning_rate": 0.0002, + "loss": 1.0105, + "step": 1320 + }, + { + "epoch": 0.08728465955701395, + "grad_norm": 0.28894907236099243, + "learning_rate": 0.0002, + "loss": 0.9717, + "step": 1330 + }, + { + "epoch": 0.08794093519278097, + "grad_norm": 0.2881310284137726, + "learning_rate": 0.0002, + "loss": 0.9736, + "step": 1340 + }, + { + "epoch": 0.08859721082854799, + "grad_norm": 0.2654068171977997, + "learning_rate": 0.0002, + "loss": 1.0107, + "step": 1350 + }, + { + "epoch": 0.08925348646431501, + "grad_norm": 0.2929916977882385, + "learning_rate": 0.0002, + "loss": 1.0249, + "step": 1360 + }, + { + "epoch": 0.08990976210008203, + "grad_norm": 0.27295321226119995, + "learning_rate": 0.0002, + "loss": 0.9605, + "step": 1370 + }, + { + "epoch": 0.09056603773584905, + "grad_norm": 0.27720171213150024, + "learning_rate": 0.0002, + "loss": 0.9709, + "step": 1380 + }, + { + "epoch": 0.09122231337161608, + "grad_norm": 0.2784966826438904, + "learning_rate": 0.0002, + "loss": 1.0449, + "step": 1390 + }, + { + "epoch": 0.0918785890073831, + "grad_norm": 0.25110408663749695, + "learning_rate": 0.0002, + "loss": 0.9442, + "step": 1400 + }, + { + "epoch": 0.09253486464315012, + "grad_norm": 0.2501158118247986, + "learning_rate": 0.0002, + "loss": 0.9552, + "step": 1410 + }, + { + "epoch": 0.09319114027891715, + "grad_norm": 0.3097717761993408, + "learning_rate": 0.0002, + "loss": 0.9493, + "step": 1420 + }, + { + "epoch": 0.09384741591468417, + "grad_norm": 0.257308691740036, + "learning_rate": 0.0002, + "loss": 0.9635, + "step": 1430 + }, + { + "epoch": 0.09450369155045119, + "grad_norm": 0.2885759472846985, + "learning_rate": 0.0002, + "loss": 0.9749, + "step": 1440 + }, + { + "epoch": 0.0951599671862182, + "grad_norm": 0.2938264012336731, + "learning_rate": 0.0002, + "loss": 0.9592, + "step": 1450 + }, + { + "epoch": 0.09581624282198524, + "grad_norm": 0.2543550729751587, + "learning_rate": 0.0002, + "loss": 1.0326, + "step": 1460 + }, + { + "epoch": 0.09647251845775226, + "grad_norm": 0.3020402789115906, + "learning_rate": 0.0002, + "loss": 0.9433, + "step": 1470 + }, + { + "epoch": 0.09712879409351928, + "grad_norm": 0.2818080186843872, + "learning_rate": 0.0002, + "loss": 0.9132, + "step": 1480 + }, + { + "epoch": 0.0977850697292863, + "grad_norm": 0.2420656979084015, + "learning_rate": 0.0002, + "loss": 0.9217, + "step": 1490 + }, + { + "epoch": 0.09844134536505332, + "grad_norm": 0.31427690386772156, + "learning_rate": 0.0002, + "loss": 0.938, + "step": 1500 + }, + { + "epoch": 0.09909762100082034, + "grad_norm": 0.27618250250816345, + "learning_rate": 0.0002, + "loss": 0.9234, + "step": 1510 + }, + { + "epoch": 0.09975389663658736, + "grad_norm": 0.26401254534721375, + "learning_rate": 0.0002, + "loss": 0.9019, + "step": 1520 + }, + { + "epoch": 0.1004101722723544, + "grad_norm": 0.23575739562511444, + "learning_rate": 0.0002, + "loss": 0.9992, + "step": 1530 + }, + { + "epoch": 0.10106644790812142, + "grad_norm": 0.3076087534427643, + "learning_rate": 0.0002, + "loss": 0.9315, + "step": 1540 + }, + { + "epoch": 0.10172272354388844, + "grad_norm": 0.28179317712783813, + "learning_rate": 0.0002, + "loss": 1.0138, + "step": 1550 + }, + { + "epoch": 0.10237899917965546, + "grad_norm": 0.28638190031051636, + "learning_rate": 0.0002, + "loss": 0.9587, + "step": 1560 + }, + { + "epoch": 0.10303527481542248, + "grad_norm": 0.24788478016853333, + "learning_rate": 0.0002, + "loss": 0.9576, + "step": 1570 + }, + { + "epoch": 0.1036915504511895, + "grad_norm": 0.2987830340862274, + "learning_rate": 0.0002, + "loss": 0.9523, + "step": 1580 + }, + { + "epoch": 0.10434782608695652, + "grad_norm": 0.3252484202384949, + "learning_rate": 0.0002, + "loss": 0.9782, + "step": 1590 + }, + { + "epoch": 0.10500410172272355, + "grad_norm": 0.2763068377971649, + "learning_rate": 0.0002, + "loss": 0.9378, + "step": 1600 + }, + { + "epoch": 0.10566037735849057, + "grad_norm": 0.3219335973262787, + "learning_rate": 0.0002, + "loss": 0.9491, + "step": 1610 + }, + { + "epoch": 0.10631665299425759, + "grad_norm": 0.2708939015865326, + "learning_rate": 0.0002, + "loss": 0.9181, + "step": 1620 + }, + { + "epoch": 0.10697292863002461, + "grad_norm": 0.29320818185806274, + "learning_rate": 0.0002, + "loss": 1.0008, + "step": 1630 + }, + { + "epoch": 0.10762920426579163, + "grad_norm": 0.2731851041316986, + "learning_rate": 0.0002, + "loss": 0.9593, + "step": 1640 + }, + { + "epoch": 0.10828547990155865, + "grad_norm": 0.29563331604003906, + "learning_rate": 0.0002, + "loss": 0.9915, + "step": 1650 + }, + { + "epoch": 0.10894175553732567, + "grad_norm": 0.33387669920921326, + "learning_rate": 0.0002, + "loss": 0.9543, + "step": 1660 + }, + { + "epoch": 0.1095980311730927, + "grad_norm": 0.3157867193222046, + "learning_rate": 0.0002, + "loss": 0.941, + "step": 1670 + }, + { + "epoch": 0.11025430680885973, + "grad_norm": 0.24688409268856049, + "learning_rate": 0.0002, + "loss": 0.9107, + "step": 1680 + }, + { + "epoch": 0.11091058244462675, + "grad_norm": 0.2868671417236328, + "learning_rate": 0.0002, + "loss": 0.9392, + "step": 1690 + }, + { + "epoch": 0.11156685808039377, + "grad_norm": 0.2555101811885834, + "learning_rate": 0.0002, + "loss": 0.9476, + "step": 1700 + }, + { + "epoch": 0.11222313371616079, + "grad_norm": 0.3224661946296692, + "learning_rate": 0.0002, + "loss": 0.9261, + "step": 1710 + }, + { + "epoch": 0.1128794093519278, + "grad_norm": 0.28927505016326904, + "learning_rate": 0.0002, + "loss": 0.9372, + "step": 1720 + }, + { + "epoch": 0.11353568498769483, + "grad_norm": 0.25575608015060425, + "learning_rate": 0.0002, + "loss": 0.9237, + "step": 1730 + }, + { + "epoch": 0.11419196062346186, + "grad_norm": 0.27490100264549255, + "learning_rate": 0.0002, + "loss": 0.9526, + "step": 1740 + }, + { + "epoch": 0.11484823625922888, + "grad_norm": 0.28235989809036255, + "learning_rate": 0.0002, + "loss": 1.0311, + "step": 1750 + }, + { + "epoch": 0.1155045118949959, + "grad_norm": 0.2636060416698456, + "learning_rate": 0.0002, + "loss": 0.9112, + "step": 1760 + }, + { + "epoch": 0.11616078753076292, + "grad_norm": 0.2642059624195099, + "learning_rate": 0.0002, + "loss": 0.9249, + "step": 1770 + }, + { + "epoch": 0.11681706316652994, + "grad_norm": 0.261807918548584, + "learning_rate": 0.0002, + "loss": 0.964, + "step": 1780 + }, + { + "epoch": 0.11747333880229696, + "grad_norm": 0.24618281424045563, + "learning_rate": 0.0002, + "loss": 0.9473, + "step": 1790 + }, + { + "epoch": 0.11812961443806398, + "grad_norm": 0.2700425982475281, + "learning_rate": 0.0002, + "loss": 0.9452, + "step": 1800 + }, + { + "epoch": 0.11878589007383102, + "grad_norm": 0.22687454521656036, + "learning_rate": 0.0002, + "loss": 0.9687, + "step": 1810 + }, + { + "epoch": 0.11944216570959804, + "grad_norm": 0.2843499481678009, + "learning_rate": 0.0002, + "loss": 0.9099, + "step": 1820 + }, + { + "epoch": 0.12009844134536506, + "grad_norm": 0.23511677980422974, + "learning_rate": 0.0002, + "loss": 0.9295, + "step": 1830 + }, + { + "epoch": 0.12075471698113208, + "grad_norm": 0.34054702520370483, + "learning_rate": 0.0002, + "loss": 0.998, + "step": 1840 + }, + { + "epoch": 0.1214109926168991, + "grad_norm": 0.32845374941825867, + "learning_rate": 0.0002, + "loss": 0.9343, + "step": 1850 + }, + { + "epoch": 0.12206726825266612, + "grad_norm": 0.2843034267425537, + "learning_rate": 0.0002, + "loss": 0.983, + "step": 1860 + }, + { + "epoch": 0.12272354388843314, + "grad_norm": 0.2824552357196808, + "learning_rate": 0.0002, + "loss": 0.963, + "step": 1870 + }, + { + "epoch": 0.12337981952420016, + "grad_norm": 0.26878267526626587, + "learning_rate": 0.0002, + "loss": 0.9875, + "step": 1880 + }, + { + "epoch": 0.12403609515996719, + "grad_norm": 0.31207871437072754, + "learning_rate": 0.0002, + "loss": 0.9784, + "step": 1890 + }, + { + "epoch": 0.12469237079573421, + "grad_norm": 0.2617819905281067, + "learning_rate": 0.0002, + "loss": 0.9497, + "step": 1900 + }, + { + "epoch": 0.12534864643150123, + "grad_norm": 0.2652885615825653, + "learning_rate": 0.0002, + "loss": 0.8948, + "step": 1910 + }, + { + "epoch": 0.12600492206726827, + "grad_norm": 0.29473352432250977, + "learning_rate": 0.0002, + "loss": 0.9534, + "step": 1920 + }, + { + "epoch": 0.12666119770303527, + "grad_norm": 0.3101664185523987, + "learning_rate": 0.0002, + "loss": 0.9695, + "step": 1930 + }, + { + "epoch": 0.1273174733388023, + "grad_norm": 0.2714068293571472, + "learning_rate": 0.0002, + "loss": 0.9928, + "step": 1940 + }, + { + "epoch": 0.1279737489745693, + "grad_norm": 0.2684655487537384, + "learning_rate": 0.0002, + "loss": 1.0293, + "step": 1950 + }, + { + "epoch": 0.12863002461033635, + "grad_norm": 0.2543509900569916, + "learning_rate": 0.0002, + "loss": 0.9236, + "step": 1960 + }, + { + "epoch": 0.12928630024610335, + "grad_norm": 0.25268790125846863, + "learning_rate": 0.0002, + "loss": 0.9598, + "step": 1970 + }, + { + "epoch": 0.1299425758818704, + "grad_norm": 0.27713078260421753, + "learning_rate": 0.0002, + "loss": 0.967, + "step": 1980 + }, + { + "epoch": 0.13059885151763742, + "grad_norm": 0.3185126483440399, + "learning_rate": 0.0002, + "loss": 0.9411, + "step": 1990 + }, + { + "epoch": 0.13125512715340443, + "grad_norm": 0.26718857884407043, + "learning_rate": 0.0002, + "loss": 0.9864, + "step": 2000 + }, + { + "epoch": 0.13191140278917146, + "grad_norm": 0.2843841314315796, + "learning_rate": 0.0002, + "loss": 0.9728, + "step": 2010 + }, + { + "epoch": 0.13256767842493847, + "grad_norm": 0.3013055622577667, + "learning_rate": 0.0002, + "loss": 0.993, + "step": 2020 + }, + { + "epoch": 0.1332239540607055, + "grad_norm": 0.27985867857933044, + "learning_rate": 0.0002, + "loss": 0.9437, + "step": 2030 + }, + { + "epoch": 0.1338802296964725, + "grad_norm": 0.2501908242702484, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 2040 + }, + { + "epoch": 0.13453650533223954, + "grad_norm": 0.441340833902359, + "learning_rate": 0.0002, + "loss": 0.9603, + "step": 2050 + }, + { + "epoch": 0.13519278096800658, + "grad_norm": 2.568060874938965, + "learning_rate": 0.0002, + "loss": 0.9166, + "step": 2060 + }, + { + "epoch": 0.13584905660377358, + "grad_norm": 0.2505454421043396, + "learning_rate": 0.0002, + "loss": 0.9803, + "step": 2070 + }, + { + "epoch": 0.13650533223954062, + "grad_norm": 0.33997446298599243, + "learning_rate": 0.0002, + "loss": 1.0159, + "step": 2080 + }, + { + "epoch": 0.13716160787530762, + "grad_norm": 0.28161343932151794, + "learning_rate": 0.0002, + "loss": 0.908, + "step": 2090 + }, + { + "epoch": 0.13781788351107466, + "grad_norm": 0.28142687678337097, + "learning_rate": 0.0002, + "loss": 0.9709, + "step": 2100 + }, + { + "epoch": 0.13847415914684166, + "grad_norm": 0.3137170076370239, + "learning_rate": 0.0002, + "loss": 0.9716, + "step": 2110 + }, + { + "epoch": 0.1391304347826087, + "grad_norm": 0.3072240352630615, + "learning_rate": 0.0002, + "loss": 0.9698, + "step": 2120 + }, + { + "epoch": 0.13978671041837573, + "grad_norm": 0.24328380823135376, + "learning_rate": 0.0002, + "loss": 0.9979, + "step": 2130 + }, + { + "epoch": 0.14044298605414274, + "grad_norm": 0.3065047860145569, + "learning_rate": 0.0002, + "loss": 0.9549, + "step": 2140 + }, + { + "epoch": 0.14109926168990977, + "grad_norm": 0.34212175011634827, + "learning_rate": 0.0002, + "loss": 0.9897, + "step": 2150 + }, + { + "epoch": 0.14175553732567678, + "grad_norm": 0.27491796016693115, + "learning_rate": 0.0002, + "loss": 0.9601, + "step": 2160 + }, + { + "epoch": 0.1424118129614438, + "grad_norm": 0.26518693566322327, + "learning_rate": 0.0002, + "loss": 0.9928, + "step": 2170 + }, + { + "epoch": 0.14306808859721082, + "grad_norm": 0.28350934386253357, + "learning_rate": 0.0002, + "loss": 0.9385, + "step": 2180 + }, + { + "epoch": 0.14372436423297785, + "grad_norm": 0.3287768065929413, + "learning_rate": 0.0002, + "loss": 0.9878, + "step": 2190 + }, + { + "epoch": 0.1443806398687449, + "grad_norm": 0.26362666487693787, + "learning_rate": 0.0002, + "loss": 0.9192, + "step": 2200 + }, + { + "epoch": 0.1450369155045119, + "grad_norm": 0.31169235706329346, + "learning_rate": 0.0002, + "loss": 0.9804, + "step": 2210 + }, + { + "epoch": 0.14569319114027893, + "grad_norm": 0.258667528629303, + "learning_rate": 0.0002, + "loss": 0.9857, + "step": 2220 + }, + { + "epoch": 0.14634946677604593, + "grad_norm": 0.32289111614227295, + "learning_rate": 0.0002, + "loss": 0.9898, + "step": 2230 + }, + { + "epoch": 0.14700574241181297, + "grad_norm": 0.3344270586967468, + "learning_rate": 0.0002, + "loss": 1.0051, + "step": 2240 + }, + { + "epoch": 0.14766201804757997, + "grad_norm": 0.3001033067703247, + "learning_rate": 0.0002, + "loss": 0.9337, + "step": 2250 + }, + { + "epoch": 0.148318293683347, + "grad_norm": 0.2718261778354645, + "learning_rate": 0.0002, + "loss": 0.9788, + "step": 2260 + }, + { + "epoch": 0.14897456931911401, + "grad_norm": 0.3059164583683014, + "learning_rate": 0.0002, + "loss": 0.9801, + "step": 2270 + }, + { + "epoch": 0.14963084495488105, + "grad_norm": 0.2939850389957428, + "learning_rate": 0.0002, + "loss": 0.8914, + "step": 2280 + }, + { + "epoch": 0.15028712059064808, + "grad_norm": 0.2803564965724945, + "learning_rate": 0.0002, + "loss": 0.951, + "step": 2290 + }, + { + "epoch": 0.1509433962264151, + "grad_norm": 0.2843068242073059, + "learning_rate": 0.0002, + "loss": 0.9131, + "step": 2300 + }, + { + "epoch": 0.15159967186218212, + "grad_norm": 0.28517085313796997, + "learning_rate": 0.0002, + "loss": 0.9721, + "step": 2310 + }, + { + "epoch": 0.15225594749794913, + "grad_norm": 0.291030615568161, + "learning_rate": 0.0002, + "loss": 0.9402, + "step": 2320 + }, + { + "epoch": 0.15291222313371616, + "grad_norm": 0.2712008059024811, + "learning_rate": 0.0002, + "loss": 0.9509, + "step": 2330 + }, + { + "epoch": 0.15356849876948317, + "grad_norm": 0.3357657790184021, + "learning_rate": 0.0002, + "loss": 0.9406, + "step": 2340 + }, + { + "epoch": 0.1542247744052502, + "grad_norm": 0.28412291407585144, + "learning_rate": 0.0002, + "loss": 0.961, + "step": 2350 + }, + { + "epoch": 0.15488105004101724, + "grad_norm": 0.3217862844467163, + "learning_rate": 0.0002, + "loss": 0.9254, + "step": 2360 + }, + { + "epoch": 0.15553732567678424, + "grad_norm": 0.32076528668403625, + "learning_rate": 0.0002, + "loss": 0.9172, + "step": 2370 + }, + { + "epoch": 0.15619360131255128, + "grad_norm": 0.3490257263183594, + "learning_rate": 0.0002, + "loss": 0.9459, + "step": 2380 + }, + { + "epoch": 0.15684987694831828, + "grad_norm": 0.28253331780433655, + "learning_rate": 0.0002, + "loss": 0.9051, + "step": 2390 + }, + { + "epoch": 0.15750615258408532, + "grad_norm": 0.2875654399394989, + "learning_rate": 0.0002, + "loss": 0.9597, + "step": 2400 + }, + { + "epoch": 0.15816242821985232, + "grad_norm": 0.3347921669483185, + "learning_rate": 0.0002, + "loss": 0.9164, + "step": 2410 + }, + { + "epoch": 0.15881870385561936, + "grad_norm": 0.2624322474002838, + "learning_rate": 0.0002, + "loss": 1.0023, + "step": 2420 + }, + { + "epoch": 0.1594749794913864, + "grad_norm": 0.29683780670166016, + "learning_rate": 0.0002, + "loss": 0.9751, + "step": 2430 + }, + { + "epoch": 0.1601312551271534, + "grad_norm": 0.29237303137779236, + "learning_rate": 0.0002, + "loss": 0.967, + "step": 2440 + }, + { + "epoch": 0.16078753076292043, + "grad_norm": 0.3541257977485657, + "learning_rate": 0.0002, + "loss": 0.9754, + "step": 2450 + }, + { + "epoch": 0.16144380639868744, + "grad_norm": 0.28203415870666504, + "learning_rate": 0.0002, + "loss": 0.8977, + "step": 2460 + }, + { + "epoch": 0.16210008203445447, + "grad_norm": 0.322329044342041, + "learning_rate": 0.0002, + "loss": 0.9489, + "step": 2470 + }, + { + "epoch": 0.16275635767022148, + "grad_norm": 0.27731558680534363, + "learning_rate": 0.0002, + "loss": 0.9224, + "step": 2480 + }, + { + "epoch": 0.16341263330598851, + "grad_norm": 0.30348825454711914, + "learning_rate": 0.0002, + "loss": 0.9773, + "step": 2490 + }, + { + "epoch": 0.16406890894175555, + "grad_norm": 0.29504773020744324, + "learning_rate": 0.0002, + "loss": 0.9318, + "step": 2500 + }, + { + "epoch": 0.16472518457752255, + "grad_norm": 0.2977028489112854, + "learning_rate": 0.0002, + "loss": 0.8931, + "step": 2510 + }, + { + "epoch": 0.1653814602132896, + "grad_norm": 0.3172457218170166, + "learning_rate": 0.0002, + "loss": 0.953, + "step": 2520 + }, + { + "epoch": 0.1660377358490566, + "grad_norm": 0.3320612907409668, + "learning_rate": 0.0002, + "loss": 0.9786, + "step": 2530 + }, + { + "epoch": 0.16669401148482363, + "grad_norm": 0.2473023235797882, + "learning_rate": 0.0002, + "loss": 0.895, + "step": 2540 + }, + { + "epoch": 0.16735028712059064, + "grad_norm": 0.3250006139278412, + "learning_rate": 0.0002, + "loss": 0.9692, + "step": 2550 + }, + { + "epoch": 0.16800656275635767, + "grad_norm": 0.2940689027309418, + "learning_rate": 0.0002, + "loss": 0.956, + "step": 2560 + }, + { + "epoch": 0.1686628383921247, + "grad_norm": 0.34614887833595276, + "learning_rate": 0.0002, + "loss": 0.8926, + "step": 2570 + }, + { + "epoch": 0.1693191140278917, + "grad_norm": 0.3513164520263672, + "learning_rate": 0.0002, + "loss": 1.0153, + "step": 2580 + }, + { + "epoch": 0.16997538966365874, + "grad_norm": 0.2530531585216522, + "learning_rate": 0.0002, + "loss": 0.9608, + "step": 2590 + }, + { + "epoch": 0.17063166529942575, + "grad_norm": 0.3028896152973175, + "learning_rate": 0.0002, + "loss": 0.9876, + "step": 2600 + }, + { + "epoch": 0.17128794093519278, + "grad_norm": 0.304739773273468, + "learning_rate": 0.0002, + "loss": 0.9415, + "step": 2610 + }, + { + "epoch": 0.1719442165709598, + "grad_norm": 0.3360660672187805, + "learning_rate": 0.0002, + "loss": 0.9857, + "step": 2620 + }, + { + "epoch": 0.17260049220672682, + "grad_norm": 0.24901753664016724, + "learning_rate": 0.0002, + "loss": 0.9173, + "step": 2630 + }, + { + "epoch": 0.17325676784249386, + "grad_norm": 0.32212400436401367, + "learning_rate": 0.0002, + "loss": 0.9351, + "step": 2640 + }, + { + "epoch": 0.17391304347826086, + "grad_norm": 0.2780437767505646, + "learning_rate": 0.0002, + "loss": 0.9284, + "step": 2650 + }, + { + "epoch": 0.1745693191140279, + "grad_norm": 0.25902262330055237, + "learning_rate": 0.0002, + "loss": 0.9368, + "step": 2660 + }, + { + "epoch": 0.1752255947497949, + "grad_norm": 0.29171422123908997, + "learning_rate": 0.0002, + "loss": 0.9378, + "step": 2670 + }, + { + "epoch": 0.17588187038556194, + "grad_norm": 0.2586783766746521, + "learning_rate": 0.0002, + "loss": 0.9525, + "step": 2680 + }, + { + "epoch": 0.17653814602132895, + "grad_norm": 0.25609949231147766, + "learning_rate": 0.0002, + "loss": 0.9367, + "step": 2690 + }, + { + "epoch": 0.17719442165709598, + "grad_norm": 0.29789483547210693, + "learning_rate": 0.0002, + "loss": 0.8675, + "step": 2700 + }, + { + "epoch": 0.177850697292863, + "grad_norm": 0.2938411235809326, + "learning_rate": 0.0002, + "loss": 0.9918, + "step": 2710 + }, + { + "epoch": 0.17850697292863002, + "grad_norm": 0.31436532735824585, + "learning_rate": 0.0002, + "loss": 0.9824, + "step": 2720 + }, + { + "epoch": 0.17916324856439705, + "grad_norm": 0.31310203671455383, + "learning_rate": 0.0002, + "loss": 1.0094, + "step": 2730 + }, + { + "epoch": 0.17981952420016406, + "grad_norm": 0.2695367634296417, + "learning_rate": 0.0002, + "loss": 0.9105, + "step": 2740 + }, + { + "epoch": 0.1804757998359311, + "grad_norm": 0.32590436935424805, + "learning_rate": 0.0002, + "loss": 0.9216, + "step": 2750 + }, + { + "epoch": 0.1811320754716981, + "grad_norm": 0.32683756947517395, + "learning_rate": 0.0002, + "loss": 0.923, + "step": 2760 + }, + { + "epoch": 0.18178835110746513, + "grad_norm": 0.33043643832206726, + "learning_rate": 0.0002, + "loss": 0.938, + "step": 2770 + }, + { + "epoch": 0.18244462674323217, + "grad_norm": 0.3137816786766052, + "learning_rate": 0.0002, + "loss": 0.937, + "step": 2780 + }, + { + "epoch": 0.18310090237899918, + "grad_norm": 0.33037737011909485, + "learning_rate": 0.0002, + "loss": 0.8923, + "step": 2790 + }, + { + "epoch": 0.1837571780147662, + "grad_norm": 0.27679184079170227, + "learning_rate": 0.0002, + "loss": 0.9179, + "step": 2800 + }, + { + "epoch": 0.18441345365053322, + "grad_norm": 0.42975902557373047, + "learning_rate": 0.0002, + "loss": 0.9047, + "step": 2810 + }, + { + "epoch": 0.18506972928630025, + "grad_norm": 0.2915041744709015, + "learning_rate": 0.0002, + "loss": 0.9431, + "step": 2820 + }, + { + "epoch": 0.18572600492206726, + "grad_norm": 0.315197229385376, + "learning_rate": 0.0002, + "loss": 0.9573, + "step": 2830 + }, + { + "epoch": 0.1863822805578343, + "grad_norm": 0.31129002571105957, + "learning_rate": 0.0002, + "loss": 0.9909, + "step": 2840 + }, + { + "epoch": 0.18703855619360132, + "grad_norm": 0.2783232033252716, + "learning_rate": 0.0002, + "loss": 0.9222, + "step": 2850 + }, + { + "epoch": 0.18769483182936833, + "grad_norm": 0.34501492977142334, + "learning_rate": 0.0002, + "loss": 0.9347, + "step": 2860 + }, + { + "epoch": 0.18835110746513536, + "grad_norm": 0.32616767287254333, + "learning_rate": 0.0002, + "loss": 0.949, + "step": 2870 + }, + { + "epoch": 0.18900738310090237, + "grad_norm": 0.25213682651519775, + "learning_rate": 0.0002, + "loss": 0.9909, + "step": 2880 + }, + { + "epoch": 0.1896636587366694, + "grad_norm": 0.2745118737220764, + "learning_rate": 0.0002, + "loss": 0.892, + "step": 2890 + }, + { + "epoch": 0.1903199343724364, + "grad_norm": 0.26175656914711, + "learning_rate": 0.0002, + "loss": 0.9737, + "step": 2900 + }, + { + "epoch": 0.19097621000820345, + "grad_norm": 0.3019673228263855, + "learning_rate": 0.0002, + "loss": 0.8853, + "step": 2910 + }, + { + "epoch": 0.19163248564397048, + "grad_norm": 0.30918899178504944, + "learning_rate": 0.0002, + "loss": 1.0016, + "step": 2920 + }, + { + "epoch": 0.19228876127973749, + "grad_norm": 0.320893794298172, + "learning_rate": 0.0002, + "loss": 0.9301, + "step": 2930 + }, + { + "epoch": 0.19294503691550452, + "grad_norm": 0.2736368477344513, + "learning_rate": 0.0002, + "loss": 0.9592, + "step": 2940 + }, + { + "epoch": 0.19360131255127153, + "grad_norm": 0.28817251324653625, + "learning_rate": 0.0002, + "loss": 0.9334, + "step": 2950 + }, + { + "epoch": 0.19425758818703856, + "grad_norm": 0.26863837242126465, + "learning_rate": 0.0002, + "loss": 0.9484, + "step": 2960 + }, + { + "epoch": 0.19491386382280557, + "grad_norm": 0.28012585639953613, + "learning_rate": 0.0002, + "loss": 0.9466, + "step": 2970 + }, + { + "epoch": 0.1955701394585726, + "grad_norm": 0.2929932773113251, + "learning_rate": 0.0002, + "loss": 0.9623, + "step": 2980 + }, + { + "epoch": 0.19622641509433963, + "grad_norm": 0.29878300428390503, + "learning_rate": 0.0002, + "loss": 0.9406, + "step": 2990 + }, + { + "epoch": 0.19688269073010664, + "grad_norm": 0.2573733329772949, + "learning_rate": 0.0002, + "loss": 0.9714, + "step": 3000 + }, + { + "epoch": 0.19753896636587367, + "grad_norm": 0.31736820936203003, + "learning_rate": 0.0002, + "loss": 0.9763, + "step": 3010 + }, + { + "epoch": 0.19819524200164068, + "grad_norm": 0.2864682972431183, + "learning_rate": 0.0002, + "loss": 1.0111, + "step": 3020 + }, + { + "epoch": 0.19885151763740772, + "grad_norm": 0.3161790668964386, + "learning_rate": 0.0002, + "loss": 0.9193, + "step": 3030 + }, + { + "epoch": 0.19950779327317472, + "grad_norm": 0.37538009881973267, + "learning_rate": 0.0002, + "loss": 0.9298, + "step": 3040 + }, + { + "epoch": 0.20016406890894176, + "grad_norm": 0.2850522994995117, + "learning_rate": 0.0002, + "loss": 0.9329, + "step": 3050 + }, + { + "epoch": 0.2008203445447088, + "grad_norm": 0.36680465936660767, + "learning_rate": 0.0002, + "loss": 1.0777, + "step": 3060 + }, + { + "epoch": 0.2014766201804758, + "grad_norm": 0.2879831790924072, + "learning_rate": 0.0002, + "loss": 0.9042, + "step": 3070 + }, + { + "epoch": 0.20213289581624283, + "grad_norm": 0.29652512073516846, + "learning_rate": 0.0002, + "loss": 0.8925, + "step": 3080 + }, + { + "epoch": 0.20278917145200984, + "grad_norm": 0.3131853938102722, + "learning_rate": 0.0002, + "loss": 0.9928, + "step": 3090 + }, + { + "epoch": 0.20344544708777687, + "grad_norm": 0.34605351090431213, + "learning_rate": 0.0002, + "loss": 0.9968, + "step": 3100 + }, + { + "epoch": 0.20410172272354388, + "grad_norm": 0.30542251467704773, + "learning_rate": 0.0002, + "loss": 0.9701, + "step": 3110 + }, + { + "epoch": 0.2047579983593109, + "grad_norm": 0.28131186962127686, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 3120 + }, + { + "epoch": 0.20541427399507795, + "grad_norm": 0.257859468460083, + "learning_rate": 0.0002, + "loss": 0.9332, + "step": 3130 + }, + { + "epoch": 0.20607054963084495, + "grad_norm": 0.32994887232780457, + "learning_rate": 0.0002, + "loss": 0.9771, + "step": 3140 + }, + { + "epoch": 0.20672682526661199, + "grad_norm": 0.26293760538101196, + "learning_rate": 0.0002, + "loss": 0.9368, + "step": 3150 + }, + { + "epoch": 0.207383100902379, + "grad_norm": 0.29514846205711365, + "learning_rate": 0.0002, + "loss": 0.9801, + "step": 3160 + }, + { + "epoch": 0.20803937653814603, + "grad_norm": 0.36102691292762756, + "learning_rate": 0.0002, + "loss": 0.9879, + "step": 3170 + }, + { + "epoch": 0.20869565217391303, + "grad_norm": 0.3298998475074768, + "learning_rate": 0.0002, + "loss": 0.9322, + "step": 3180 + }, + { + "epoch": 0.20935192780968007, + "grad_norm": 0.29835769534111023, + "learning_rate": 0.0002, + "loss": 0.9456, + "step": 3190 + }, + { + "epoch": 0.2100082034454471, + "grad_norm": 0.3438013792037964, + "learning_rate": 0.0002, + "loss": 1.0199, + "step": 3200 + }, + { + "epoch": 0.2106644790812141, + "grad_norm": 0.2945845127105713, + "learning_rate": 0.0002, + "loss": 0.9104, + "step": 3210 + }, + { + "epoch": 0.21132075471698114, + "grad_norm": 0.3173643946647644, + "learning_rate": 0.0002, + "loss": 0.9231, + "step": 3220 + }, + { + "epoch": 0.21197703035274815, + "grad_norm": 0.30580341815948486, + "learning_rate": 0.0002, + "loss": 0.892, + "step": 3230 + }, + { + "epoch": 0.21263330598851518, + "grad_norm": 0.32913172245025635, + "learning_rate": 0.0002, + "loss": 0.9458, + "step": 3240 + }, + { + "epoch": 0.2132895816242822, + "grad_norm": 0.2739659249782562, + "learning_rate": 0.0002, + "loss": 0.9475, + "step": 3250 + }, + { + "epoch": 0.21394585726004922, + "grad_norm": 0.3016273081302643, + "learning_rate": 0.0002, + "loss": 0.9185, + "step": 3260 + }, + { + "epoch": 0.21460213289581626, + "grad_norm": 0.2818678319454193, + "learning_rate": 0.0002, + "loss": 0.9819, + "step": 3270 + }, + { + "epoch": 0.21525840853158326, + "grad_norm": 0.3243506848812103, + "learning_rate": 0.0002, + "loss": 0.9535, + "step": 3280 + }, + { + "epoch": 0.2159146841673503, + "grad_norm": 0.38120919466018677, + "learning_rate": 0.0002, + "loss": 0.9355, + "step": 3290 + }, + { + "epoch": 0.2165709598031173, + "grad_norm": 0.3041105568408966, + "learning_rate": 0.0002, + "loss": 0.9556, + "step": 3300 + }, + { + "epoch": 0.21722723543888434, + "grad_norm": 0.2648089528083801, + "learning_rate": 0.0002, + "loss": 0.9172, + "step": 3310 + }, + { + "epoch": 0.21788351107465134, + "grad_norm": 0.324095219373703, + "learning_rate": 0.0002, + "loss": 0.9567, + "step": 3320 + }, + { + "epoch": 0.21853978671041838, + "grad_norm": 0.2796897888183594, + "learning_rate": 0.0002, + "loss": 0.9586, + "step": 3330 + }, + { + "epoch": 0.2191960623461854, + "grad_norm": 0.30163177847862244, + "learning_rate": 0.0002, + "loss": 0.9505, + "step": 3340 + }, + { + "epoch": 0.21985233798195242, + "grad_norm": 0.29213520884513855, + "learning_rate": 0.0002, + "loss": 0.9515, + "step": 3350 + }, + { + "epoch": 0.22050861361771945, + "grad_norm": 0.28203412890434265, + "learning_rate": 0.0002, + "loss": 0.9692, + "step": 3360 + }, + { + "epoch": 0.22116488925348646, + "grad_norm": 0.31636562943458557, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 3370 + }, + { + "epoch": 0.2218211648892535, + "grad_norm": 0.3153952956199646, + "learning_rate": 0.0002, + "loss": 0.901, + "step": 3380 + }, + { + "epoch": 0.2224774405250205, + "grad_norm": 0.2793780267238617, + "learning_rate": 0.0002, + "loss": 0.8994, + "step": 3390 + }, + { + "epoch": 0.22313371616078753, + "grad_norm": 0.36783504486083984, + "learning_rate": 0.0002, + "loss": 0.9424, + "step": 3400 + }, + { + "epoch": 0.22378999179655457, + "grad_norm": 0.31803956627845764, + "learning_rate": 0.0002, + "loss": 0.9837, + "step": 3410 + }, + { + "epoch": 0.22444626743232157, + "grad_norm": 0.31517738103866577, + "learning_rate": 0.0002, + "loss": 0.9234, + "step": 3420 + }, + { + "epoch": 0.2251025430680886, + "grad_norm": 0.4124458432197571, + "learning_rate": 0.0002, + "loss": 0.9336, + "step": 3430 + }, + { + "epoch": 0.2257588187038556, + "grad_norm": 0.29330259561538696, + "learning_rate": 0.0002, + "loss": 0.8587, + "step": 3440 + }, + { + "epoch": 0.22641509433962265, + "grad_norm": 0.281255304813385, + "learning_rate": 0.0002, + "loss": 0.925, + "step": 3450 + }, + { + "epoch": 0.22707136997538965, + "grad_norm": 0.3765242397785187, + "learning_rate": 0.0002, + "loss": 0.9797, + "step": 3460 + }, + { + "epoch": 0.2277276456111567, + "grad_norm": 0.33658838272094727, + "learning_rate": 0.0002, + "loss": 1.0201, + "step": 3470 + }, + { + "epoch": 0.22838392124692372, + "grad_norm": 0.2809208035469055, + "learning_rate": 0.0002, + "loss": 0.9255, + "step": 3480 + }, + { + "epoch": 0.22904019688269073, + "grad_norm": 0.3064846098423004, + "learning_rate": 0.0002, + "loss": 0.8948, + "step": 3490 + }, + { + "epoch": 0.22969647251845776, + "grad_norm": 0.31982484459877014, + "learning_rate": 0.0002, + "loss": 0.8805, + "step": 3500 + }, + { + "epoch": 0.23035274815422477, + "grad_norm": 0.27903324365615845, + "learning_rate": 0.0002, + "loss": 0.927, + "step": 3510 + }, + { + "epoch": 0.2310090237899918, + "grad_norm": 0.35411202907562256, + "learning_rate": 0.0002, + "loss": 0.9103, + "step": 3520 + }, + { + "epoch": 0.2316652994257588, + "grad_norm": 0.2796868681907654, + "learning_rate": 0.0002, + "loss": 0.9087, + "step": 3530 + }, + { + "epoch": 0.23232157506152584, + "grad_norm": 0.3428329825401306, + "learning_rate": 0.0002, + "loss": 0.9861, + "step": 3540 + }, + { + "epoch": 0.23297785069729288, + "grad_norm": 0.30563032627105713, + "learning_rate": 0.0002, + "loss": 0.9025, + "step": 3550 + }, + { + "epoch": 0.23363412633305988, + "grad_norm": 0.2954406142234802, + "learning_rate": 0.0002, + "loss": 0.9132, + "step": 3560 + }, + { + "epoch": 0.23429040196882692, + "grad_norm": 0.3328028917312622, + "learning_rate": 0.0002, + "loss": 0.9444, + "step": 3570 + }, + { + "epoch": 0.23494667760459392, + "grad_norm": 0.32020696997642517, + "learning_rate": 0.0002, + "loss": 0.9161, + "step": 3580 + }, + { + "epoch": 0.23560295324036096, + "grad_norm": 0.2774750292301178, + "learning_rate": 0.0002, + "loss": 0.9193, + "step": 3590 + }, + { + "epoch": 0.23625922887612796, + "grad_norm": 0.24560679495334625, + "learning_rate": 0.0002, + "loss": 0.8935, + "step": 3600 + }, + { + "epoch": 0.236915504511895, + "grad_norm": 0.3278765082359314, + "learning_rate": 0.0002, + "loss": 0.9335, + "step": 3610 + }, + { + "epoch": 0.23757178014766203, + "grad_norm": 0.4417719542980194, + "learning_rate": 0.0002, + "loss": 0.8958, + "step": 3620 + }, + { + "epoch": 0.23822805578342904, + "grad_norm": 0.3339618742465973, + "learning_rate": 0.0002, + "loss": 0.9683, + "step": 3630 + }, + { + "epoch": 0.23888433141919607, + "grad_norm": 0.2925402522087097, + "learning_rate": 0.0002, + "loss": 0.9536, + "step": 3640 + }, + { + "epoch": 0.23954060705496308, + "grad_norm": 0.3060242831707001, + "learning_rate": 0.0002, + "loss": 0.9695, + "step": 3650 + }, + { + "epoch": 0.2401968826907301, + "grad_norm": 0.3095077872276306, + "learning_rate": 0.0002, + "loss": 0.9573, + "step": 3660 + }, + { + "epoch": 0.24085315832649712, + "grad_norm": 0.31151828169822693, + "learning_rate": 0.0002, + "loss": 0.9023, + "step": 3670 + }, + { + "epoch": 0.24150943396226415, + "grad_norm": 0.293610543012619, + "learning_rate": 0.0002, + "loss": 0.9642, + "step": 3680 + }, + { + "epoch": 0.2421657095980312, + "grad_norm": 0.28868040442466736, + "learning_rate": 0.0002, + "loss": 0.9449, + "step": 3690 + }, + { + "epoch": 0.2428219852337982, + "grad_norm": 0.3123609721660614, + "learning_rate": 0.0002, + "loss": 0.9258, + "step": 3700 + }, + { + "epoch": 0.24347826086956523, + "grad_norm": 0.29155978560447693, + "learning_rate": 0.0002, + "loss": 0.9013, + "step": 3710 + }, + { + "epoch": 0.24413453650533223, + "grad_norm": 0.36799871921539307, + "learning_rate": 0.0002, + "loss": 0.8977, + "step": 3720 + }, + { + "epoch": 0.24479081214109927, + "grad_norm": 0.30915290117263794, + "learning_rate": 0.0002, + "loss": 1.0157, + "step": 3730 + }, + { + "epoch": 0.24544708777686627, + "grad_norm": 0.2889885902404785, + "learning_rate": 0.0002, + "loss": 0.9137, + "step": 3740 + }, + { + "epoch": 0.2461033634126333, + "grad_norm": 0.30213671922683716, + "learning_rate": 0.0002, + "loss": 0.9279, + "step": 3750 + }, + { + "epoch": 0.24675963904840031, + "grad_norm": 0.3242565393447876, + "learning_rate": 0.0002, + "loss": 0.9202, + "step": 3760 + }, + { + "epoch": 0.24741591468416735, + "grad_norm": 0.27826303243637085, + "learning_rate": 0.0002, + "loss": 0.9323, + "step": 3770 + }, + { + "epoch": 0.24807219031993438, + "grad_norm": 0.2813768982887268, + "learning_rate": 0.0002, + "loss": 0.933, + "step": 3780 + }, + { + "epoch": 0.2487284659557014, + "grad_norm": 0.34054139256477356, + "learning_rate": 0.0002, + "loss": 0.9046, + "step": 3790 + }, + { + "epoch": 0.24938474159146842, + "grad_norm": 0.2804257869720459, + "learning_rate": 0.0002, + "loss": 0.9252, + "step": 3800 + }, + { + "epoch": 0.25004101722723543, + "grad_norm": 0.2920171618461609, + "learning_rate": 0.0002, + "loss": 0.8305, + "step": 3810 + }, + { + "epoch": 0.25069729286300246, + "grad_norm": 0.2967351973056793, + "learning_rate": 0.0002, + "loss": 0.883, + "step": 3820 + }, + { + "epoch": 0.2513535684987695, + "grad_norm": 0.27417901158332825, + "learning_rate": 0.0002, + "loss": 0.8767, + "step": 3830 + }, + { + "epoch": 0.25200984413453653, + "grad_norm": 0.30916500091552734, + "learning_rate": 0.0002, + "loss": 0.9459, + "step": 3840 + }, + { + "epoch": 0.2526661197703035, + "grad_norm": 0.3281477391719818, + "learning_rate": 0.0002, + "loss": 0.9774, + "step": 3850 + }, + { + "epoch": 0.25332239540607054, + "grad_norm": 0.3417379856109619, + "learning_rate": 0.0002, + "loss": 0.8779, + "step": 3860 + }, + { + "epoch": 0.2539786710418376, + "grad_norm": 0.3119784891605377, + "learning_rate": 0.0002, + "loss": 0.9155, + "step": 3870 + }, + { + "epoch": 0.2546349466776046, + "grad_norm": 0.3053247332572937, + "learning_rate": 0.0002, + "loss": 0.9466, + "step": 3880 + }, + { + "epoch": 0.2552912223133716, + "grad_norm": 0.3155108094215393, + "learning_rate": 0.0002, + "loss": 0.8882, + "step": 3890 + }, + { + "epoch": 0.2559474979491386, + "grad_norm": 0.3240940272808075, + "learning_rate": 0.0002, + "loss": 0.9464, + "step": 3900 + }, + { + "epoch": 0.25660377358490566, + "grad_norm": 0.32433149218559265, + "learning_rate": 0.0002, + "loss": 0.9765, + "step": 3910 + }, + { + "epoch": 0.2572600492206727, + "grad_norm": 0.2982637286186218, + "learning_rate": 0.0002, + "loss": 0.9714, + "step": 3920 + }, + { + "epoch": 0.2579163248564397, + "grad_norm": 0.34742113947868347, + "learning_rate": 0.0002, + "loss": 0.8758, + "step": 3930 + }, + { + "epoch": 0.2585726004922067, + "grad_norm": 0.3415607213973999, + "learning_rate": 0.0002, + "loss": 0.9335, + "step": 3940 + }, + { + "epoch": 0.25922887612797374, + "grad_norm": 0.2852018475532532, + "learning_rate": 0.0002, + "loss": 0.9016, + "step": 3950 + }, + { + "epoch": 0.2598851517637408, + "grad_norm": 0.2924928665161133, + "learning_rate": 0.0002, + "loss": 0.9972, + "step": 3960 + }, + { + "epoch": 0.2605414273995078, + "grad_norm": 0.29387834668159485, + "learning_rate": 0.0002, + "loss": 0.9524, + "step": 3970 + }, + { + "epoch": 0.26119770303527484, + "grad_norm": 0.32400810718536377, + "learning_rate": 0.0002, + "loss": 0.9197, + "step": 3980 + }, + { + "epoch": 0.2618539786710418, + "grad_norm": 0.35629674792289734, + "learning_rate": 0.0002, + "loss": 0.9886, + "step": 3990 + }, + { + "epoch": 0.26251025430680885, + "grad_norm": 0.40292513370513916, + "learning_rate": 0.0002, + "loss": 0.9002, + "step": 4000 + }, + { + "epoch": 0.2631665299425759, + "grad_norm": 0.3452664613723755, + "learning_rate": 0.0002, + "loss": 0.9561, + "step": 4010 + }, + { + "epoch": 0.2638228055783429, + "grad_norm": 0.2956177592277527, + "learning_rate": 0.0002, + "loss": 0.9231, + "step": 4020 + }, + { + "epoch": 0.2644790812141099, + "grad_norm": 0.3159438669681549, + "learning_rate": 0.0002, + "loss": 0.9269, + "step": 4030 + }, + { + "epoch": 0.26513535684987694, + "grad_norm": 0.25804826617240906, + "learning_rate": 0.0002, + "loss": 0.9682, + "step": 4040 + }, + { + "epoch": 0.26579163248564397, + "grad_norm": 0.3446296751499176, + "learning_rate": 0.0002, + "loss": 0.9681, + "step": 4050 + }, + { + "epoch": 0.266447908121411, + "grad_norm": 0.32568585872650146, + "learning_rate": 0.0002, + "loss": 0.9451, + "step": 4060 + }, + { + "epoch": 0.26710418375717804, + "grad_norm": 0.2872511148452759, + "learning_rate": 0.0002, + "loss": 0.907, + "step": 4070 + }, + { + "epoch": 0.267760459392945, + "grad_norm": 0.327495276927948, + "learning_rate": 0.0002, + "loss": 0.9365, + "step": 4080 + }, + { + "epoch": 0.26841673502871205, + "grad_norm": 0.40853601694107056, + "learning_rate": 0.0002, + "loss": 0.9778, + "step": 4090 + }, + { + "epoch": 0.2690730106644791, + "grad_norm": 0.32097089290618896, + "learning_rate": 0.0002, + "loss": 0.9232, + "step": 4100 + }, + { + "epoch": 0.2697292863002461, + "grad_norm": 0.32956209778785706, + "learning_rate": 0.0002, + "loss": 0.9622, + "step": 4110 + }, + { + "epoch": 0.27038556193601315, + "grad_norm": 0.30057013034820557, + "learning_rate": 0.0002, + "loss": 0.911, + "step": 4120 + }, + { + "epoch": 0.27104183757178013, + "grad_norm": 0.30630001425743103, + "learning_rate": 0.0002, + "loss": 0.9045, + "step": 4130 + }, + { + "epoch": 0.27169811320754716, + "grad_norm": 0.31977957487106323, + "learning_rate": 0.0002, + "loss": 0.8914, + "step": 4140 + }, + { + "epoch": 0.2723543888433142, + "grad_norm": 0.29783955216407776, + "learning_rate": 0.0002, + "loss": 0.9619, + "step": 4150 + }, + { + "epoch": 0.27301066447908123, + "grad_norm": 0.37371826171875, + "learning_rate": 0.0002, + "loss": 1.0299, + "step": 4160 + }, + { + "epoch": 0.2736669401148482, + "grad_norm": 0.27656471729278564, + "learning_rate": 0.0002, + "loss": 0.9072, + "step": 4170 + }, + { + "epoch": 0.27432321575061525, + "grad_norm": 0.29685157537460327, + "learning_rate": 0.0002, + "loss": 0.873, + "step": 4180 + }, + { + "epoch": 0.2749794913863823, + "grad_norm": 0.3376981317996979, + "learning_rate": 0.0002, + "loss": 1.0057, + "step": 4190 + }, + { + "epoch": 0.2756357670221493, + "grad_norm": 0.2933194041252136, + "learning_rate": 0.0002, + "loss": 0.9671, + "step": 4200 + }, + { + "epoch": 0.27629204265791635, + "grad_norm": 0.27594974637031555, + "learning_rate": 0.0002, + "loss": 0.9359, + "step": 4210 + }, + { + "epoch": 0.2769483182936833, + "grad_norm": 0.29267510771751404, + "learning_rate": 0.0002, + "loss": 0.9336, + "step": 4220 + }, + { + "epoch": 0.27760459392945036, + "grad_norm": 0.3266076445579529, + "learning_rate": 0.0002, + "loss": 0.9241, + "step": 4230 + }, + { + "epoch": 0.2782608695652174, + "grad_norm": 0.3626921474933624, + "learning_rate": 0.0002, + "loss": 0.933, + "step": 4240 + }, + { + "epoch": 0.27891714520098443, + "grad_norm": 0.3043886423110962, + "learning_rate": 0.0002, + "loss": 0.9533, + "step": 4250 + }, + { + "epoch": 0.27957342083675146, + "grad_norm": 0.28676939010620117, + "learning_rate": 0.0002, + "loss": 0.902, + "step": 4260 + }, + { + "epoch": 0.28022969647251844, + "grad_norm": 0.32501107454299927, + "learning_rate": 0.0002, + "loss": 0.9565, + "step": 4270 + }, + { + "epoch": 0.2808859721082855, + "grad_norm": 0.3533550202846527, + "learning_rate": 0.0002, + "loss": 1.0009, + "step": 4280 + }, + { + "epoch": 0.2815422477440525, + "grad_norm": 0.28312650322914124, + "learning_rate": 0.0002, + "loss": 0.9128, + "step": 4290 + }, + { + "epoch": 0.28219852337981954, + "grad_norm": 0.2910906672477722, + "learning_rate": 0.0002, + "loss": 0.9626, + "step": 4300 + }, + { + "epoch": 0.2828547990155865, + "grad_norm": 0.27612248063087463, + "learning_rate": 0.0002, + "loss": 0.8803, + "step": 4310 + }, + { + "epoch": 0.28351107465135356, + "grad_norm": 0.416061133146286, + "learning_rate": 0.0002, + "loss": 0.9451, + "step": 4320 + }, + { + "epoch": 0.2841673502871206, + "grad_norm": 0.36900338530540466, + "learning_rate": 0.0002, + "loss": 0.9197, + "step": 4330 + }, + { + "epoch": 0.2848236259228876, + "grad_norm": 0.32224977016448975, + "learning_rate": 0.0002, + "loss": 0.9623, + "step": 4340 + }, + { + "epoch": 0.28547990155865466, + "grad_norm": 0.2805930972099304, + "learning_rate": 0.0002, + "loss": 0.9264, + "step": 4350 + }, + { + "epoch": 0.28613617719442164, + "grad_norm": 0.286539226770401, + "learning_rate": 0.0002, + "loss": 0.9486, + "step": 4360 + }, + { + "epoch": 0.28679245283018867, + "grad_norm": 0.3326348066329956, + "learning_rate": 0.0002, + "loss": 0.9225, + "step": 4370 + }, + { + "epoch": 0.2874487284659557, + "grad_norm": 0.29906603693962097, + "learning_rate": 0.0002, + "loss": 0.8774, + "step": 4380 + }, + { + "epoch": 0.28810500410172274, + "grad_norm": 0.33906206488609314, + "learning_rate": 0.0002, + "loss": 0.9582, + "step": 4390 + }, + { + "epoch": 0.2887612797374898, + "grad_norm": 0.36083030700683594, + "learning_rate": 0.0002, + "loss": 0.8987, + "step": 4400 + }, + { + "epoch": 0.28941755537325675, + "grad_norm": 0.30945461988449097, + "learning_rate": 0.0002, + "loss": 0.9189, + "step": 4410 + }, + { + "epoch": 0.2900738310090238, + "grad_norm": 0.29673129320144653, + "learning_rate": 0.0002, + "loss": 0.9454, + "step": 4420 + }, + { + "epoch": 0.2907301066447908, + "grad_norm": 0.29949837923049927, + "learning_rate": 0.0002, + "loss": 0.9454, + "step": 4430 + }, + { + "epoch": 0.29138638228055785, + "grad_norm": 0.29319390654563904, + "learning_rate": 0.0002, + "loss": 0.9026, + "step": 4440 + }, + { + "epoch": 0.29204265791632483, + "grad_norm": 0.38954150676727295, + "learning_rate": 0.0002, + "loss": 0.9318, + "step": 4450 + }, + { + "epoch": 0.29269893355209187, + "grad_norm": 0.32702240347862244, + "learning_rate": 0.0002, + "loss": 1.0558, + "step": 4460 + }, + { + "epoch": 0.2933552091878589, + "grad_norm": 0.3480510413646698, + "learning_rate": 0.0002, + "loss": 0.9684, + "step": 4470 + }, + { + "epoch": 0.29401148482362593, + "grad_norm": 0.34290337562561035, + "learning_rate": 0.0002, + "loss": 0.9267, + "step": 4480 + }, + { + "epoch": 0.29466776045939297, + "grad_norm": 0.3169049620628357, + "learning_rate": 0.0002, + "loss": 0.9561, + "step": 4490 + }, + { + "epoch": 0.29532403609515995, + "grad_norm": 0.3368853032588959, + "learning_rate": 0.0002, + "loss": 0.9386, + "step": 4500 + }, + { + "epoch": 0.295980311730927, + "grad_norm": 0.30214768648147583, + "learning_rate": 0.0002, + "loss": 0.8803, + "step": 4510 + }, + { + "epoch": 0.296636587366694, + "grad_norm": 0.33051690459251404, + "learning_rate": 0.0002, + "loss": 0.9485, + "step": 4520 + }, + { + "epoch": 0.29729286300246105, + "grad_norm": 0.37137898802757263, + "learning_rate": 0.0002, + "loss": 0.9498, + "step": 4530 + }, + { + "epoch": 0.29794913863822803, + "grad_norm": 0.3780321180820465, + "learning_rate": 0.0002, + "loss": 0.9518, + "step": 4540 + }, + { + "epoch": 0.29860541427399506, + "grad_norm": 0.3048851788043976, + "learning_rate": 0.0002, + "loss": 0.8912, + "step": 4550 + }, + { + "epoch": 0.2992616899097621, + "grad_norm": 0.36871910095214844, + "learning_rate": 0.0002, + "loss": 1.0054, + "step": 4560 + }, + { + "epoch": 0.29991796554552913, + "grad_norm": 0.3335227966308594, + "learning_rate": 0.0002, + "loss": 0.9172, + "step": 4570 + }, + { + "epoch": 0.30057424118129616, + "grad_norm": 0.3256683349609375, + "learning_rate": 0.0002, + "loss": 0.9368, + "step": 4580 + }, + { + "epoch": 0.30123051681706314, + "grad_norm": 0.34209194779396057, + "learning_rate": 0.0002, + "loss": 0.9641, + "step": 4590 + }, + { + "epoch": 0.3018867924528302, + "grad_norm": 0.278749018907547, + "learning_rate": 0.0002, + "loss": 0.9128, + "step": 4600 + }, + { + "epoch": 0.3025430680885972, + "grad_norm": 0.28081533312797546, + "learning_rate": 0.0002, + "loss": 0.931, + "step": 4610 + }, + { + "epoch": 0.30319934372436425, + "grad_norm": 0.35687389969825745, + "learning_rate": 0.0002, + "loss": 0.9084, + "step": 4620 + }, + { + "epoch": 0.3038556193601313, + "grad_norm": 0.367288738489151, + "learning_rate": 0.0002, + "loss": 0.8927, + "step": 4630 + }, + { + "epoch": 0.30451189499589826, + "grad_norm": 0.40433239936828613, + "learning_rate": 0.0002, + "loss": 0.9175, + "step": 4640 + }, + { + "epoch": 0.3051681706316653, + "grad_norm": 0.31489279866218567, + "learning_rate": 0.0002, + "loss": 0.9115, + "step": 4650 + }, + { + "epoch": 0.3058244462674323, + "grad_norm": 0.30823174118995667, + "learning_rate": 0.0002, + "loss": 0.9843, + "step": 4660 + }, + { + "epoch": 0.30648072190319936, + "grad_norm": 0.3204546570777893, + "learning_rate": 0.0002, + "loss": 0.964, + "step": 4670 + }, + { + "epoch": 0.30713699753896634, + "grad_norm": 0.3188243508338928, + "learning_rate": 0.0002, + "loss": 0.9754, + "step": 4680 + }, + { + "epoch": 0.3077932731747334, + "grad_norm": 0.31931981444358826, + "learning_rate": 0.0002, + "loss": 0.8923, + "step": 4690 + }, + { + "epoch": 0.3084495488105004, + "grad_norm": 0.33908605575561523, + "learning_rate": 0.0002, + "loss": 0.9113, + "step": 4700 + }, + { + "epoch": 0.30910582444626744, + "grad_norm": 0.3362937569618225, + "learning_rate": 0.0002, + "loss": 0.9552, + "step": 4710 + }, + { + "epoch": 0.3097621000820345, + "grad_norm": 0.3304995000362396, + "learning_rate": 0.0002, + "loss": 0.9695, + "step": 4720 + }, + { + "epoch": 0.31041837571780145, + "grad_norm": 0.39165404438972473, + "learning_rate": 0.0002, + "loss": 0.9588, + "step": 4730 + }, + { + "epoch": 0.3110746513535685, + "grad_norm": 0.29097774624824524, + "learning_rate": 0.0002, + "loss": 0.9583, + "step": 4740 + }, + { + "epoch": 0.3117309269893355, + "grad_norm": 0.289065420627594, + "learning_rate": 0.0002, + "loss": 0.9224, + "step": 4750 + }, + { + "epoch": 0.31238720262510256, + "grad_norm": 0.32188501954078674, + "learning_rate": 0.0002, + "loss": 0.8905, + "step": 4760 + }, + { + "epoch": 0.3130434782608696, + "grad_norm": 0.2738335430622101, + "learning_rate": 0.0002, + "loss": 0.951, + "step": 4770 + }, + { + "epoch": 0.31369975389663657, + "grad_norm": 0.2811134457588196, + "learning_rate": 0.0002, + "loss": 0.9662, + "step": 4780 + }, + { + "epoch": 0.3143560295324036, + "grad_norm": 0.2986338138580322, + "learning_rate": 0.0002, + "loss": 0.9372, + "step": 4790 + }, + { + "epoch": 0.31501230516817064, + "grad_norm": 0.30152231454849243, + "learning_rate": 0.0002, + "loss": 0.9016, + "step": 4800 + }, + { + "epoch": 0.31566858080393767, + "grad_norm": 0.37527933716773987, + "learning_rate": 0.0002, + "loss": 0.9902, + "step": 4810 + }, + { + "epoch": 0.31632485643970465, + "grad_norm": 0.4206887483596802, + "learning_rate": 0.0002, + "loss": 0.9786, + "step": 4820 + }, + { + "epoch": 0.3169811320754717, + "grad_norm": 0.3060953617095947, + "learning_rate": 0.0002, + "loss": 0.9265, + "step": 4830 + }, + { + "epoch": 0.3176374077112387, + "grad_norm": 0.28399568796157837, + "learning_rate": 0.0002, + "loss": 0.874, + "step": 4840 + }, + { + "epoch": 0.31829368334700575, + "grad_norm": 0.30498018860816956, + "learning_rate": 0.0002, + "loss": 0.9287, + "step": 4850 + }, + { + "epoch": 0.3189499589827728, + "grad_norm": 0.3168811798095703, + "learning_rate": 0.0002, + "loss": 0.9682, + "step": 4860 + }, + { + "epoch": 0.31960623461853976, + "grad_norm": 0.33051279187202454, + "learning_rate": 0.0002, + "loss": 0.9916, + "step": 4870 + }, + { + "epoch": 0.3202625102543068, + "grad_norm": 0.34563300013542175, + "learning_rate": 0.0002, + "loss": 0.9427, + "step": 4880 + }, + { + "epoch": 0.32091878589007383, + "grad_norm": 0.28437602519989014, + "learning_rate": 0.0002, + "loss": 0.9018, + "step": 4890 + }, + { + "epoch": 0.32157506152584087, + "grad_norm": 0.3883301913738251, + "learning_rate": 0.0002, + "loss": 0.9357, + "step": 4900 + }, + { + "epoch": 0.3222313371616079, + "grad_norm": 0.2933879792690277, + "learning_rate": 0.0002, + "loss": 0.9034, + "step": 4910 + }, + { + "epoch": 0.3228876127973749, + "grad_norm": 0.3490493595600128, + "learning_rate": 0.0002, + "loss": 0.9332, + "step": 4920 + }, + { + "epoch": 0.3235438884331419, + "grad_norm": 0.29370415210723877, + "learning_rate": 0.0002, + "loss": 0.9424, + "step": 4930 + }, + { + "epoch": 0.32420016406890895, + "grad_norm": 0.32653453946113586, + "learning_rate": 0.0002, + "loss": 0.9751, + "step": 4940 + }, + { + "epoch": 0.324856439704676, + "grad_norm": 0.32540783286094666, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 4950 + }, + { + "epoch": 0.32551271534044296, + "grad_norm": 0.37576451897621155, + "learning_rate": 0.0002, + "loss": 0.9309, + "step": 4960 + }, + { + "epoch": 0.32616899097621, + "grad_norm": 0.27892962098121643, + "learning_rate": 0.0002, + "loss": 0.9612, + "step": 4970 + }, + { + "epoch": 0.32682526661197703, + "grad_norm": 0.38215309381484985, + "learning_rate": 0.0002, + "loss": 0.9765, + "step": 4980 + }, + { + "epoch": 0.32748154224774406, + "grad_norm": 0.4062992334365845, + "learning_rate": 0.0002, + "loss": 0.9226, + "step": 4990 + }, + { + "epoch": 0.3281378178835111, + "grad_norm": 0.3053376376628876, + "learning_rate": 0.0002, + "loss": 0.9157, + "step": 5000 + }, + { + "epoch": 0.3287940935192781, + "grad_norm": 0.3228836953639984, + "learning_rate": 0.0002, + "loss": 0.9117, + "step": 5010 + }, + { + "epoch": 0.3294503691550451, + "grad_norm": 0.27714234590530396, + "learning_rate": 0.0002, + "loss": 0.8644, + "step": 5020 + }, + { + "epoch": 0.33010664479081214, + "grad_norm": 0.3127416968345642, + "learning_rate": 0.0002, + "loss": 0.9034, + "step": 5030 + }, + { + "epoch": 0.3307629204265792, + "grad_norm": 0.3653234839439392, + "learning_rate": 0.0002, + "loss": 0.89, + "step": 5040 + }, + { + "epoch": 0.3314191960623462, + "grad_norm": 0.33517029881477356, + "learning_rate": 0.0002, + "loss": 0.9313, + "step": 5050 + }, + { + "epoch": 0.3320754716981132, + "grad_norm": 0.3412803113460541, + "learning_rate": 0.0002, + "loss": 0.9219, + "step": 5060 + }, + { + "epoch": 0.3327317473338802, + "grad_norm": 0.32958096265792847, + "learning_rate": 0.0002, + "loss": 0.9358, + "step": 5070 + }, + { + "epoch": 0.33338802296964726, + "grad_norm": 0.2991558015346527, + "learning_rate": 0.0002, + "loss": 0.9479, + "step": 5080 + }, + { + "epoch": 0.3340442986054143, + "grad_norm": 0.35870906710624695, + "learning_rate": 0.0002, + "loss": 0.9754, + "step": 5090 + }, + { + "epoch": 0.33470057424118127, + "grad_norm": 0.26751458644866943, + "learning_rate": 0.0002, + "loss": 0.9183, + "step": 5100 + }, + { + "epoch": 0.3353568498769483, + "grad_norm": 0.35588568449020386, + "learning_rate": 0.0002, + "loss": 0.9676, + "step": 5110 + }, + { + "epoch": 0.33601312551271534, + "grad_norm": 0.41509315371513367, + "learning_rate": 0.0002, + "loss": 0.8964, + "step": 5120 + }, + { + "epoch": 0.3366694011484824, + "grad_norm": 0.37643107771873474, + "learning_rate": 0.0002, + "loss": 0.9864, + "step": 5130 + }, + { + "epoch": 0.3373256767842494, + "grad_norm": 0.3062657117843628, + "learning_rate": 0.0002, + "loss": 0.9367, + "step": 5140 + }, + { + "epoch": 0.3379819524200164, + "grad_norm": 0.2778759300708771, + "learning_rate": 0.0002, + "loss": 0.9167, + "step": 5150 + }, + { + "epoch": 0.3386382280557834, + "grad_norm": 0.40632501244544983, + "learning_rate": 0.0002, + "loss": 0.9787, + "step": 5160 + }, + { + "epoch": 0.33929450369155045, + "grad_norm": 0.2996899485588074, + "learning_rate": 0.0002, + "loss": 0.8941, + "step": 5170 + }, + { + "epoch": 0.3399507793273175, + "grad_norm": 0.2869918942451477, + "learning_rate": 0.0002, + "loss": 0.9611, + "step": 5180 + }, + { + "epoch": 0.3406070549630845, + "grad_norm": 0.3303343951702118, + "learning_rate": 0.0002, + "loss": 0.9201, + "step": 5190 + }, + { + "epoch": 0.3412633305988515, + "grad_norm": 0.3582284450531006, + "learning_rate": 0.0002, + "loss": 0.9162, + "step": 5200 + }, + { + "epoch": 0.34191960623461853, + "grad_norm": 0.3600800037384033, + "learning_rate": 0.0002, + "loss": 0.9441, + "step": 5210 + }, + { + "epoch": 0.34257588187038557, + "grad_norm": 0.3147357106208801, + "learning_rate": 0.0002, + "loss": 0.9061, + "step": 5220 + }, + { + "epoch": 0.3432321575061526, + "grad_norm": 0.3327115774154663, + "learning_rate": 0.0002, + "loss": 0.9189, + "step": 5230 + }, + { + "epoch": 0.3438884331419196, + "grad_norm": 0.3514555096626282, + "learning_rate": 0.0002, + "loss": 0.9606, + "step": 5240 + }, + { + "epoch": 0.3445447087776866, + "grad_norm": 0.35748785734176636, + "learning_rate": 0.0002, + "loss": 0.8944, + "step": 5250 + }, + { + "epoch": 0.34520098441345365, + "grad_norm": 0.3179738223552704, + "learning_rate": 0.0002, + "loss": 0.9228, + "step": 5260 + }, + { + "epoch": 0.3458572600492207, + "grad_norm": 0.3399673104286194, + "learning_rate": 0.0002, + "loss": 0.9313, + "step": 5270 + }, + { + "epoch": 0.3465135356849877, + "grad_norm": 0.32293835282325745, + "learning_rate": 0.0002, + "loss": 0.9379, + "step": 5280 + }, + { + "epoch": 0.3471698113207547, + "grad_norm": 0.3028234839439392, + "learning_rate": 0.0002, + "loss": 0.9066, + "step": 5290 + }, + { + "epoch": 0.34782608695652173, + "grad_norm": 0.3056369423866272, + "learning_rate": 0.0002, + "loss": 0.929, + "step": 5300 + }, + { + "epoch": 0.34848236259228876, + "grad_norm": 0.36918318271636963, + "learning_rate": 0.0002, + "loss": 0.9597, + "step": 5310 + }, + { + "epoch": 0.3491386382280558, + "grad_norm": 0.2661174535751343, + "learning_rate": 0.0002, + "loss": 0.9365, + "step": 5320 + }, + { + "epoch": 0.34979491386382283, + "grad_norm": 0.294240266084671, + "learning_rate": 0.0002, + "loss": 0.8832, + "step": 5330 + }, + { + "epoch": 0.3504511894995898, + "grad_norm": 0.3509284555912018, + "learning_rate": 0.0002, + "loss": 0.9135, + "step": 5340 + }, + { + "epoch": 0.35110746513535684, + "grad_norm": 0.331851989030838, + "learning_rate": 0.0002, + "loss": 0.8919, + "step": 5350 + }, + { + "epoch": 0.3517637407711239, + "grad_norm": 0.39378881454467773, + "learning_rate": 0.0002, + "loss": 0.9357, + "step": 5360 + }, + { + "epoch": 0.3524200164068909, + "grad_norm": 0.3014044761657715, + "learning_rate": 0.0002, + "loss": 0.8844, + "step": 5370 + }, + { + "epoch": 0.3530762920426579, + "grad_norm": 0.3350718021392822, + "learning_rate": 0.0002, + "loss": 0.9234, + "step": 5380 + }, + { + "epoch": 0.3537325676784249, + "grad_norm": 0.29478874802589417, + "learning_rate": 0.0002, + "loss": 0.9119, + "step": 5390 + }, + { + "epoch": 0.35438884331419196, + "grad_norm": 0.3219947814941406, + "learning_rate": 0.0002, + "loss": 0.8781, + "step": 5400 + }, + { + "epoch": 0.355045118949959, + "grad_norm": 0.31258803606033325, + "learning_rate": 0.0002, + "loss": 0.9527, + "step": 5410 + }, + { + "epoch": 0.355701394585726, + "grad_norm": 0.2951710820198059, + "learning_rate": 0.0002, + "loss": 0.9124, + "step": 5420 + }, + { + "epoch": 0.356357670221493, + "grad_norm": 0.3726331293582916, + "learning_rate": 0.0002, + "loss": 0.8997, + "step": 5430 + }, + { + "epoch": 0.35701394585726004, + "grad_norm": 0.3571377098560333, + "learning_rate": 0.0002, + "loss": 0.9288, + "step": 5440 + }, + { + "epoch": 0.3576702214930271, + "grad_norm": 0.3004431128501892, + "learning_rate": 0.0002, + "loss": 0.9538, + "step": 5450 + }, + { + "epoch": 0.3583264971287941, + "grad_norm": 0.3258959949016571, + "learning_rate": 0.0002, + "loss": 0.9466, + "step": 5460 + }, + { + "epoch": 0.35898277276456114, + "grad_norm": 0.31265372037887573, + "learning_rate": 0.0002, + "loss": 0.8999, + "step": 5470 + }, + { + "epoch": 0.3596390484003281, + "grad_norm": 0.3120972514152527, + "learning_rate": 0.0002, + "loss": 0.9438, + "step": 5480 + }, + { + "epoch": 0.36029532403609515, + "grad_norm": 0.31449300050735474, + "learning_rate": 0.0002, + "loss": 0.9541, + "step": 5490 + }, + { + "epoch": 0.3609515996718622, + "grad_norm": 0.378205806016922, + "learning_rate": 0.0002, + "loss": 0.9569, + "step": 5500 + }, + { + "epoch": 0.3616078753076292, + "grad_norm": 0.34647420048713684, + "learning_rate": 0.0002, + "loss": 0.8964, + "step": 5510 + }, + { + "epoch": 0.3622641509433962, + "grad_norm": 0.34195807576179504, + "learning_rate": 0.0002, + "loss": 0.9965, + "step": 5520 + }, + { + "epoch": 0.36292042657916324, + "grad_norm": 0.35365694761276245, + "learning_rate": 0.0002, + "loss": 0.8916, + "step": 5530 + }, + { + "epoch": 0.36357670221493027, + "grad_norm": 0.3188548982143402, + "learning_rate": 0.0002, + "loss": 0.9429, + "step": 5540 + }, + { + "epoch": 0.3642329778506973, + "grad_norm": 0.35012900829315186, + "learning_rate": 0.0002, + "loss": 0.9258, + "step": 5550 + }, + { + "epoch": 0.36488925348646434, + "grad_norm": 0.3564446270465851, + "learning_rate": 0.0002, + "loss": 0.9338, + "step": 5560 + }, + { + "epoch": 0.3655455291222313, + "grad_norm": 0.29641905426979065, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 5570 + }, + { + "epoch": 0.36620180475799835, + "grad_norm": 0.3476453721523285, + "learning_rate": 0.0002, + "loss": 0.9507, + "step": 5580 + }, + { + "epoch": 0.3668580803937654, + "grad_norm": 0.32947802543640137, + "learning_rate": 0.0002, + "loss": 0.9509, + "step": 5590 + }, + { + "epoch": 0.3675143560295324, + "grad_norm": 0.28639012575149536, + "learning_rate": 0.0002, + "loss": 0.9577, + "step": 5600 + }, + { + "epoch": 0.36817063166529945, + "grad_norm": 0.35645192861557007, + "learning_rate": 0.0002, + "loss": 0.9196, + "step": 5610 + }, + { + "epoch": 0.36882690730106643, + "grad_norm": 0.3594934046268463, + "learning_rate": 0.0002, + "loss": 0.9544, + "step": 5620 + }, + { + "epoch": 0.36948318293683347, + "grad_norm": 0.3584282398223877, + "learning_rate": 0.0002, + "loss": 0.9081, + "step": 5630 + }, + { + "epoch": 0.3701394585726005, + "grad_norm": 0.32094448804855347, + "learning_rate": 0.0002, + "loss": 0.955, + "step": 5640 + }, + { + "epoch": 0.37079573420836753, + "grad_norm": 0.3229917287826538, + "learning_rate": 0.0002, + "loss": 0.9271, + "step": 5650 + }, + { + "epoch": 0.3714520098441345, + "grad_norm": 0.3371448218822479, + "learning_rate": 0.0002, + "loss": 0.933, + "step": 5660 + }, + { + "epoch": 0.37210828547990155, + "grad_norm": 0.38639435172080994, + "learning_rate": 0.0002, + "loss": 0.9189, + "step": 5670 + }, + { + "epoch": 0.3727645611156686, + "grad_norm": 0.37710604071617126, + "learning_rate": 0.0002, + "loss": 0.8987, + "step": 5680 + }, + { + "epoch": 0.3734208367514356, + "grad_norm": 0.28154921531677246, + "learning_rate": 0.0002, + "loss": 0.9133, + "step": 5690 + }, + { + "epoch": 0.37407711238720265, + "grad_norm": 0.35519927740097046, + "learning_rate": 0.0002, + "loss": 0.9104, + "step": 5700 + }, + { + "epoch": 0.3747333880229696, + "grad_norm": 0.3271195888519287, + "learning_rate": 0.0002, + "loss": 0.9187, + "step": 5710 + }, + { + "epoch": 0.37538966365873666, + "grad_norm": 0.30836206674575806, + "learning_rate": 0.0002, + "loss": 1.0015, + "step": 5720 + }, + { + "epoch": 0.3760459392945037, + "grad_norm": 0.36397358775138855, + "learning_rate": 0.0002, + "loss": 0.8888, + "step": 5730 + }, + { + "epoch": 0.37670221493027073, + "grad_norm": 0.31649520993232727, + "learning_rate": 0.0002, + "loss": 0.8962, + "step": 5740 + }, + { + "epoch": 0.37735849056603776, + "grad_norm": 0.3790256977081299, + "learning_rate": 0.0002, + "loss": 0.9667, + "step": 5750 + }, + { + "epoch": 0.37801476620180474, + "grad_norm": 0.34175634384155273, + "learning_rate": 0.0002, + "loss": 0.9548, + "step": 5760 + }, + { + "epoch": 0.3786710418375718, + "grad_norm": 0.4169650673866272, + "learning_rate": 0.0002, + "loss": 0.9031, + "step": 5770 + }, + { + "epoch": 0.3793273174733388, + "grad_norm": 0.3118833899497986, + "learning_rate": 0.0002, + "loss": 0.9279, + "step": 5780 + }, + { + "epoch": 0.37998359310910584, + "grad_norm": 0.3474520444869995, + "learning_rate": 0.0002, + "loss": 0.9204, + "step": 5790 + }, + { + "epoch": 0.3806398687448728, + "grad_norm": 0.40819892287254333, + "learning_rate": 0.0002, + "loss": 0.9845, + "step": 5800 + }, + { + "epoch": 0.38129614438063986, + "grad_norm": 0.34402596950531006, + "learning_rate": 0.0002, + "loss": 0.8863, + "step": 5810 + }, + { + "epoch": 0.3819524200164069, + "grad_norm": 0.31899532675743103, + "learning_rate": 0.0002, + "loss": 0.9356, + "step": 5820 + }, + { + "epoch": 0.3826086956521739, + "grad_norm": 0.38860151171684265, + "learning_rate": 0.0002, + "loss": 0.857, + "step": 5830 + }, + { + "epoch": 0.38326497128794096, + "grad_norm": 0.2960244417190552, + "learning_rate": 0.0002, + "loss": 0.9433, + "step": 5840 + }, + { + "epoch": 0.38392124692370794, + "grad_norm": 0.3614438772201538, + "learning_rate": 0.0002, + "loss": 0.9546, + "step": 5850 + }, + { + "epoch": 0.38457752255947497, + "grad_norm": 0.33000093698501587, + "learning_rate": 0.0002, + "loss": 0.9447, + "step": 5860 + }, + { + "epoch": 0.385233798195242, + "grad_norm": 0.3543432354927063, + "learning_rate": 0.0002, + "loss": 0.964, + "step": 5870 + }, + { + "epoch": 0.38589007383100904, + "grad_norm": 0.3281349837779999, + "learning_rate": 0.0002, + "loss": 0.9329, + "step": 5880 + }, + { + "epoch": 0.3865463494667761, + "grad_norm": 0.3775436580181122, + "learning_rate": 0.0002, + "loss": 0.9463, + "step": 5890 + }, + { + "epoch": 0.38720262510254305, + "grad_norm": 0.3037714660167694, + "learning_rate": 0.0002, + "loss": 0.9035, + "step": 5900 + }, + { + "epoch": 0.3878589007383101, + "grad_norm": 0.30885955691337585, + "learning_rate": 0.0002, + "loss": 0.8953, + "step": 5910 + }, + { + "epoch": 0.3885151763740771, + "grad_norm": 0.2839881181716919, + "learning_rate": 0.0002, + "loss": 0.8789, + "step": 5920 + }, + { + "epoch": 0.38917145200984415, + "grad_norm": 0.30544501543045044, + "learning_rate": 0.0002, + "loss": 0.9585, + "step": 5930 + }, + { + "epoch": 0.38982772764561113, + "grad_norm": 0.281946063041687, + "learning_rate": 0.0002, + "loss": 0.9163, + "step": 5940 + }, + { + "epoch": 0.39048400328137817, + "grad_norm": 0.44500306248664856, + "learning_rate": 0.0002, + "loss": 0.8999, + "step": 5950 + }, + { + "epoch": 0.3911402789171452, + "grad_norm": 0.324260950088501, + "learning_rate": 0.0002, + "loss": 0.8928, + "step": 5960 + }, + { + "epoch": 0.39179655455291224, + "grad_norm": 0.31535372138023376, + "learning_rate": 0.0002, + "loss": 0.8844, + "step": 5970 + }, + { + "epoch": 0.39245283018867927, + "grad_norm": 0.31681323051452637, + "learning_rate": 0.0002, + "loss": 0.9185, + "step": 5980 + }, + { + "epoch": 0.39310910582444625, + "grad_norm": 0.32668736577033997, + "learning_rate": 0.0002, + "loss": 0.9258, + "step": 5990 + }, + { + "epoch": 0.3937653814602133, + "grad_norm": 0.34531155228614807, + "learning_rate": 0.0002, + "loss": 0.9278, + "step": 6000 + }, + { + "epoch": 0.3944216570959803, + "grad_norm": 0.32312485575675964, + "learning_rate": 0.0002, + "loss": 0.8969, + "step": 6010 + }, + { + "epoch": 0.39507793273174735, + "grad_norm": 0.31243696808815, + "learning_rate": 0.0002, + "loss": 0.9501, + "step": 6020 + }, + { + "epoch": 0.39573420836751433, + "grad_norm": 0.3558828830718994, + "learning_rate": 0.0002, + "loss": 0.9174, + "step": 6030 + }, + { + "epoch": 0.39639048400328136, + "grad_norm": 0.3894706964492798, + "learning_rate": 0.0002, + "loss": 0.9486, + "step": 6040 + }, + { + "epoch": 0.3970467596390484, + "grad_norm": 0.36445918679237366, + "learning_rate": 0.0002, + "loss": 0.9549, + "step": 6050 + }, + { + "epoch": 0.39770303527481543, + "grad_norm": 0.34586355090141296, + "learning_rate": 0.0002, + "loss": 0.9268, + "step": 6060 + }, + { + "epoch": 0.39835931091058246, + "grad_norm": 0.4428504705429077, + "learning_rate": 0.0002, + "loss": 0.9256, + "step": 6070 + }, + { + "epoch": 0.39901558654634944, + "grad_norm": 0.32784751057624817, + "learning_rate": 0.0002, + "loss": 0.9706, + "step": 6080 + }, + { + "epoch": 0.3996718621821165, + "grad_norm": 0.30480754375457764, + "learning_rate": 0.0002, + "loss": 0.9107, + "step": 6090 + }, + { + "epoch": 0.4003281378178835, + "grad_norm": 0.3382161557674408, + "learning_rate": 0.0002, + "loss": 0.9431, + "step": 6100 + }, + { + "epoch": 0.40098441345365055, + "grad_norm": 0.34486305713653564, + "learning_rate": 0.0002, + "loss": 0.8669, + "step": 6110 + }, + { + "epoch": 0.4016406890894176, + "grad_norm": 0.3488329350948334, + "learning_rate": 0.0002, + "loss": 0.9174, + "step": 6120 + }, + { + "epoch": 0.40229696472518456, + "grad_norm": 0.2977526783943176, + "learning_rate": 0.0002, + "loss": 0.8907, + "step": 6130 + }, + { + "epoch": 0.4029532403609516, + "grad_norm": 0.27584630250930786, + "learning_rate": 0.0002, + "loss": 0.9088, + "step": 6140 + }, + { + "epoch": 0.4036095159967186, + "grad_norm": 0.30325573682785034, + "learning_rate": 0.0002, + "loss": 0.9062, + "step": 6150 + }, + { + "epoch": 0.40426579163248566, + "grad_norm": 0.3318769633769989, + "learning_rate": 0.0002, + "loss": 0.9626, + "step": 6160 + }, + { + "epoch": 0.40492206726825264, + "grad_norm": 0.27929258346557617, + "learning_rate": 0.0002, + "loss": 0.8849, + "step": 6170 + }, + { + "epoch": 0.4055783429040197, + "grad_norm": 0.3127989172935486, + "learning_rate": 0.0002, + "loss": 0.9118, + "step": 6180 + }, + { + "epoch": 0.4062346185397867, + "grad_norm": 0.34189802408218384, + "learning_rate": 0.0002, + "loss": 0.9659, + "step": 6190 + }, + { + "epoch": 0.40689089417555374, + "grad_norm": 0.2901368737220764, + "learning_rate": 0.0002, + "loss": 0.9041, + "step": 6200 + }, + { + "epoch": 0.4075471698113208, + "grad_norm": 0.2957440912723541, + "learning_rate": 0.0002, + "loss": 0.8863, + "step": 6210 + }, + { + "epoch": 0.40820344544708775, + "grad_norm": 0.35854384303092957, + "learning_rate": 0.0002, + "loss": 0.8872, + "step": 6220 + }, + { + "epoch": 0.4088597210828548, + "grad_norm": 0.3516639471054077, + "learning_rate": 0.0002, + "loss": 0.9913, + "step": 6230 + }, + { + "epoch": 0.4095159967186218, + "grad_norm": 0.33773764967918396, + "learning_rate": 0.0002, + "loss": 0.9526, + "step": 6240 + }, + { + "epoch": 0.41017227235438886, + "grad_norm": 0.3359714448451996, + "learning_rate": 0.0002, + "loss": 0.9116, + "step": 6250 + }, + { + "epoch": 0.4108285479901559, + "grad_norm": 0.37056079506874084, + "learning_rate": 0.0002, + "loss": 0.8789, + "step": 6260 + }, + { + "epoch": 0.41148482362592287, + "grad_norm": 0.37948423624038696, + "learning_rate": 0.0002, + "loss": 0.9262, + "step": 6270 + }, + { + "epoch": 0.4121410992616899, + "grad_norm": 0.30971240997314453, + "learning_rate": 0.0002, + "loss": 0.9333, + "step": 6280 + }, + { + "epoch": 0.41279737489745694, + "grad_norm": 0.3357695937156677, + "learning_rate": 0.0002, + "loss": 0.8636, + "step": 6290 + }, + { + "epoch": 0.41345365053322397, + "grad_norm": 0.2873750627040863, + "learning_rate": 0.0002, + "loss": 0.8857, + "step": 6300 + }, + { + "epoch": 0.41410992616899095, + "grad_norm": 0.31098586320877075, + "learning_rate": 0.0002, + "loss": 0.9401, + "step": 6310 + }, + { + "epoch": 0.414766201804758, + "grad_norm": 0.3232232630252838, + "learning_rate": 0.0002, + "loss": 0.9113, + "step": 6320 + }, + { + "epoch": 0.415422477440525, + "grad_norm": 0.3094746172428131, + "learning_rate": 0.0002, + "loss": 0.9165, + "step": 6330 + }, + { + "epoch": 0.41607875307629205, + "grad_norm": 0.3032565414905548, + "learning_rate": 0.0002, + "loss": 0.92, + "step": 6340 + }, + { + "epoch": 0.4167350287120591, + "grad_norm": 0.30271366238594055, + "learning_rate": 0.0002, + "loss": 0.9254, + "step": 6350 + }, + { + "epoch": 0.41739130434782606, + "grad_norm": 0.35404741764068604, + "learning_rate": 0.0002, + "loss": 0.9766, + "step": 6360 + }, + { + "epoch": 0.4180475799835931, + "grad_norm": 0.33428773283958435, + "learning_rate": 0.0002, + "loss": 0.889, + "step": 6370 + }, + { + "epoch": 0.41870385561936013, + "grad_norm": 0.3523367941379547, + "learning_rate": 0.0002, + "loss": 0.9284, + "step": 6380 + }, + { + "epoch": 0.41936013125512717, + "grad_norm": 0.28411954641342163, + "learning_rate": 0.0002, + "loss": 0.8831, + "step": 6390 + }, + { + "epoch": 0.4200164068908942, + "grad_norm": 0.3410111963748932, + "learning_rate": 0.0002, + "loss": 0.9429, + "step": 6400 + }, + { + "epoch": 0.4206726825266612, + "grad_norm": 0.33837398886680603, + "learning_rate": 0.0002, + "loss": 0.8956, + "step": 6410 + }, + { + "epoch": 0.4213289581624282, + "grad_norm": 0.38739341497421265, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 6420 + }, + { + "epoch": 0.42198523379819525, + "grad_norm": 0.28810667991638184, + "learning_rate": 0.0002, + "loss": 0.8943, + "step": 6430 + }, + { + "epoch": 0.4226415094339623, + "grad_norm": 0.3242197036743164, + "learning_rate": 0.0002, + "loss": 0.9505, + "step": 6440 + }, + { + "epoch": 0.42329778506972926, + "grad_norm": 0.39538851380348206, + "learning_rate": 0.0002, + "loss": 0.9409, + "step": 6450 + }, + { + "epoch": 0.4239540607054963, + "grad_norm": 0.36792051792144775, + "learning_rate": 0.0002, + "loss": 0.9152, + "step": 6460 + }, + { + "epoch": 0.42461033634126333, + "grad_norm": 0.31599462032318115, + "learning_rate": 0.0002, + "loss": 0.9203, + "step": 6470 + }, + { + "epoch": 0.42526661197703036, + "grad_norm": 0.3358552157878876, + "learning_rate": 0.0002, + "loss": 0.926, + "step": 6480 + }, + { + "epoch": 0.4259228876127974, + "grad_norm": 0.40409335494041443, + "learning_rate": 0.0002, + "loss": 0.9337, + "step": 6490 + }, + { + "epoch": 0.4265791632485644, + "grad_norm": 0.3785623610019684, + "learning_rate": 0.0002, + "loss": 0.901, + "step": 6500 + }, + { + "epoch": 0.4272354388843314, + "grad_norm": 0.43033692240715027, + "learning_rate": 0.0002, + "loss": 0.9281, + "step": 6510 + }, + { + "epoch": 0.42789171452009844, + "grad_norm": 0.3074797987937927, + "learning_rate": 0.0002, + "loss": 0.9332, + "step": 6520 + }, + { + "epoch": 0.4285479901558655, + "grad_norm": 0.3498784005641937, + "learning_rate": 0.0002, + "loss": 0.9161, + "step": 6530 + }, + { + "epoch": 0.4292042657916325, + "grad_norm": 0.40507981181144714, + "learning_rate": 0.0002, + "loss": 0.9829, + "step": 6540 + }, + { + "epoch": 0.4298605414273995, + "grad_norm": 0.35614013671875, + "learning_rate": 0.0002, + "loss": 0.9269, + "step": 6550 + }, + { + "epoch": 0.4305168170631665, + "grad_norm": 0.451616495847702, + "learning_rate": 0.0002, + "loss": 0.9033, + "step": 6560 + }, + { + "epoch": 0.43117309269893356, + "grad_norm": 0.3707262873649597, + "learning_rate": 0.0002, + "loss": 0.9723, + "step": 6570 + }, + { + "epoch": 0.4318293683347006, + "grad_norm": 0.3271283209323883, + "learning_rate": 0.0002, + "loss": 0.9512, + "step": 6580 + }, + { + "epoch": 0.43248564397046757, + "grad_norm": 0.38105711340904236, + "learning_rate": 0.0002, + "loss": 0.8529, + "step": 6590 + }, + { + "epoch": 0.4331419196062346, + "grad_norm": 0.345217227935791, + "learning_rate": 0.0002, + "loss": 0.9208, + "step": 6600 + }, + { + "epoch": 0.43379819524200164, + "grad_norm": 0.3507174551486969, + "learning_rate": 0.0002, + "loss": 0.8958, + "step": 6610 + }, + { + "epoch": 0.4344544708777687, + "grad_norm": 0.318654328584671, + "learning_rate": 0.0002, + "loss": 0.9402, + "step": 6620 + }, + { + "epoch": 0.4351107465135357, + "grad_norm": 0.3628910183906555, + "learning_rate": 0.0002, + "loss": 0.9068, + "step": 6630 + }, + { + "epoch": 0.4357670221493027, + "grad_norm": 0.4454437494277954, + "learning_rate": 0.0002, + "loss": 0.9122, + "step": 6640 + }, + { + "epoch": 0.4364232977850697, + "grad_norm": 0.36227893829345703, + "learning_rate": 0.0002, + "loss": 1.0092, + "step": 6650 + }, + { + "epoch": 0.43707957342083675, + "grad_norm": 0.33995604515075684, + "learning_rate": 0.0002, + "loss": 0.9289, + "step": 6660 + }, + { + "epoch": 0.4377358490566038, + "grad_norm": 0.3784409165382385, + "learning_rate": 0.0002, + "loss": 0.9032, + "step": 6670 + }, + { + "epoch": 0.4383921246923708, + "grad_norm": 0.39470377564430237, + "learning_rate": 0.0002, + "loss": 0.9399, + "step": 6680 + }, + { + "epoch": 0.4390484003281378, + "grad_norm": 0.34198591113090515, + "learning_rate": 0.0002, + "loss": 0.8795, + "step": 6690 + }, + { + "epoch": 0.43970467596390483, + "grad_norm": 0.35575005412101746, + "learning_rate": 0.0002, + "loss": 0.9665, + "step": 6700 + }, + { + "epoch": 0.44036095159967187, + "grad_norm": 0.40915772318840027, + "learning_rate": 0.0002, + "loss": 0.9133, + "step": 6710 + }, + { + "epoch": 0.4410172272354389, + "grad_norm": 0.3526946008205414, + "learning_rate": 0.0002, + "loss": 0.9112, + "step": 6720 + }, + { + "epoch": 0.4416735028712059, + "grad_norm": 0.30411285161972046, + "learning_rate": 0.0002, + "loss": 0.8886, + "step": 6730 + }, + { + "epoch": 0.4423297785069729, + "grad_norm": 0.2935996949672699, + "learning_rate": 0.0002, + "loss": 0.8969, + "step": 6740 + }, + { + "epoch": 0.44298605414273995, + "grad_norm": 0.3806987702846527, + "learning_rate": 0.0002, + "loss": 0.8864, + "step": 6750 + }, + { + "epoch": 0.443642329778507, + "grad_norm": 0.3329904079437256, + "learning_rate": 0.0002, + "loss": 0.9267, + "step": 6760 + }, + { + "epoch": 0.444298605414274, + "grad_norm": 0.33832767605781555, + "learning_rate": 0.0002, + "loss": 0.9223, + "step": 6770 + }, + { + "epoch": 0.444954881050041, + "grad_norm": 0.3387627899646759, + "learning_rate": 0.0002, + "loss": 0.9127, + "step": 6780 + }, + { + "epoch": 0.44561115668580803, + "grad_norm": 0.32709822058677673, + "learning_rate": 0.0002, + "loss": 0.8853, + "step": 6790 + }, + { + "epoch": 0.44626743232157506, + "grad_norm": 0.2914820909500122, + "learning_rate": 0.0002, + "loss": 0.9059, + "step": 6800 + }, + { + "epoch": 0.4469237079573421, + "grad_norm": 0.3288695216178894, + "learning_rate": 0.0002, + "loss": 0.9209, + "step": 6810 + }, + { + "epoch": 0.44757998359310913, + "grad_norm": 0.5003459453582764, + "learning_rate": 0.0002, + "loss": 0.9208, + "step": 6820 + }, + { + "epoch": 0.4482362592288761, + "grad_norm": 0.359402060508728, + "learning_rate": 0.0002, + "loss": 0.9797, + "step": 6830 + }, + { + "epoch": 0.44889253486464314, + "grad_norm": 0.35508984327316284, + "learning_rate": 0.0002, + "loss": 0.856, + "step": 6840 + }, + { + "epoch": 0.4495488105004102, + "grad_norm": 0.33327680826187134, + "learning_rate": 0.0002, + "loss": 0.9513, + "step": 6850 + }, + { + "epoch": 0.4502050861361772, + "grad_norm": 0.4084452688694, + "learning_rate": 0.0002, + "loss": 0.965, + "step": 6860 + }, + { + "epoch": 0.4508613617719442, + "grad_norm": 0.30232545733451843, + "learning_rate": 0.0002, + "loss": 0.899, + "step": 6870 + }, + { + "epoch": 0.4515176374077112, + "grad_norm": 0.3244702219963074, + "learning_rate": 0.0002, + "loss": 0.9294, + "step": 6880 + }, + { + "epoch": 0.45217391304347826, + "grad_norm": 0.32148104906082153, + "learning_rate": 0.0002, + "loss": 0.9312, + "step": 6890 + }, + { + "epoch": 0.4528301886792453, + "grad_norm": 0.3831937313079834, + "learning_rate": 0.0002, + "loss": 1.007, + "step": 6900 + }, + { + "epoch": 0.4534864643150123, + "grad_norm": 0.33381497859954834, + "learning_rate": 0.0002, + "loss": 0.9111, + "step": 6910 + }, + { + "epoch": 0.4541427399507793, + "grad_norm": 0.4074220359325409, + "learning_rate": 0.0002, + "loss": 0.8955, + "step": 6920 + }, + { + "epoch": 0.45479901558654634, + "grad_norm": 0.35602903366088867, + "learning_rate": 0.0002, + "loss": 0.9326, + "step": 6930 + }, + { + "epoch": 0.4554552912223134, + "grad_norm": 0.3411916196346283, + "learning_rate": 0.0002, + "loss": 0.9146, + "step": 6940 + }, + { + "epoch": 0.4561115668580804, + "grad_norm": 0.38504868745803833, + "learning_rate": 0.0002, + "loss": 0.9382, + "step": 6950 + }, + { + "epoch": 0.45676784249384744, + "grad_norm": 0.5072926878929138, + "learning_rate": 0.0002, + "loss": 0.9105, + "step": 6960 + }, + { + "epoch": 0.4574241181296144, + "grad_norm": 0.38257330656051636, + "learning_rate": 0.0002, + "loss": 0.9383, + "step": 6970 + }, + { + "epoch": 0.45808039376538146, + "grad_norm": 0.40472176671028137, + "learning_rate": 0.0002, + "loss": 0.8864, + "step": 6980 + }, + { + "epoch": 0.4587366694011485, + "grad_norm": 0.34501466155052185, + "learning_rate": 0.0002, + "loss": 0.949, + "step": 6990 + }, + { + "epoch": 0.4593929450369155, + "grad_norm": 0.32561028003692627, + "learning_rate": 0.0002, + "loss": 0.9717, + "step": 7000 + }, + { + "epoch": 0.4600492206726825, + "grad_norm": 0.3388477861881256, + "learning_rate": 0.0002, + "loss": 0.9655, + "step": 7010 + }, + { + "epoch": 0.46070549630844954, + "grad_norm": 0.38045769929885864, + "learning_rate": 0.0002, + "loss": 0.9039, + "step": 7020 + }, + { + "epoch": 0.46136177194421657, + "grad_norm": 0.32500898838043213, + "learning_rate": 0.0002, + "loss": 0.8879, + "step": 7030 + }, + { + "epoch": 0.4620180475799836, + "grad_norm": 0.3758746385574341, + "learning_rate": 0.0002, + "loss": 0.9872, + "step": 7040 + }, + { + "epoch": 0.46267432321575064, + "grad_norm": 0.39033588767051697, + "learning_rate": 0.0002, + "loss": 0.9043, + "step": 7050 + }, + { + "epoch": 0.4633305988515176, + "grad_norm": 0.35141658782958984, + "learning_rate": 0.0002, + "loss": 0.891, + "step": 7060 + }, + { + "epoch": 0.46398687448728465, + "grad_norm": 0.3715546727180481, + "learning_rate": 0.0002, + "loss": 0.9196, + "step": 7070 + }, + { + "epoch": 0.4646431501230517, + "grad_norm": 0.3062947988510132, + "learning_rate": 0.0002, + "loss": 0.9051, + "step": 7080 + }, + { + "epoch": 0.4652994257588187, + "grad_norm": 0.43001696467399597, + "learning_rate": 0.0002, + "loss": 0.9395, + "step": 7090 + }, + { + "epoch": 0.46595570139458575, + "grad_norm": 0.34747597575187683, + "learning_rate": 0.0002, + "loss": 0.9363, + "step": 7100 + }, + { + "epoch": 0.46661197703035273, + "grad_norm": 0.33514827489852905, + "learning_rate": 0.0002, + "loss": 0.9052, + "step": 7110 + }, + { + "epoch": 0.46726825266611977, + "grad_norm": 0.3500545918941498, + "learning_rate": 0.0002, + "loss": 0.8673, + "step": 7120 + }, + { + "epoch": 0.4679245283018868, + "grad_norm": 0.3959280252456665, + "learning_rate": 0.0002, + "loss": 0.9373, + "step": 7130 + }, + { + "epoch": 0.46858080393765383, + "grad_norm": 0.3119291067123413, + "learning_rate": 0.0002, + "loss": 0.868, + "step": 7140 + }, + { + "epoch": 0.4692370795734208, + "grad_norm": 0.36544349789619446, + "learning_rate": 0.0002, + "loss": 0.923, + "step": 7150 + }, + { + "epoch": 0.46989335520918785, + "grad_norm": 0.3375662565231323, + "learning_rate": 0.0002, + "loss": 0.9281, + "step": 7160 + }, + { + "epoch": 0.4705496308449549, + "grad_norm": 0.48938584327697754, + "learning_rate": 0.0002, + "loss": 0.8948, + "step": 7170 + }, + { + "epoch": 0.4712059064807219, + "grad_norm": 0.36706942319869995, + "learning_rate": 0.0002, + "loss": 0.9016, + "step": 7180 + }, + { + "epoch": 0.47186218211648895, + "grad_norm": 0.43472692370414734, + "learning_rate": 0.0002, + "loss": 0.906, + "step": 7190 + }, + { + "epoch": 0.4725184577522559, + "grad_norm": 0.3433953821659088, + "learning_rate": 0.0002, + "loss": 0.9114, + "step": 7200 + }, + { + "epoch": 0.47317473338802296, + "grad_norm": 0.3224952518939972, + "learning_rate": 0.0002, + "loss": 0.899, + "step": 7210 + }, + { + "epoch": 0.47383100902379, + "grad_norm": 0.34640923142433167, + "learning_rate": 0.0002, + "loss": 0.9039, + "step": 7220 + }, + { + "epoch": 0.47448728465955703, + "grad_norm": 0.36221131682395935, + "learning_rate": 0.0002, + "loss": 0.9372, + "step": 7230 + }, + { + "epoch": 0.47514356029532406, + "grad_norm": 0.3695160150527954, + "learning_rate": 0.0002, + "loss": 0.9328, + "step": 7240 + }, + { + "epoch": 0.47579983593109104, + "grad_norm": 0.3915520906448364, + "learning_rate": 0.0002, + "loss": 0.8726, + "step": 7250 + }, + { + "epoch": 0.4764561115668581, + "grad_norm": 0.34111252427101135, + "learning_rate": 0.0002, + "loss": 0.9305, + "step": 7260 + }, + { + "epoch": 0.4771123872026251, + "grad_norm": 0.3156047761440277, + "learning_rate": 0.0002, + "loss": 0.8853, + "step": 7270 + }, + { + "epoch": 0.47776866283839214, + "grad_norm": 0.349992573261261, + "learning_rate": 0.0002, + "loss": 0.9442, + "step": 7280 + }, + { + "epoch": 0.4784249384741591, + "grad_norm": 0.3627530634403229, + "learning_rate": 0.0002, + "loss": 0.8841, + "step": 7290 + }, + { + "epoch": 0.47908121410992616, + "grad_norm": 0.39342308044433594, + "learning_rate": 0.0002, + "loss": 0.937, + "step": 7300 + }, + { + "epoch": 0.4797374897456932, + "grad_norm": 0.321122944355011, + "learning_rate": 0.0002, + "loss": 0.951, + "step": 7310 + }, + { + "epoch": 0.4803937653814602, + "grad_norm": 0.37064728140830994, + "learning_rate": 0.0002, + "loss": 0.9085, + "step": 7320 + }, + { + "epoch": 0.48105004101722726, + "grad_norm": 0.348779559135437, + "learning_rate": 0.0002, + "loss": 0.9479, + "step": 7330 + }, + { + "epoch": 0.48170631665299424, + "grad_norm": 0.3484368324279785, + "learning_rate": 0.0002, + "loss": 0.8651, + "step": 7340 + }, + { + "epoch": 0.48236259228876127, + "grad_norm": 0.3343070149421692, + "learning_rate": 0.0002, + "loss": 0.9096, + "step": 7350 + }, + { + "epoch": 0.4830188679245283, + "grad_norm": 0.3310532867908478, + "learning_rate": 0.0002, + "loss": 0.9148, + "step": 7360 + }, + { + "epoch": 0.48367514356029534, + "grad_norm": 0.37587985396385193, + "learning_rate": 0.0002, + "loss": 0.8652, + "step": 7370 + }, + { + "epoch": 0.4843314191960624, + "grad_norm": 0.37325888872146606, + "learning_rate": 0.0002, + "loss": 0.9302, + "step": 7380 + }, + { + "epoch": 0.48498769483182935, + "grad_norm": 0.3869538903236389, + "learning_rate": 0.0002, + "loss": 0.9382, + "step": 7390 + }, + { + "epoch": 0.4856439704675964, + "grad_norm": 0.34064534306526184, + "learning_rate": 0.0002, + "loss": 0.8967, + "step": 7400 + }, + { + "epoch": 0.4863002461033634, + "grad_norm": 0.35124653577804565, + "learning_rate": 0.0002, + "loss": 0.8884, + "step": 7410 + }, + { + "epoch": 0.48695652173913045, + "grad_norm": 0.38339361548423767, + "learning_rate": 0.0002, + "loss": 0.9178, + "step": 7420 + }, + { + "epoch": 0.48761279737489743, + "grad_norm": 0.32823026180267334, + "learning_rate": 0.0002, + "loss": 0.8984, + "step": 7430 + }, + { + "epoch": 0.48826907301066447, + "grad_norm": 0.32759153842926025, + "learning_rate": 0.0002, + "loss": 0.9348, + "step": 7440 + }, + { + "epoch": 0.4889253486464315, + "grad_norm": 0.3855777680873871, + "learning_rate": 0.0002, + "loss": 0.9023, + "step": 7450 + }, + { + "epoch": 0.48958162428219854, + "grad_norm": 0.34890854358673096, + "learning_rate": 0.0002, + "loss": 0.9201, + "step": 7460 + }, + { + "epoch": 0.49023789991796557, + "grad_norm": 0.3580712378025055, + "learning_rate": 0.0002, + "loss": 0.8947, + "step": 7470 + }, + { + "epoch": 0.49089417555373255, + "grad_norm": 0.34897708892822266, + "learning_rate": 0.0002, + "loss": 0.8413, + "step": 7480 + }, + { + "epoch": 0.4915504511894996, + "grad_norm": 0.32964155077934265, + "learning_rate": 0.0002, + "loss": 0.8508, + "step": 7490 + }, + { + "epoch": 0.4922067268252666, + "grad_norm": 0.3805377185344696, + "learning_rate": 0.0002, + "loss": 0.9298, + "step": 7500 + }, + { + "epoch": 0.49286300246103365, + "grad_norm": 0.3312858045101166, + "learning_rate": 0.0002, + "loss": 0.9354, + "step": 7510 + }, + { + "epoch": 0.49351927809680063, + "grad_norm": 0.32818418741226196, + "learning_rate": 0.0002, + "loss": 0.8966, + "step": 7520 + }, + { + "epoch": 0.49417555373256766, + "grad_norm": 0.3721756339073181, + "learning_rate": 0.0002, + "loss": 0.9036, + "step": 7530 + }, + { + "epoch": 0.4948318293683347, + "grad_norm": 0.4272252321243286, + "learning_rate": 0.0002, + "loss": 0.9483, + "step": 7540 + }, + { + "epoch": 0.49548810500410173, + "grad_norm": 0.3034988343715668, + "learning_rate": 0.0002, + "loss": 0.9159, + "step": 7550 + }, + { + "epoch": 0.49614438063986877, + "grad_norm": 0.34006139636039734, + "learning_rate": 0.0002, + "loss": 0.8996, + "step": 7560 + }, + { + "epoch": 0.49680065627563574, + "grad_norm": 0.33074137568473816, + "learning_rate": 0.0002, + "loss": 0.9122, + "step": 7570 + }, + { + "epoch": 0.4974569319114028, + "grad_norm": 0.3747742772102356, + "learning_rate": 0.0002, + "loss": 0.877, + "step": 7580 + }, + { + "epoch": 0.4981132075471698, + "grad_norm": 0.29682815074920654, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 7590 + }, + { + "epoch": 0.49876948318293685, + "grad_norm": 0.36150112748146057, + "learning_rate": 0.0002, + "loss": 0.9669, + "step": 7600 + }, + { + "epoch": 0.4994257588187039, + "grad_norm": 0.3893260955810547, + "learning_rate": 0.0002, + "loss": 0.8936, + "step": 7610 + }, + { + "epoch": 0.5000820344544709, + "grad_norm": 0.358791708946228, + "learning_rate": 0.0002, + "loss": 0.9273, + "step": 7620 + }, + { + "epoch": 0.500738310090238, + "grad_norm": 0.3799469470977783, + "learning_rate": 0.0002, + "loss": 0.9053, + "step": 7630 + }, + { + "epoch": 0.5013945857260049, + "grad_norm": 0.35908520221710205, + "learning_rate": 0.0002, + "loss": 0.9655, + "step": 7640 + }, + { + "epoch": 0.5020508613617719, + "grad_norm": 0.33538103103637695, + "learning_rate": 0.0002, + "loss": 0.8993, + "step": 7650 + }, + { + "epoch": 0.502707136997539, + "grad_norm": 0.2990545928478241, + "learning_rate": 0.0002, + "loss": 0.8812, + "step": 7660 + }, + { + "epoch": 0.503363412633306, + "grad_norm": 0.3342699408531189, + "learning_rate": 0.0002, + "loss": 0.8965, + "step": 7670 + }, + { + "epoch": 0.5040196882690731, + "grad_norm": 0.3656594455242157, + "learning_rate": 0.0002, + "loss": 0.9476, + "step": 7680 + }, + { + "epoch": 0.50467596390484, + "grad_norm": 0.315176784992218, + "learning_rate": 0.0002, + "loss": 0.9225, + "step": 7690 + }, + { + "epoch": 0.505332239540607, + "grad_norm": 0.3070623576641083, + "learning_rate": 0.0002, + "loss": 0.8998, + "step": 7700 + }, + { + "epoch": 0.5059885151763741, + "grad_norm": 0.3741548955440521, + "learning_rate": 0.0002, + "loss": 0.8565, + "step": 7710 + }, + { + "epoch": 0.5066447908121411, + "grad_norm": 0.3047017753124237, + "learning_rate": 0.0002, + "loss": 0.8882, + "step": 7720 + }, + { + "epoch": 0.5073010664479081, + "grad_norm": 0.5524629354476929, + "learning_rate": 0.0002, + "loss": 0.9002, + "step": 7730 + }, + { + "epoch": 0.5079573420836752, + "grad_norm": 0.39234456419944763, + "learning_rate": 0.0002, + "loss": 0.901, + "step": 7740 + }, + { + "epoch": 0.5086136177194421, + "grad_norm": 0.3306262791156769, + "learning_rate": 0.0002, + "loss": 0.8948, + "step": 7750 + }, + { + "epoch": 0.5092698933552092, + "grad_norm": 0.3181036710739136, + "learning_rate": 0.0002, + "loss": 0.8627, + "step": 7760 + }, + { + "epoch": 0.5099261689909762, + "grad_norm": 0.3531821668148041, + "learning_rate": 0.0002, + "loss": 0.9068, + "step": 7770 + }, + { + "epoch": 0.5105824446267432, + "grad_norm": 0.39781567454338074, + "learning_rate": 0.0002, + "loss": 0.9452, + "step": 7780 + }, + { + "epoch": 0.5112387202625103, + "grad_norm": 0.32413530349731445, + "learning_rate": 0.0002, + "loss": 0.8829, + "step": 7790 + }, + { + "epoch": 0.5118949958982772, + "grad_norm": 0.33857491612434387, + "learning_rate": 0.0002, + "loss": 0.9162, + "step": 7800 + }, + { + "epoch": 0.5125512715340443, + "grad_norm": 0.33426207304000854, + "learning_rate": 0.0002, + "loss": 0.9183, + "step": 7810 + }, + { + "epoch": 0.5132075471698113, + "grad_norm": 0.31170961260795593, + "learning_rate": 0.0002, + "loss": 0.8487, + "step": 7820 + }, + { + "epoch": 0.5138638228055783, + "grad_norm": 0.3401695787906647, + "learning_rate": 0.0002, + "loss": 0.9425, + "step": 7830 + }, + { + "epoch": 0.5145200984413454, + "grad_norm": 0.4325079619884491, + "learning_rate": 0.0002, + "loss": 0.8852, + "step": 7840 + }, + { + "epoch": 0.5151763740771124, + "grad_norm": 0.33266621828079224, + "learning_rate": 0.0002, + "loss": 0.9165, + "step": 7850 + }, + { + "epoch": 0.5158326497128795, + "grad_norm": 0.353476345539093, + "learning_rate": 0.0002, + "loss": 0.9178, + "step": 7860 + }, + { + "epoch": 0.5164889253486464, + "grad_norm": 0.34789302945137024, + "learning_rate": 0.0002, + "loss": 0.9734, + "step": 7870 + }, + { + "epoch": 0.5171452009844134, + "grad_norm": 0.3378570079803467, + "learning_rate": 0.0002, + "loss": 0.8674, + "step": 7880 + }, + { + "epoch": 0.5178014766201805, + "grad_norm": 0.340122789144516, + "learning_rate": 0.0002, + "loss": 0.9147, + "step": 7890 + }, + { + "epoch": 0.5184577522559475, + "grad_norm": 0.39235031604766846, + "learning_rate": 0.0002, + "loss": 0.9627, + "step": 7900 + }, + { + "epoch": 0.5191140278917146, + "grad_norm": 0.31752005219459534, + "learning_rate": 0.0002, + "loss": 0.9003, + "step": 7910 + }, + { + "epoch": 0.5197703035274815, + "grad_norm": 0.3948894739151001, + "learning_rate": 0.0002, + "loss": 0.9353, + "step": 7920 + }, + { + "epoch": 0.5204265791632485, + "grad_norm": 0.29803305864334106, + "learning_rate": 0.0002, + "loss": 0.8697, + "step": 7930 + }, + { + "epoch": 0.5210828547990156, + "grad_norm": 0.34285855293273926, + "learning_rate": 0.0002, + "loss": 0.9912, + "step": 7940 + }, + { + "epoch": 0.5217391304347826, + "grad_norm": 0.40978604555130005, + "learning_rate": 0.0002, + "loss": 0.9241, + "step": 7950 + }, + { + "epoch": 0.5223954060705497, + "grad_norm": 0.35473906993865967, + "learning_rate": 0.0002, + "loss": 0.9609, + "step": 7960 + }, + { + "epoch": 0.5230516817063167, + "grad_norm": 0.3896619379520416, + "learning_rate": 0.0002, + "loss": 0.925, + "step": 7970 + }, + { + "epoch": 0.5237079573420836, + "grad_norm": 0.3622605502605438, + "learning_rate": 0.0002, + "loss": 0.8863, + "step": 7980 + }, + { + "epoch": 0.5243642329778507, + "grad_norm": 0.4097590744495392, + "learning_rate": 0.0002, + "loss": 0.939, + "step": 7990 + }, + { + "epoch": 0.5250205086136177, + "grad_norm": 0.31397542357444763, + "learning_rate": 0.0002, + "loss": 0.8895, + "step": 8000 + }, + { + "epoch": 0.5256767842493847, + "grad_norm": 0.45559775829315186, + "learning_rate": 0.0002, + "loss": 0.9185, + "step": 8010 + }, + { + "epoch": 0.5263330598851518, + "grad_norm": 0.3471437692642212, + "learning_rate": 0.0002, + "loss": 0.9318, + "step": 8020 + }, + { + "epoch": 0.5269893355209188, + "grad_norm": 0.3105354607105255, + "learning_rate": 0.0002, + "loss": 0.8684, + "step": 8030 + }, + { + "epoch": 0.5276456111566858, + "grad_norm": 0.42847758531570435, + "learning_rate": 0.0002, + "loss": 0.8633, + "step": 8040 + }, + { + "epoch": 0.5283018867924528, + "grad_norm": 0.732471227645874, + "learning_rate": 0.0002, + "loss": 0.924, + "step": 8050 + }, + { + "epoch": 0.5289581624282198, + "grad_norm": 0.3242183327674866, + "learning_rate": 0.0002, + "loss": 0.8332, + "step": 8060 + }, + { + "epoch": 0.5296144380639869, + "grad_norm": 0.36898288130760193, + "learning_rate": 0.0002, + "loss": 0.9531, + "step": 8070 + }, + { + "epoch": 0.5302707136997539, + "grad_norm": 0.3850383162498474, + "learning_rate": 0.0002, + "loss": 0.933, + "step": 8080 + }, + { + "epoch": 0.530926989335521, + "grad_norm": 0.3375824987888336, + "learning_rate": 0.0002, + "loss": 0.9184, + "step": 8090 + }, + { + "epoch": 0.5315832649712879, + "grad_norm": 0.4321442246437073, + "learning_rate": 0.0002, + "loss": 0.9284, + "step": 8100 + }, + { + "epoch": 0.5322395406070549, + "grad_norm": 0.3836155831813812, + "learning_rate": 0.0002, + "loss": 0.9389, + "step": 8110 + }, + { + "epoch": 0.532895816242822, + "grad_norm": 0.36536362767219543, + "learning_rate": 0.0002, + "loss": 0.9117, + "step": 8120 + }, + { + "epoch": 0.533552091878589, + "grad_norm": 0.4364359676837921, + "learning_rate": 0.0002, + "loss": 0.8724, + "step": 8130 + }, + { + "epoch": 0.5342083675143561, + "grad_norm": 0.413506418466568, + "learning_rate": 0.0002, + "loss": 0.8866, + "step": 8140 + }, + { + "epoch": 0.534864643150123, + "grad_norm": 0.33120569586753845, + "learning_rate": 0.0002, + "loss": 0.8985, + "step": 8150 + }, + { + "epoch": 0.53552091878589, + "grad_norm": 0.3408608138561249, + "learning_rate": 0.0002, + "loss": 0.9553, + "step": 8160 + }, + { + "epoch": 0.5361771944216571, + "grad_norm": 0.30265191197395325, + "learning_rate": 0.0002, + "loss": 0.888, + "step": 8170 + }, + { + "epoch": 0.5368334700574241, + "grad_norm": 0.40208759903907776, + "learning_rate": 0.0002, + "loss": 0.8799, + "step": 8180 + }, + { + "epoch": 0.5374897456931912, + "grad_norm": 0.38238683342933655, + "learning_rate": 0.0002, + "loss": 0.9343, + "step": 8190 + }, + { + "epoch": 0.5381460213289582, + "grad_norm": 0.3405042290687561, + "learning_rate": 0.0002, + "loss": 0.9051, + "step": 8200 + }, + { + "epoch": 0.5388022969647251, + "grad_norm": 0.3806214928627014, + "learning_rate": 0.0002, + "loss": 0.9355, + "step": 8210 + }, + { + "epoch": 0.5394585726004922, + "grad_norm": 0.3639737069606781, + "learning_rate": 0.0002, + "loss": 0.9148, + "step": 8220 + }, + { + "epoch": 0.5401148482362592, + "grad_norm": 0.3366181552410126, + "learning_rate": 0.0002, + "loss": 0.9297, + "step": 8230 + }, + { + "epoch": 0.5407711238720263, + "grad_norm": 0.34822607040405273, + "learning_rate": 0.0002, + "loss": 0.9439, + "step": 8240 + }, + { + "epoch": 0.5414273995077933, + "grad_norm": 0.3682822287082672, + "learning_rate": 0.0002, + "loss": 0.925, + "step": 8250 + }, + { + "epoch": 0.5420836751435603, + "grad_norm": 0.39051371812820435, + "learning_rate": 0.0002, + "loss": 0.9246, + "step": 8260 + }, + { + "epoch": 0.5427399507793274, + "grad_norm": 0.37731602787971497, + "learning_rate": 0.0002, + "loss": 0.9008, + "step": 8270 + }, + { + "epoch": 0.5433962264150943, + "grad_norm": 0.34222212433815, + "learning_rate": 0.0002, + "loss": 0.8849, + "step": 8280 + }, + { + "epoch": 0.5440525020508613, + "grad_norm": 0.3870106637477875, + "learning_rate": 0.0002, + "loss": 0.8956, + "step": 8290 + }, + { + "epoch": 0.5447087776866284, + "grad_norm": 0.35894039273262024, + "learning_rate": 0.0002, + "loss": 0.8974, + "step": 8300 + }, + { + "epoch": 0.5453650533223954, + "grad_norm": 0.38450875878334045, + "learning_rate": 0.0002, + "loss": 0.9664, + "step": 8310 + }, + { + "epoch": 0.5460213289581625, + "grad_norm": 0.3106869161128998, + "learning_rate": 0.0002, + "loss": 0.9024, + "step": 8320 + }, + { + "epoch": 0.5466776045939294, + "grad_norm": 0.35935118794441223, + "learning_rate": 0.0002, + "loss": 0.8578, + "step": 8330 + }, + { + "epoch": 0.5473338802296964, + "grad_norm": 0.37256261706352234, + "learning_rate": 0.0002, + "loss": 0.904, + "step": 8340 + }, + { + "epoch": 0.5479901558654635, + "grad_norm": 0.3807767629623413, + "learning_rate": 0.0002, + "loss": 0.9091, + "step": 8350 + }, + { + "epoch": 0.5486464315012305, + "grad_norm": 0.3551439046859741, + "learning_rate": 0.0002, + "loss": 0.9314, + "step": 8360 + }, + { + "epoch": 0.5493027071369976, + "grad_norm": 0.39783644676208496, + "learning_rate": 0.0002, + "loss": 0.9046, + "step": 8370 + }, + { + "epoch": 0.5499589827727646, + "grad_norm": 0.37729522585868835, + "learning_rate": 0.0002, + "loss": 0.9466, + "step": 8380 + }, + { + "epoch": 0.5506152584085315, + "grad_norm": 0.32453545928001404, + "learning_rate": 0.0002, + "loss": 0.9559, + "step": 8390 + }, + { + "epoch": 0.5512715340442986, + "grad_norm": 0.353837251663208, + "learning_rate": 0.0002, + "loss": 0.9073, + "step": 8400 + }, + { + "epoch": 0.5519278096800656, + "grad_norm": 0.37711212038993835, + "learning_rate": 0.0002, + "loss": 0.961, + "step": 8410 + }, + { + "epoch": 0.5525840853158327, + "grad_norm": 0.3966476619243622, + "learning_rate": 0.0002, + "loss": 0.9844, + "step": 8420 + }, + { + "epoch": 0.5532403609515997, + "grad_norm": 0.338074266910553, + "learning_rate": 0.0002, + "loss": 0.9221, + "step": 8430 + }, + { + "epoch": 0.5538966365873667, + "grad_norm": 0.3984934389591217, + "learning_rate": 0.0002, + "loss": 0.9101, + "step": 8440 + }, + { + "epoch": 0.5545529122231337, + "grad_norm": 0.3430469334125519, + "learning_rate": 0.0002, + "loss": 0.9177, + "step": 8450 + }, + { + "epoch": 0.5552091878589007, + "grad_norm": 0.34223586320877075, + "learning_rate": 0.0002, + "loss": 0.9128, + "step": 8460 + }, + { + "epoch": 0.5558654634946678, + "grad_norm": 0.4057091474533081, + "learning_rate": 0.0002, + "loss": 0.8961, + "step": 8470 + }, + { + "epoch": 0.5565217391304348, + "grad_norm": 0.39272257685661316, + "learning_rate": 0.0002, + "loss": 0.9121, + "step": 8480 + }, + { + "epoch": 0.5571780147662018, + "grad_norm": 0.34801942110061646, + "learning_rate": 0.0002, + "loss": 0.8941, + "step": 8490 + }, + { + "epoch": 0.5578342904019689, + "grad_norm": 0.5177333354949951, + "learning_rate": 0.0002, + "loss": 0.8771, + "step": 8500 + }, + { + "epoch": 0.5584905660377358, + "grad_norm": 0.4139311909675598, + "learning_rate": 0.0002, + "loss": 0.9289, + "step": 8510 + }, + { + "epoch": 0.5591468416735029, + "grad_norm": 0.3526647388935089, + "learning_rate": 0.0002, + "loss": 0.9391, + "step": 8520 + }, + { + "epoch": 0.5598031173092699, + "grad_norm": 0.3475663661956787, + "learning_rate": 0.0002, + "loss": 0.9354, + "step": 8530 + }, + { + "epoch": 0.5604593929450369, + "grad_norm": 0.3338899314403534, + "learning_rate": 0.0002, + "loss": 0.9623, + "step": 8540 + }, + { + "epoch": 0.561115668580804, + "grad_norm": 0.392212837934494, + "learning_rate": 0.0002, + "loss": 0.9156, + "step": 8550 + }, + { + "epoch": 0.561771944216571, + "grad_norm": 0.3058992028236389, + "learning_rate": 0.0002, + "loss": 0.9469, + "step": 8560 + }, + { + "epoch": 0.5624282198523379, + "grad_norm": 0.40603798627853394, + "learning_rate": 0.0002, + "loss": 0.8465, + "step": 8570 + }, + { + "epoch": 0.563084495488105, + "grad_norm": 0.37745383381843567, + "learning_rate": 0.0002, + "loss": 0.8962, + "step": 8580 + }, + { + "epoch": 0.563740771123872, + "grad_norm": 0.40886175632476807, + "learning_rate": 0.0002, + "loss": 0.8787, + "step": 8590 + }, + { + "epoch": 0.5643970467596391, + "grad_norm": 0.3653600215911865, + "learning_rate": 0.0002, + "loss": 0.9469, + "step": 8600 + }, + { + "epoch": 0.5650533223954061, + "grad_norm": 0.32329508662223816, + "learning_rate": 0.0002, + "loss": 0.8976, + "step": 8610 + }, + { + "epoch": 0.565709598031173, + "grad_norm": 0.3535236716270447, + "learning_rate": 0.0002, + "loss": 0.9081, + "step": 8620 + }, + { + "epoch": 0.5663658736669401, + "grad_norm": 0.37497273087501526, + "learning_rate": 0.0002, + "loss": 0.8703, + "step": 8630 + }, + { + "epoch": 0.5670221493027071, + "grad_norm": 0.3781719505786896, + "learning_rate": 0.0002, + "loss": 0.8853, + "step": 8640 + }, + { + "epoch": 0.5676784249384742, + "grad_norm": 0.4481894075870514, + "learning_rate": 0.0002, + "loss": 0.9074, + "step": 8650 + }, + { + "epoch": 0.5683347005742412, + "grad_norm": 0.35790109634399414, + "learning_rate": 0.0002, + "loss": 0.8923, + "step": 8660 + }, + { + "epoch": 0.5689909762100082, + "grad_norm": 0.4617280662059784, + "learning_rate": 0.0002, + "loss": 0.8946, + "step": 8670 + }, + { + "epoch": 0.5696472518457752, + "grad_norm": 0.3813382089138031, + "learning_rate": 0.0002, + "loss": 0.9059, + "step": 8680 + }, + { + "epoch": 0.5703035274815422, + "grad_norm": 0.33388257026672363, + "learning_rate": 0.0002, + "loss": 0.9126, + "step": 8690 + }, + { + "epoch": 0.5709598031173093, + "grad_norm": 0.330185204744339, + "learning_rate": 0.0002, + "loss": 0.8866, + "step": 8700 + }, + { + "epoch": 0.5716160787530763, + "grad_norm": 0.3768845200538635, + "learning_rate": 0.0002, + "loss": 0.916, + "step": 8710 + }, + { + "epoch": 0.5722723543888433, + "grad_norm": 0.3221369683742523, + "learning_rate": 0.0002, + "loss": 0.9213, + "step": 8720 + }, + { + "epoch": 0.5729286300246104, + "grad_norm": 0.3221580684185028, + "learning_rate": 0.0002, + "loss": 0.8919, + "step": 8730 + }, + { + "epoch": 0.5735849056603773, + "grad_norm": 0.3693002760410309, + "learning_rate": 0.0002, + "loss": 0.924, + "step": 8740 + }, + { + "epoch": 0.5742411812961444, + "grad_norm": 0.3859325349330902, + "learning_rate": 0.0002, + "loss": 0.8983, + "step": 8750 + }, + { + "epoch": 0.5748974569319114, + "grad_norm": 0.3891449570655823, + "learning_rate": 0.0002, + "loss": 0.9321, + "step": 8760 + }, + { + "epoch": 0.5755537325676784, + "grad_norm": 0.3313274681568146, + "learning_rate": 0.0002, + "loss": 0.9315, + "step": 8770 + }, + { + "epoch": 0.5762100082034455, + "grad_norm": 0.33418914675712585, + "learning_rate": 0.0002, + "loss": 0.89, + "step": 8780 + }, + { + "epoch": 0.5768662838392125, + "grad_norm": 0.4153108596801758, + "learning_rate": 0.0002, + "loss": 0.9029, + "step": 8790 + }, + { + "epoch": 0.5775225594749795, + "grad_norm": 0.3614528477191925, + "learning_rate": 0.0002, + "loss": 0.9126, + "step": 8800 + }, + { + "epoch": 0.5781788351107465, + "grad_norm": 0.3172459602355957, + "learning_rate": 0.0002, + "loss": 0.9086, + "step": 8810 + }, + { + "epoch": 0.5788351107465135, + "grad_norm": 0.3795868456363678, + "learning_rate": 0.0002, + "loss": 0.9225, + "step": 8820 + }, + { + "epoch": 0.5794913863822806, + "grad_norm": 0.3750050961971283, + "learning_rate": 0.0002, + "loss": 0.9199, + "step": 8830 + }, + { + "epoch": 0.5801476620180476, + "grad_norm": 0.3348597288131714, + "learning_rate": 0.0002, + "loss": 0.9527, + "step": 8840 + }, + { + "epoch": 0.5808039376538146, + "grad_norm": 0.39116451144218445, + "learning_rate": 0.0002, + "loss": 0.9379, + "step": 8850 + }, + { + "epoch": 0.5814602132895816, + "grad_norm": 0.33639633655548096, + "learning_rate": 0.0002, + "loss": 0.9087, + "step": 8860 + }, + { + "epoch": 0.5821164889253486, + "grad_norm": 0.33336129784584045, + "learning_rate": 0.0002, + "loss": 0.9181, + "step": 8870 + }, + { + "epoch": 0.5827727645611157, + "grad_norm": 0.3624817132949829, + "learning_rate": 0.0002, + "loss": 0.9194, + "step": 8880 + }, + { + "epoch": 0.5834290401968827, + "grad_norm": 0.2976662218570709, + "learning_rate": 0.0002, + "loss": 0.9059, + "step": 8890 + }, + { + "epoch": 0.5840853158326497, + "grad_norm": 0.4231838583946228, + "learning_rate": 0.0002, + "loss": 0.8903, + "step": 8900 + }, + { + "epoch": 0.5847415914684168, + "grad_norm": 0.343156099319458, + "learning_rate": 0.0002, + "loss": 0.9157, + "step": 8910 + }, + { + "epoch": 0.5853978671041837, + "grad_norm": 0.39717403054237366, + "learning_rate": 0.0002, + "loss": 0.9655, + "step": 8920 + }, + { + "epoch": 0.5860541427399508, + "grad_norm": 0.3388144075870514, + "learning_rate": 0.0002, + "loss": 0.8758, + "step": 8930 + }, + { + "epoch": 0.5867104183757178, + "grad_norm": 0.3949063718318939, + "learning_rate": 0.0002, + "loss": 0.922, + "step": 8940 + }, + { + "epoch": 0.5873666940114848, + "grad_norm": 0.358415424823761, + "learning_rate": 0.0002, + "loss": 0.9081, + "step": 8950 + }, + { + "epoch": 0.5880229696472519, + "grad_norm": 0.367883563041687, + "learning_rate": 0.0002, + "loss": 0.906, + "step": 8960 + }, + { + "epoch": 0.5886792452830188, + "grad_norm": 0.3859010636806488, + "learning_rate": 0.0002, + "loss": 0.9131, + "step": 8970 + }, + { + "epoch": 0.5893355209187859, + "grad_norm": 0.3540095090866089, + "learning_rate": 0.0002, + "loss": 0.916, + "step": 8980 + }, + { + "epoch": 0.5899917965545529, + "grad_norm": 0.33913302421569824, + "learning_rate": 0.0002, + "loss": 0.9704, + "step": 8990 + }, + { + "epoch": 0.5906480721903199, + "grad_norm": 0.35641804337501526, + "learning_rate": 0.0002, + "loss": 0.8772, + "step": 9000 + }, + { + "epoch": 0.591304347826087, + "grad_norm": 0.3448907434940338, + "learning_rate": 0.0002, + "loss": 0.8644, + "step": 9010 + }, + { + "epoch": 0.591960623461854, + "grad_norm": 0.4147050082683563, + "learning_rate": 0.0002, + "loss": 0.9059, + "step": 9020 + }, + { + "epoch": 0.592616899097621, + "grad_norm": 0.45032307505607605, + "learning_rate": 0.0002, + "loss": 0.9134, + "step": 9030 + }, + { + "epoch": 0.593273174733388, + "grad_norm": 0.3628511428833008, + "learning_rate": 0.0002, + "loss": 0.8855, + "step": 9040 + }, + { + "epoch": 0.593929450369155, + "grad_norm": 0.42760607600212097, + "learning_rate": 0.0002, + "loss": 0.8924, + "step": 9050 + }, + { + "epoch": 0.5945857260049221, + "grad_norm": 0.3591140806674957, + "learning_rate": 0.0002, + "loss": 0.889, + "step": 9060 + }, + { + "epoch": 0.5952420016406891, + "grad_norm": 0.3496510982513428, + "learning_rate": 0.0002, + "loss": 0.9409, + "step": 9070 + }, + { + "epoch": 0.5958982772764561, + "grad_norm": 0.4098506569862366, + "learning_rate": 0.0002, + "loss": 0.9394, + "step": 9080 + }, + { + "epoch": 0.5965545529122231, + "grad_norm": 0.3535410165786743, + "learning_rate": 0.0002, + "loss": 0.9455, + "step": 9090 + }, + { + "epoch": 0.5972108285479901, + "grad_norm": 0.3716369569301605, + "learning_rate": 0.0002, + "loss": 0.8814, + "step": 9100 + }, + { + "epoch": 0.5978671041837572, + "grad_norm": 0.3236614763736725, + "learning_rate": 0.0002, + "loss": 0.8656, + "step": 9110 + }, + { + "epoch": 0.5985233798195242, + "grad_norm": 0.311577171087265, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 9120 + }, + { + "epoch": 0.5991796554552912, + "grad_norm": 0.3261461555957794, + "learning_rate": 0.0002, + "loss": 0.9118, + "step": 9130 + }, + { + "epoch": 0.5998359310910583, + "grad_norm": 0.3794492781162262, + "learning_rate": 0.0002, + "loss": 0.9274, + "step": 9140 + }, + { + "epoch": 0.6004922067268252, + "grad_norm": 0.31726357340812683, + "learning_rate": 0.0002, + "loss": 0.8967, + "step": 9150 + }, + { + "epoch": 0.6011484823625923, + "grad_norm": 0.38432174921035767, + "learning_rate": 0.0002, + "loss": 0.9156, + "step": 9160 + }, + { + "epoch": 0.6018047579983593, + "grad_norm": 0.4364495873451233, + "learning_rate": 0.0002, + "loss": 0.9703, + "step": 9170 + }, + { + "epoch": 0.6024610336341263, + "grad_norm": 0.41058987379074097, + "learning_rate": 0.0002, + "loss": 0.9162, + "step": 9180 + }, + { + "epoch": 0.6031173092698934, + "grad_norm": 0.3146302103996277, + "learning_rate": 0.0002, + "loss": 0.8914, + "step": 9190 + }, + { + "epoch": 0.6037735849056604, + "grad_norm": 0.3117610216140747, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 9200 + }, + { + "epoch": 0.6044298605414274, + "grad_norm": 0.31921523809432983, + "learning_rate": 0.0002, + "loss": 0.8741, + "step": 9210 + }, + { + "epoch": 0.6050861361771944, + "grad_norm": 0.3295772969722748, + "learning_rate": 0.0002, + "loss": 0.8756, + "step": 9220 + }, + { + "epoch": 0.6057424118129614, + "grad_norm": 0.3216910660266876, + "learning_rate": 0.0002, + "loss": 0.8578, + "step": 9230 + }, + { + "epoch": 0.6063986874487285, + "grad_norm": 0.40590721368789673, + "learning_rate": 0.0002, + "loss": 0.9341, + "step": 9240 + }, + { + "epoch": 0.6070549630844955, + "grad_norm": 0.4337029457092285, + "learning_rate": 0.0002, + "loss": 0.935, + "step": 9250 + }, + { + "epoch": 0.6077112387202626, + "grad_norm": 0.3434010148048401, + "learning_rate": 0.0002, + "loss": 0.8866, + "step": 9260 + }, + { + "epoch": 0.6083675143560295, + "grad_norm": 0.340589702129364, + "learning_rate": 0.0002, + "loss": 0.8692, + "step": 9270 + }, + { + "epoch": 0.6090237899917965, + "grad_norm": 0.33650949597358704, + "learning_rate": 0.0002, + "loss": 0.8741, + "step": 9280 + }, + { + "epoch": 0.6096800656275636, + "grad_norm": 0.4195605218410492, + "learning_rate": 0.0002, + "loss": 0.9396, + "step": 9290 + }, + { + "epoch": 0.6103363412633306, + "grad_norm": 0.444624125957489, + "learning_rate": 0.0002, + "loss": 0.8877, + "step": 9300 + }, + { + "epoch": 0.6109926168990977, + "grad_norm": 0.5425066947937012, + "learning_rate": 0.0002, + "loss": 0.8792, + "step": 9310 + }, + { + "epoch": 0.6116488925348647, + "grad_norm": 0.36225831508636475, + "learning_rate": 0.0002, + "loss": 0.865, + "step": 9320 + }, + { + "epoch": 0.6123051681706316, + "grad_norm": 0.3421785533428192, + "learning_rate": 0.0002, + "loss": 0.9439, + "step": 9330 + }, + { + "epoch": 0.6129614438063987, + "grad_norm": 0.3792729079723358, + "learning_rate": 0.0002, + "loss": 0.8842, + "step": 9340 + }, + { + "epoch": 0.6136177194421657, + "grad_norm": 0.41844120621681213, + "learning_rate": 0.0002, + "loss": 0.8791, + "step": 9350 + }, + { + "epoch": 0.6142739950779327, + "grad_norm": 0.3430991768836975, + "learning_rate": 0.0002, + "loss": 0.9091, + "step": 9360 + }, + { + "epoch": 0.6149302707136998, + "grad_norm": 0.3460402190685272, + "learning_rate": 0.0002, + "loss": 0.9165, + "step": 9370 + }, + { + "epoch": 0.6155865463494667, + "grad_norm": 0.3775254189968109, + "learning_rate": 0.0002, + "loss": 0.8571, + "step": 9380 + }, + { + "epoch": 0.6162428219852338, + "grad_norm": 0.3706645965576172, + "learning_rate": 0.0002, + "loss": 0.8915, + "step": 9390 + }, + { + "epoch": 0.6168990976210008, + "grad_norm": 0.3817055821418762, + "learning_rate": 0.0002, + "loss": 0.9085, + "step": 9400 + }, + { + "epoch": 0.6175553732567678, + "grad_norm": 0.3856641352176666, + "learning_rate": 0.0002, + "loss": 0.9086, + "step": 9410 + }, + { + "epoch": 0.6182116488925349, + "grad_norm": 0.3271346688270569, + "learning_rate": 0.0002, + "loss": 0.9031, + "step": 9420 + }, + { + "epoch": 0.6188679245283019, + "grad_norm": 0.334314227104187, + "learning_rate": 0.0002, + "loss": 0.883, + "step": 9430 + }, + { + "epoch": 0.619524200164069, + "grad_norm": 0.3850700557231903, + "learning_rate": 0.0002, + "loss": 0.9398, + "step": 9440 + }, + { + "epoch": 0.6201804757998359, + "grad_norm": 0.37492436170578003, + "learning_rate": 0.0002, + "loss": 0.9246, + "step": 9450 + }, + { + "epoch": 0.6208367514356029, + "grad_norm": 0.44262826442718506, + "learning_rate": 0.0002, + "loss": 0.9308, + "step": 9460 + }, + { + "epoch": 0.62149302707137, + "grad_norm": 0.4070657193660736, + "learning_rate": 0.0002, + "loss": 0.9659, + "step": 9470 + }, + { + "epoch": 0.622149302707137, + "grad_norm": 0.37636154890060425, + "learning_rate": 0.0002, + "loss": 0.8812, + "step": 9480 + }, + { + "epoch": 0.6228055783429041, + "grad_norm": 0.38988572359085083, + "learning_rate": 0.0002, + "loss": 0.941, + "step": 9490 + }, + { + "epoch": 0.623461853978671, + "grad_norm": 0.36479735374450684, + "learning_rate": 0.0002, + "loss": 0.8959, + "step": 9500 + }, + { + "epoch": 0.624118129614438, + "grad_norm": 0.3759172260761261, + "learning_rate": 0.0002, + "loss": 0.9431, + "step": 9510 + }, + { + "epoch": 0.6247744052502051, + "grad_norm": 0.3380950689315796, + "learning_rate": 0.0002, + "loss": 0.889, + "step": 9520 + }, + { + "epoch": 0.6254306808859721, + "grad_norm": 0.3483046889305115, + "learning_rate": 0.0002, + "loss": 0.8597, + "step": 9530 + }, + { + "epoch": 0.6260869565217392, + "grad_norm": 0.3562379777431488, + "learning_rate": 0.0002, + "loss": 0.9195, + "step": 9540 + }, + { + "epoch": 0.6267432321575062, + "grad_norm": 0.40167364478111267, + "learning_rate": 0.0002, + "loss": 0.9434, + "step": 9550 + }, + { + "epoch": 0.6273995077932731, + "grad_norm": 0.36348867416381836, + "learning_rate": 0.0002, + "loss": 0.8677, + "step": 9560 + }, + { + "epoch": 0.6280557834290402, + "grad_norm": 0.33701515197753906, + "learning_rate": 0.0002, + "loss": 0.9061, + "step": 9570 + }, + { + "epoch": 0.6287120590648072, + "grad_norm": 0.3795888125896454, + "learning_rate": 0.0002, + "loss": 0.8926, + "step": 9580 + }, + { + "epoch": 0.6293683347005743, + "grad_norm": 0.3701418340206146, + "learning_rate": 0.0002, + "loss": 0.8672, + "step": 9590 + }, + { + "epoch": 0.6300246103363413, + "grad_norm": 0.32559722661972046, + "learning_rate": 0.0002, + "loss": 0.9025, + "step": 9600 + }, + { + "epoch": 0.6306808859721083, + "grad_norm": 0.3093271553516388, + "learning_rate": 0.0002, + "loss": 0.9223, + "step": 9610 + }, + { + "epoch": 0.6313371616078753, + "grad_norm": 0.41225478053092957, + "learning_rate": 0.0002, + "loss": 0.9032, + "step": 9620 + }, + { + "epoch": 0.6319934372436423, + "grad_norm": 0.3798231780529022, + "learning_rate": 0.0002, + "loss": 0.9125, + "step": 9630 + }, + { + "epoch": 0.6326497128794093, + "grad_norm": 0.38690295815467834, + "learning_rate": 0.0002, + "loss": 0.9252, + "step": 9640 + }, + { + "epoch": 0.6333059885151764, + "grad_norm": 0.3629007637500763, + "learning_rate": 0.0002, + "loss": 0.8668, + "step": 9650 + }, + { + "epoch": 0.6339622641509434, + "grad_norm": 0.39467132091522217, + "learning_rate": 0.0002, + "loss": 0.9143, + "step": 9660 + }, + { + "epoch": 0.6346185397867105, + "grad_norm": 0.37746182084083557, + "learning_rate": 0.0002, + "loss": 0.9267, + "step": 9670 + }, + { + "epoch": 0.6352748154224774, + "grad_norm": 0.3718436062335968, + "learning_rate": 0.0002, + "loss": 0.8915, + "step": 9680 + }, + { + "epoch": 0.6359310910582444, + "grad_norm": 0.3951144218444824, + "learning_rate": 0.0002, + "loss": 0.9111, + "step": 9690 + }, + { + "epoch": 0.6365873666940115, + "grad_norm": 0.4104543924331665, + "learning_rate": 0.0002, + "loss": 0.897, + "step": 9700 + }, + { + "epoch": 0.6372436423297785, + "grad_norm": 0.37318137288093567, + "learning_rate": 0.0002, + "loss": 0.9349, + "step": 9710 + }, + { + "epoch": 0.6378999179655456, + "grad_norm": 0.35614442825317383, + "learning_rate": 0.0002, + "loss": 0.9009, + "step": 9720 + }, + { + "epoch": 0.6385561936013125, + "grad_norm": 0.37922942638397217, + "learning_rate": 0.0002, + "loss": 0.9858, + "step": 9730 + }, + { + "epoch": 0.6392124692370795, + "grad_norm": 0.35624784231185913, + "learning_rate": 0.0002, + "loss": 0.8533, + "step": 9740 + }, + { + "epoch": 0.6398687448728466, + "grad_norm": 0.34946876764297485, + "learning_rate": 0.0002, + "loss": 0.934, + "step": 9750 + }, + { + "epoch": 0.6405250205086136, + "grad_norm": 0.37666648626327515, + "learning_rate": 0.0002, + "loss": 0.9176, + "step": 9760 + }, + { + "epoch": 0.6411812961443807, + "grad_norm": 0.3619046211242676, + "learning_rate": 0.0002, + "loss": 0.9297, + "step": 9770 + }, + { + "epoch": 0.6418375717801477, + "grad_norm": 0.3814936578273773, + "learning_rate": 0.0002, + "loss": 0.9135, + "step": 9780 + }, + { + "epoch": 0.6424938474159146, + "grad_norm": 0.4181577265262604, + "learning_rate": 0.0002, + "loss": 0.9143, + "step": 9790 + }, + { + "epoch": 0.6431501230516817, + "grad_norm": 0.3707144558429718, + "learning_rate": 0.0002, + "loss": 0.8978, + "step": 9800 + }, + { + "epoch": 0.6438063986874487, + "grad_norm": 0.3364716172218323, + "learning_rate": 0.0002, + "loss": 0.9311, + "step": 9810 + }, + { + "epoch": 0.6444626743232158, + "grad_norm": 0.31425684690475464, + "learning_rate": 0.0002, + "loss": 0.8566, + "step": 9820 + }, + { + "epoch": 0.6451189499589828, + "grad_norm": 0.32928282022476196, + "learning_rate": 0.0002, + "loss": 0.8792, + "step": 9830 + }, + { + "epoch": 0.6457752255947498, + "grad_norm": 0.39178264141082764, + "learning_rate": 0.0002, + "loss": 0.9232, + "step": 9840 + }, + { + "epoch": 0.6464315012305168, + "grad_norm": 0.37753361463546753, + "learning_rate": 0.0002, + "loss": 0.9413, + "step": 9850 + }, + { + "epoch": 0.6470877768662838, + "grad_norm": 0.4574730396270752, + "learning_rate": 0.0002, + "loss": 0.8777, + "step": 9860 + }, + { + "epoch": 0.6477440525020509, + "grad_norm": 0.3926962614059448, + "learning_rate": 0.0002, + "loss": 0.9078, + "step": 9870 + }, + { + "epoch": 0.6484003281378179, + "grad_norm": 0.3919081687927246, + "learning_rate": 0.0002, + "loss": 0.9221, + "step": 9880 + }, + { + "epoch": 0.6490566037735849, + "grad_norm": 0.3904387652873993, + "learning_rate": 0.0002, + "loss": 0.8681, + "step": 9890 + }, + { + "epoch": 0.649712879409352, + "grad_norm": 0.40808236598968506, + "learning_rate": 0.0002, + "loss": 0.883, + "step": 9900 + }, + { + "epoch": 0.6503691550451189, + "grad_norm": 0.40531080961227417, + "learning_rate": 0.0002, + "loss": 0.9133, + "step": 9910 + }, + { + "epoch": 0.6510254306808859, + "grad_norm": 0.3374323844909668, + "learning_rate": 0.0002, + "loss": 0.9417, + "step": 9920 + }, + { + "epoch": 0.651681706316653, + "grad_norm": 0.4000678062438965, + "learning_rate": 0.0002, + "loss": 0.9769, + "step": 9930 + }, + { + "epoch": 0.65233798195242, + "grad_norm": 0.35415270924568176, + "learning_rate": 0.0002, + "loss": 0.9069, + "step": 9940 + }, + { + "epoch": 0.6529942575881871, + "grad_norm": 0.3612231910228729, + "learning_rate": 0.0002, + "loss": 0.9337, + "step": 9950 + }, + { + "epoch": 0.6536505332239541, + "grad_norm": 0.3081146776676178, + "learning_rate": 0.0002, + "loss": 0.9402, + "step": 9960 + }, + { + "epoch": 0.654306808859721, + "grad_norm": 0.31079018115997314, + "learning_rate": 0.0002, + "loss": 0.8942, + "step": 9970 + }, + { + "epoch": 0.6549630844954881, + "grad_norm": 0.4256346523761749, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 9980 + }, + { + "epoch": 0.6556193601312551, + "grad_norm": 0.3592916429042816, + "learning_rate": 0.0002, + "loss": 0.9226, + "step": 9990 + }, + { + "epoch": 0.6562756357670222, + "grad_norm": 0.3496004641056061, + "learning_rate": 0.0002, + "loss": 0.9018, + "step": 10000 + }, + { + "epoch": 0.6569319114027892, + "grad_norm": 0.4177037179470062, + "learning_rate": 0.0002, + "loss": 0.931, + "step": 10010 + }, + { + "epoch": 0.6575881870385561, + "grad_norm": 0.36680638790130615, + "learning_rate": 0.0002, + "loss": 0.9034, + "step": 10020 + }, + { + "epoch": 0.6582444626743232, + "grad_norm": 0.3916943073272705, + "learning_rate": 0.0002, + "loss": 0.9706, + "step": 10030 + }, + { + "epoch": 0.6589007383100902, + "grad_norm": 0.44537290930747986, + "learning_rate": 0.0002, + "loss": 0.886, + "step": 10040 + }, + { + "epoch": 0.6595570139458573, + "grad_norm": 0.3404697775840759, + "learning_rate": 0.0002, + "loss": 0.8798, + "step": 10050 + }, + { + "epoch": 0.6602132895816243, + "grad_norm": 0.3304594159126282, + "learning_rate": 0.0002, + "loss": 0.9801, + "step": 10060 + }, + { + "epoch": 0.6608695652173913, + "grad_norm": 0.39067313075065613, + "learning_rate": 0.0002, + "loss": 0.9107, + "step": 10070 + }, + { + "epoch": 0.6615258408531584, + "grad_norm": 0.366178959608078, + "learning_rate": 0.0002, + "loss": 0.9336, + "step": 10080 + }, + { + "epoch": 0.6621821164889253, + "grad_norm": 0.3880734145641327, + "learning_rate": 0.0002, + "loss": 0.8817, + "step": 10090 + }, + { + "epoch": 0.6628383921246924, + "grad_norm": 0.42047396302223206, + "learning_rate": 0.0002, + "loss": 0.9087, + "step": 10100 + }, + { + "epoch": 0.6634946677604594, + "grad_norm": 0.3299349546432495, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 10110 + }, + { + "epoch": 0.6641509433962264, + "grad_norm": 0.33470937609672546, + "learning_rate": 0.0002, + "loss": 0.9461, + "step": 10120 + }, + { + "epoch": 0.6648072190319935, + "grad_norm": 0.36240577697753906, + "learning_rate": 0.0002, + "loss": 0.9453, + "step": 10130 + }, + { + "epoch": 0.6654634946677604, + "grad_norm": 0.41457104682922363, + "learning_rate": 0.0002, + "loss": 0.8706, + "step": 10140 + }, + { + "epoch": 0.6661197703035275, + "grad_norm": 0.39064788818359375, + "learning_rate": 0.0002, + "loss": 0.9385, + "step": 10150 + }, + { + "epoch": 0.6667760459392945, + "grad_norm": 0.4137183725833893, + "learning_rate": 0.0002, + "loss": 0.9252, + "step": 10160 + }, + { + "epoch": 0.6674323215750615, + "grad_norm": 0.363413542509079, + "learning_rate": 0.0002, + "loss": 0.8874, + "step": 10170 + }, + { + "epoch": 0.6680885972108286, + "grad_norm": 0.3631424307823181, + "learning_rate": 0.0002, + "loss": 0.8936, + "step": 10180 + }, + { + "epoch": 0.6687448728465956, + "grad_norm": 0.460721880197525, + "learning_rate": 0.0002, + "loss": 0.9181, + "step": 10190 + }, + { + "epoch": 0.6694011484823625, + "grad_norm": 0.3525084853172302, + "learning_rate": 0.0002, + "loss": 0.8879, + "step": 10200 + }, + { + "epoch": 0.6700574241181296, + "grad_norm": 0.34321045875549316, + "learning_rate": 0.0002, + "loss": 0.934, + "step": 10210 + }, + { + "epoch": 0.6707136997538966, + "grad_norm": 0.3397759795188904, + "learning_rate": 0.0002, + "loss": 0.9056, + "step": 10220 + }, + { + "epoch": 0.6713699753896637, + "grad_norm": 0.3680257201194763, + "learning_rate": 0.0002, + "loss": 0.8883, + "step": 10230 + }, + { + "epoch": 0.6720262510254307, + "grad_norm": 0.4023214876651764, + "learning_rate": 0.0002, + "loss": 0.9316, + "step": 10240 + }, + { + "epoch": 0.6726825266611977, + "grad_norm": 0.3645709753036499, + "learning_rate": 0.0002, + "loss": 0.8809, + "step": 10250 + }, + { + "epoch": 0.6733388022969647, + "grad_norm": 0.3558615744113922, + "learning_rate": 0.0002, + "loss": 0.9766, + "step": 10260 + }, + { + "epoch": 0.6739950779327317, + "grad_norm": 0.3725513517856598, + "learning_rate": 0.0002, + "loss": 0.872, + "step": 10270 + }, + { + "epoch": 0.6746513535684988, + "grad_norm": 0.42790961265563965, + "learning_rate": 0.0002, + "loss": 0.9354, + "step": 10280 + }, + { + "epoch": 0.6753076292042658, + "grad_norm": 0.3808377683162689, + "learning_rate": 0.0002, + "loss": 0.8955, + "step": 10290 + }, + { + "epoch": 0.6759639048400328, + "grad_norm": 0.40500468015670776, + "learning_rate": 0.0002, + "loss": 0.8761, + "step": 10300 + }, + { + "epoch": 0.6766201804757999, + "grad_norm": 0.3631184995174408, + "learning_rate": 0.0002, + "loss": 0.9212, + "step": 10310 + }, + { + "epoch": 0.6772764561115668, + "grad_norm": 0.3319573998451233, + "learning_rate": 0.0002, + "loss": 0.8867, + "step": 10320 + }, + { + "epoch": 0.6779327317473339, + "grad_norm": 0.3851188123226166, + "learning_rate": 0.0002, + "loss": 0.857, + "step": 10330 + }, + { + "epoch": 0.6785890073831009, + "grad_norm": 0.37413159012794495, + "learning_rate": 0.0002, + "loss": 0.9159, + "step": 10340 + }, + { + "epoch": 0.6792452830188679, + "grad_norm": 0.34124433994293213, + "learning_rate": 0.0002, + "loss": 0.8728, + "step": 10350 + }, + { + "epoch": 0.679901558654635, + "grad_norm": 0.3786381781101227, + "learning_rate": 0.0002, + "loss": 0.9654, + "step": 10360 + }, + { + "epoch": 0.680557834290402, + "grad_norm": 0.3848523199558258, + "learning_rate": 0.0002, + "loss": 0.915, + "step": 10370 + }, + { + "epoch": 0.681214109926169, + "grad_norm": 0.29365262389183044, + "learning_rate": 0.0002, + "loss": 0.8873, + "step": 10380 + }, + { + "epoch": 0.681870385561936, + "grad_norm": 0.3481557369232178, + "learning_rate": 0.0002, + "loss": 0.8692, + "step": 10390 + }, + { + "epoch": 0.682526661197703, + "grad_norm": 0.38596320152282715, + "learning_rate": 0.0002, + "loss": 0.885, + "step": 10400 + }, + { + "epoch": 0.6831829368334701, + "grad_norm": 0.30872032046318054, + "learning_rate": 0.0002, + "loss": 0.8545, + "step": 10410 + }, + { + "epoch": 0.6838392124692371, + "grad_norm": 0.3729351758956909, + "learning_rate": 0.0002, + "loss": 0.867, + "step": 10420 + }, + { + "epoch": 0.684495488105004, + "grad_norm": 0.35829052329063416, + "learning_rate": 0.0002, + "loss": 0.9089, + "step": 10430 + }, + { + "epoch": 0.6851517637407711, + "grad_norm": 0.34916967153549194, + "learning_rate": 0.0002, + "loss": 0.8797, + "step": 10440 + }, + { + "epoch": 0.6858080393765381, + "grad_norm": 0.40377020835876465, + "learning_rate": 0.0002, + "loss": 0.8731, + "step": 10450 + }, + { + "epoch": 0.6864643150123052, + "grad_norm": 0.36841881275177, + "learning_rate": 0.0002, + "loss": 0.9027, + "step": 10460 + }, + { + "epoch": 0.6871205906480722, + "grad_norm": 0.35462167859077454, + "learning_rate": 0.0002, + "loss": 0.8808, + "step": 10470 + }, + { + "epoch": 0.6877768662838392, + "grad_norm": 0.4630918502807617, + "learning_rate": 0.0002, + "loss": 0.967, + "step": 10480 + }, + { + "epoch": 0.6884331419196063, + "grad_norm": 0.4230295419692993, + "learning_rate": 0.0002, + "loss": 0.9114, + "step": 10490 + }, + { + "epoch": 0.6890894175553732, + "grad_norm": 0.35059425234794617, + "learning_rate": 0.0002, + "loss": 0.8908, + "step": 10500 + }, + { + "epoch": 0.6897456931911403, + "grad_norm": 0.4072548747062683, + "learning_rate": 0.0002, + "loss": 0.9325, + "step": 10510 + }, + { + "epoch": 0.6904019688269073, + "grad_norm": 0.33755314350128174, + "learning_rate": 0.0002, + "loss": 0.9026, + "step": 10520 + }, + { + "epoch": 0.6910582444626743, + "grad_norm": 0.4411669075489044, + "learning_rate": 0.0002, + "loss": 0.908, + "step": 10530 + }, + { + "epoch": 0.6917145200984414, + "grad_norm": 0.9155740737915039, + "learning_rate": 0.0002, + "loss": 0.8639, + "step": 10540 + }, + { + "epoch": 0.6923707957342083, + "grad_norm": 0.39329391717910767, + "learning_rate": 0.0002, + "loss": 0.8975, + "step": 10550 + }, + { + "epoch": 0.6930270713699754, + "grad_norm": 0.3602689206600189, + "learning_rate": 0.0002, + "loss": 0.92, + "step": 10560 + }, + { + "epoch": 0.6936833470057424, + "grad_norm": 0.35848474502563477, + "learning_rate": 0.0002, + "loss": 0.8247, + "step": 10570 + }, + { + "epoch": 0.6943396226415094, + "grad_norm": 0.37857621908187866, + "learning_rate": 0.0002, + "loss": 0.9171, + "step": 10580 + }, + { + "epoch": 0.6949958982772765, + "grad_norm": 0.37071332335472107, + "learning_rate": 0.0002, + "loss": 0.9533, + "step": 10590 + }, + { + "epoch": 0.6956521739130435, + "grad_norm": 0.33589425683021545, + "learning_rate": 0.0002, + "loss": 0.9237, + "step": 10600 + }, + { + "epoch": 0.6963084495488105, + "grad_norm": 0.4279285669326782, + "learning_rate": 0.0002, + "loss": 0.8772, + "step": 10610 + }, + { + "epoch": 0.6969647251845775, + "grad_norm": 0.47047463059425354, + "learning_rate": 0.0002, + "loss": 0.9452, + "step": 10620 + }, + { + "epoch": 0.6976210008203445, + "grad_norm": 0.41214805841445923, + "learning_rate": 0.0002, + "loss": 0.8996, + "step": 10630 + }, + { + "epoch": 0.6982772764561116, + "grad_norm": 0.3999953866004944, + "learning_rate": 0.0002, + "loss": 0.8776, + "step": 10640 + }, + { + "epoch": 0.6989335520918786, + "grad_norm": 0.42441290616989136, + "learning_rate": 0.0002, + "loss": 0.9426, + "step": 10650 + }, + { + "epoch": 0.6995898277276457, + "grad_norm": 0.41990748047828674, + "learning_rate": 0.0002, + "loss": 0.9319, + "step": 10660 + }, + { + "epoch": 0.7002461033634126, + "grad_norm": 0.37445810437202454, + "learning_rate": 0.0002, + "loss": 0.9024, + "step": 10670 + }, + { + "epoch": 0.7009023789991796, + "grad_norm": 0.37638649344444275, + "learning_rate": 0.0002, + "loss": 0.9029, + "step": 10680 + }, + { + "epoch": 0.7015586546349467, + "grad_norm": 0.41475534439086914, + "learning_rate": 0.0002, + "loss": 0.9211, + "step": 10690 + }, + { + "epoch": 0.7022149302707137, + "grad_norm": 0.3430996537208557, + "learning_rate": 0.0002, + "loss": 0.914, + "step": 10700 + }, + { + "epoch": 0.7028712059064807, + "grad_norm": 0.3569522798061371, + "learning_rate": 0.0002, + "loss": 0.9051, + "step": 10710 + }, + { + "epoch": 0.7035274815422478, + "grad_norm": 0.3145142197608948, + "learning_rate": 0.0002, + "loss": 0.8792, + "step": 10720 + }, + { + "epoch": 0.7041837571780147, + "grad_norm": 0.40700432658195496, + "learning_rate": 0.0002, + "loss": 0.9537, + "step": 10730 + }, + { + "epoch": 0.7048400328137818, + "grad_norm": 0.37982651591300964, + "learning_rate": 0.0002, + "loss": 0.8677, + "step": 10740 + }, + { + "epoch": 0.7054963084495488, + "grad_norm": 0.39240679144859314, + "learning_rate": 0.0002, + "loss": 0.8899, + "step": 10750 + }, + { + "epoch": 0.7061525840853158, + "grad_norm": 0.41788724064826965, + "learning_rate": 0.0002, + "loss": 0.9516, + "step": 10760 + }, + { + "epoch": 0.7068088597210829, + "grad_norm": 0.4015905559062958, + "learning_rate": 0.0002, + "loss": 0.8672, + "step": 10770 + }, + { + "epoch": 0.7074651353568499, + "grad_norm": 0.4019724130630493, + "learning_rate": 0.0002, + "loss": 0.9131, + "step": 10780 + }, + { + "epoch": 0.7081214109926169, + "grad_norm": 0.37387898564338684, + "learning_rate": 0.0002, + "loss": 0.8698, + "step": 10790 + }, + { + "epoch": 0.7087776866283839, + "grad_norm": 0.32087528705596924, + "learning_rate": 0.0002, + "loss": 0.8547, + "step": 10800 + }, + { + "epoch": 0.7094339622641509, + "grad_norm": 0.38160258531570435, + "learning_rate": 0.0002, + "loss": 0.9241, + "step": 10810 + }, + { + "epoch": 0.710090237899918, + "grad_norm": 0.340973436832428, + "learning_rate": 0.0002, + "loss": 0.8941, + "step": 10820 + }, + { + "epoch": 0.710746513535685, + "grad_norm": 0.3848867416381836, + "learning_rate": 0.0002, + "loss": 0.9404, + "step": 10830 + }, + { + "epoch": 0.711402789171452, + "grad_norm": 0.36736220121383667, + "learning_rate": 0.0002, + "loss": 0.9299, + "step": 10840 + }, + { + "epoch": 0.712059064807219, + "grad_norm": 0.367404967546463, + "learning_rate": 0.0002, + "loss": 0.8881, + "step": 10850 + }, + { + "epoch": 0.712715340442986, + "grad_norm": 0.333751380443573, + "learning_rate": 0.0002, + "loss": 0.9224, + "step": 10860 + }, + { + "epoch": 0.7133716160787531, + "grad_norm": 0.3865894079208374, + "learning_rate": 0.0002, + "loss": 0.8712, + "step": 10870 + }, + { + "epoch": 0.7140278917145201, + "grad_norm": 0.34016433358192444, + "learning_rate": 0.0002, + "loss": 0.8957, + "step": 10880 + }, + { + "epoch": 0.7146841673502872, + "grad_norm": 0.3233864903450012, + "learning_rate": 0.0002, + "loss": 0.8493, + "step": 10890 + }, + { + "epoch": 0.7153404429860541, + "grad_norm": 0.4275553822517395, + "learning_rate": 0.0002, + "loss": 0.9148, + "step": 10900 + }, + { + "epoch": 0.7159967186218211, + "grad_norm": 0.41715168952941895, + "learning_rate": 0.0002, + "loss": 0.9345, + "step": 10910 + }, + { + "epoch": 0.7166529942575882, + "grad_norm": 0.40540871024131775, + "learning_rate": 0.0002, + "loss": 0.9321, + "step": 10920 + }, + { + "epoch": 0.7173092698933552, + "grad_norm": 0.35520824790000916, + "learning_rate": 0.0002, + "loss": 0.9035, + "step": 10930 + }, + { + "epoch": 0.7179655455291223, + "grad_norm": 0.3805985748767853, + "learning_rate": 0.0002, + "loss": 0.9243, + "step": 10940 + }, + { + "epoch": 0.7186218211648893, + "grad_norm": 0.36969226598739624, + "learning_rate": 0.0002, + "loss": 0.9542, + "step": 10950 + }, + { + "epoch": 0.7192780968006562, + "grad_norm": 0.38408684730529785, + "learning_rate": 0.0002, + "loss": 0.892, + "step": 10960 + }, + { + "epoch": 0.7199343724364233, + "grad_norm": 0.3865699768066406, + "learning_rate": 0.0002, + "loss": 0.8699, + "step": 10970 + }, + { + "epoch": 0.7205906480721903, + "grad_norm": 0.37172383069992065, + "learning_rate": 0.0002, + "loss": 0.8767, + "step": 10980 + }, + { + "epoch": 0.7212469237079573, + "grad_norm": 0.39816758036613464, + "learning_rate": 0.0002, + "loss": 0.8609, + "step": 10990 + }, + { + "epoch": 0.7219031993437244, + "grad_norm": 0.4290051758289337, + "learning_rate": 0.0002, + "loss": 0.94, + "step": 11000 + }, + { + "epoch": 0.7225594749794914, + "grad_norm": 0.3733605146408081, + "learning_rate": 0.0002, + "loss": 0.9328, + "step": 11010 + }, + { + "epoch": 0.7232157506152584, + "grad_norm": 0.37305396795272827, + "learning_rate": 0.0002, + "loss": 0.9048, + "step": 11020 + }, + { + "epoch": 0.7238720262510254, + "grad_norm": 0.3557985723018646, + "learning_rate": 0.0002, + "loss": 0.8713, + "step": 11030 + }, + { + "epoch": 0.7245283018867924, + "grad_norm": 0.4635949730873108, + "learning_rate": 0.0002, + "loss": 0.8558, + "step": 11040 + }, + { + "epoch": 0.7251845775225595, + "grad_norm": 0.3419910967350006, + "learning_rate": 0.0002, + "loss": 0.8711, + "step": 11050 + }, + { + "epoch": 0.7258408531583265, + "grad_norm": 0.41529953479766846, + "learning_rate": 0.0002, + "loss": 0.9138, + "step": 11060 + }, + { + "epoch": 0.7264971287940936, + "grad_norm": 0.325923353433609, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 11070 + }, + { + "epoch": 0.7271534044298605, + "grad_norm": 0.41932010650634766, + "learning_rate": 0.0002, + "loss": 0.9924, + "step": 11080 + }, + { + "epoch": 0.7278096800656275, + "grad_norm": 0.39984336495399475, + "learning_rate": 0.0002, + "loss": 0.9662, + "step": 11090 + }, + { + "epoch": 0.7284659557013946, + "grad_norm": 0.3673977851867676, + "learning_rate": 0.0002, + "loss": 0.8849, + "step": 11100 + }, + { + "epoch": 0.7291222313371616, + "grad_norm": 0.3722262680530548, + "learning_rate": 0.0002, + "loss": 0.9201, + "step": 11110 + }, + { + "epoch": 0.7297785069729287, + "grad_norm": 0.3859870135784149, + "learning_rate": 0.0002, + "loss": 0.9156, + "step": 11120 + }, + { + "epoch": 0.7304347826086957, + "grad_norm": 0.3323642313480377, + "learning_rate": 0.0002, + "loss": 0.899, + "step": 11130 + }, + { + "epoch": 0.7310910582444626, + "grad_norm": 0.37793564796447754, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 11140 + }, + { + "epoch": 0.7317473338802297, + "grad_norm": 0.44874733686447144, + "learning_rate": 0.0002, + "loss": 0.8767, + "step": 11150 + }, + { + "epoch": 0.7324036095159967, + "grad_norm": 0.4104187786579132, + "learning_rate": 0.0002, + "loss": 0.9078, + "step": 11160 + }, + { + "epoch": 0.7330598851517638, + "grad_norm": 0.3699926435947418, + "learning_rate": 0.0002, + "loss": 0.8828, + "step": 11170 + }, + { + "epoch": 0.7337161607875308, + "grad_norm": 0.41013723611831665, + "learning_rate": 0.0002, + "loss": 0.8909, + "step": 11180 + }, + { + "epoch": 0.7343724364232977, + "grad_norm": 0.370669424533844, + "learning_rate": 0.0002, + "loss": 0.8934, + "step": 11190 + }, + { + "epoch": 0.7350287120590648, + "grad_norm": 0.4059436619281769, + "learning_rate": 0.0002, + "loss": 0.881, + "step": 11200 + }, + { + "epoch": 0.7356849876948318, + "grad_norm": 0.5038959383964539, + "learning_rate": 0.0002, + "loss": 0.903, + "step": 11210 + }, + { + "epoch": 0.7363412633305989, + "grad_norm": 0.337137371301651, + "learning_rate": 0.0002, + "loss": 0.9022, + "step": 11220 + }, + { + "epoch": 0.7369975389663659, + "grad_norm": 0.412392795085907, + "learning_rate": 0.0002, + "loss": 0.9171, + "step": 11230 + }, + { + "epoch": 0.7376538146021329, + "grad_norm": 0.4415507912635803, + "learning_rate": 0.0002, + "loss": 0.8736, + "step": 11240 + }, + { + "epoch": 0.7383100902379, + "grad_norm": 0.35013696551322937, + "learning_rate": 0.0002, + "loss": 0.8936, + "step": 11250 + }, + { + "epoch": 0.7389663658736669, + "grad_norm": 0.3677300810813904, + "learning_rate": 0.0002, + "loss": 0.8841, + "step": 11260 + }, + { + "epoch": 0.7396226415094339, + "grad_norm": 0.36722511053085327, + "learning_rate": 0.0002, + "loss": 0.8756, + "step": 11270 + }, + { + "epoch": 0.740278917145201, + "grad_norm": 0.41611534357070923, + "learning_rate": 0.0002, + "loss": 0.8727, + "step": 11280 + }, + { + "epoch": 0.740935192780968, + "grad_norm": 0.4244968295097351, + "learning_rate": 0.0002, + "loss": 0.921, + "step": 11290 + }, + { + "epoch": 0.7415914684167351, + "grad_norm": 0.38986068964004517, + "learning_rate": 0.0002, + "loss": 0.9696, + "step": 11300 + }, + { + "epoch": 0.742247744052502, + "grad_norm": 0.33212459087371826, + "learning_rate": 0.0002, + "loss": 0.855, + "step": 11310 + }, + { + "epoch": 0.742904019688269, + "grad_norm": 0.4290331304073334, + "learning_rate": 0.0002, + "loss": 0.9296, + "step": 11320 + }, + { + "epoch": 0.7435602953240361, + "grad_norm": 0.3750900626182556, + "learning_rate": 0.0002, + "loss": 0.867, + "step": 11330 + }, + { + "epoch": 0.7442165709598031, + "grad_norm": 0.37758177518844604, + "learning_rate": 0.0002, + "loss": 0.9462, + "step": 11340 + }, + { + "epoch": 0.7448728465955702, + "grad_norm": 0.31147271394729614, + "learning_rate": 0.0002, + "loss": 0.849, + "step": 11350 + }, + { + "epoch": 0.7455291222313372, + "grad_norm": 0.4493428170681, + "learning_rate": 0.0002, + "loss": 0.9675, + "step": 11360 + }, + { + "epoch": 0.7461853978671041, + "grad_norm": 0.4268129765987396, + "learning_rate": 0.0002, + "loss": 0.9506, + "step": 11370 + }, + { + "epoch": 0.7468416735028712, + "grad_norm": 0.3716314733028412, + "learning_rate": 0.0002, + "loss": 0.9371, + "step": 11380 + }, + { + "epoch": 0.7474979491386382, + "grad_norm": 0.33728593587875366, + "learning_rate": 0.0002, + "loss": 0.8762, + "step": 11390 + }, + { + "epoch": 0.7481542247744053, + "grad_norm": 0.36548155546188354, + "learning_rate": 0.0002, + "loss": 0.8532, + "step": 11400 + }, + { + "epoch": 0.7488105004101723, + "grad_norm": 0.32645145058631897, + "learning_rate": 0.0002, + "loss": 0.8709, + "step": 11410 + }, + { + "epoch": 0.7494667760459393, + "grad_norm": 0.36403900384902954, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 11420 + }, + { + "epoch": 0.7501230516817063, + "grad_norm": 0.4186992049217224, + "learning_rate": 0.0002, + "loss": 0.9734, + "step": 11430 + }, + { + "epoch": 0.7507793273174733, + "grad_norm": 0.342457115650177, + "learning_rate": 0.0002, + "loss": 0.8982, + "step": 11440 + }, + { + "epoch": 0.7514356029532404, + "grad_norm": 0.3882320821285248, + "learning_rate": 0.0002, + "loss": 0.8785, + "step": 11450 + }, + { + "epoch": 0.7520918785890074, + "grad_norm": 0.4653763771057129, + "learning_rate": 0.0002, + "loss": 0.9492, + "step": 11460 + }, + { + "epoch": 0.7527481542247744, + "grad_norm": 0.3960241377353668, + "learning_rate": 0.0002, + "loss": 0.9357, + "step": 11470 + }, + { + "epoch": 0.7534044298605415, + "grad_norm": 0.38367652893066406, + "learning_rate": 0.0002, + "loss": 0.8912, + "step": 11480 + }, + { + "epoch": 0.7540607054963084, + "grad_norm": 0.39881640672683716, + "learning_rate": 0.0002, + "loss": 0.9367, + "step": 11490 + }, + { + "epoch": 0.7547169811320755, + "grad_norm": 0.40201085805892944, + "learning_rate": 0.0002, + "loss": 0.9018, + "step": 11500 + }, + { + "epoch": 0.7553732567678425, + "grad_norm": 0.38070711493492126, + "learning_rate": 0.0002, + "loss": 0.8668, + "step": 11510 + }, + { + "epoch": 0.7560295324036095, + "grad_norm": 0.334245502948761, + "learning_rate": 0.0002, + "loss": 0.8542, + "step": 11520 + }, + { + "epoch": 0.7566858080393766, + "grad_norm": 0.42725566029548645, + "learning_rate": 0.0002, + "loss": 0.9036, + "step": 11530 + }, + { + "epoch": 0.7573420836751436, + "grad_norm": 1.4140206575393677, + "learning_rate": 0.0002, + "loss": 0.9443, + "step": 11540 + }, + { + "epoch": 0.7579983593109105, + "grad_norm": 0.5802565813064575, + "learning_rate": 0.0002, + "loss": 0.8962, + "step": 11550 + }, + { + "epoch": 0.7586546349466776, + "grad_norm": 0.45892661809921265, + "learning_rate": 0.0002, + "loss": 0.9095, + "step": 11560 + }, + { + "epoch": 0.7593109105824446, + "grad_norm": 0.3837396800518036, + "learning_rate": 0.0002, + "loss": 0.8373, + "step": 11570 + }, + { + "epoch": 0.7599671862182117, + "grad_norm": 0.4158342480659485, + "learning_rate": 0.0002, + "loss": 0.9452, + "step": 11580 + }, + { + "epoch": 0.7606234618539787, + "grad_norm": 0.4374973773956299, + "learning_rate": 0.0002, + "loss": 0.9551, + "step": 11590 + }, + { + "epoch": 0.7612797374897456, + "grad_norm": 0.36342450976371765, + "learning_rate": 0.0002, + "loss": 0.9181, + "step": 11600 + }, + { + "epoch": 0.7619360131255127, + "grad_norm": 0.37019461393356323, + "learning_rate": 0.0002, + "loss": 0.8596, + "step": 11610 + }, + { + "epoch": 0.7625922887612797, + "grad_norm": 0.3745611608028412, + "learning_rate": 0.0002, + "loss": 0.8804, + "step": 11620 + }, + { + "epoch": 0.7632485643970468, + "grad_norm": 0.33488842844963074, + "learning_rate": 0.0002, + "loss": 0.8745, + "step": 11630 + }, + { + "epoch": 0.7639048400328138, + "grad_norm": 0.3700532615184784, + "learning_rate": 0.0002, + "loss": 0.8798, + "step": 11640 + }, + { + "epoch": 0.7645611156685808, + "grad_norm": 0.3722131848335266, + "learning_rate": 0.0002, + "loss": 0.8943, + "step": 11650 + }, + { + "epoch": 0.7652173913043478, + "grad_norm": 0.3463144302368164, + "learning_rate": 0.0002, + "loss": 0.8702, + "step": 11660 + }, + { + "epoch": 0.7658736669401148, + "grad_norm": 0.339691162109375, + "learning_rate": 0.0002, + "loss": 0.865, + "step": 11670 + }, + { + "epoch": 0.7665299425758819, + "grad_norm": 0.33323508501052856, + "learning_rate": 0.0002, + "loss": 0.9536, + "step": 11680 + }, + { + "epoch": 0.7671862182116489, + "grad_norm": 0.3937166631221771, + "learning_rate": 0.0002, + "loss": 0.8657, + "step": 11690 + }, + { + "epoch": 0.7678424938474159, + "grad_norm": 0.4112081527709961, + "learning_rate": 0.0002, + "loss": 0.8857, + "step": 11700 + }, + { + "epoch": 0.768498769483183, + "grad_norm": 0.4242405593395233, + "learning_rate": 0.0002, + "loss": 0.9108, + "step": 11710 + }, + { + "epoch": 0.7691550451189499, + "grad_norm": 0.33512821793556213, + "learning_rate": 0.0002, + "loss": 0.9148, + "step": 11720 + }, + { + "epoch": 0.769811320754717, + "grad_norm": 0.5148407816886902, + "learning_rate": 0.0002, + "loss": 0.8599, + "step": 11730 + }, + { + "epoch": 0.770467596390484, + "grad_norm": 0.4562109112739563, + "learning_rate": 0.0002, + "loss": 0.9678, + "step": 11740 + }, + { + "epoch": 0.771123872026251, + "grad_norm": 0.4246354401111603, + "learning_rate": 0.0002, + "loss": 0.8937, + "step": 11750 + }, + { + "epoch": 0.7717801476620181, + "grad_norm": 0.39338991045951843, + "learning_rate": 0.0002, + "loss": 0.8821, + "step": 11760 + }, + { + "epoch": 0.7724364232977851, + "grad_norm": 0.403199166059494, + "learning_rate": 0.0002, + "loss": 0.8769, + "step": 11770 + }, + { + "epoch": 0.7730926989335521, + "grad_norm": 0.4494798183441162, + "learning_rate": 0.0002, + "loss": 0.8914, + "step": 11780 + }, + { + "epoch": 0.7737489745693191, + "grad_norm": 0.3649079501628876, + "learning_rate": 0.0002, + "loss": 0.8633, + "step": 11790 + }, + { + "epoch": 0.7744052502050861, + "grad_norm": 0.3314788341522217, + "learning_rate": 0.0002, + "loss": 0.8684, + "step": 11800 + }, + { + "epoch": 0.7750615258408532, + "grad_norm": 0.36505308747291565, + "learning_rate": 0.0002, + "loss": 0.8756, + "step": 11810 + }, + { + "epoch": 0.7757178014766202, + "grad_norm": 0.45973560214042664, + "learning_rate": 0.0002, + "loss": 0.8778, + "step": 11820 + }, + { + "epoch": 0.7763740771123872, + "grad_norm": 0.3941294550895691, + "learning_rate": 0.0002, + "loss": 0.8845, + "step": 11830 + }, + { + "epoch": 0.7770303527481542, + "grad_norm": 0.34385251998901367, + "learning_rate": 0.0002, + "loss": 0.9033, + "step": 11840 + }, + { + "epoch": 0.7776866283839212, + "grad_norm": 0.35967403650283813, + "learning_rate": 0.0002, + "loss": 0.9595, + "step": 11850 + }, + { + "epoch": 0.7783429040196883, + "grad_norm": 0.4731179177761078, + "learning_rate": 0.0002, + "loss": 0.9193, + "step": 11860 + }, + { + "epoch": 0.7789991796554553, + "grad_norm": 0.38854387402534485, + "learning_rate": 0.0002, + "loss": 0.9344, + "step": 11870 + }, + { + "epoch": 0.7796554552912223, + "grad_norm": 0.3925110995769501, + "learning_rate": 0.0002, + "loss": 0.9056, + "step": 11880 + }, + { + "epoch": 0.7803117309269894, + "grad_norm": 0.35882773995399475, + "learning_rate": 0.0002, + "loss": 0.8818, + "step": 11890 + }, + { + "epoch": 0.7809680065627563, + "grad_norm": 0.4351222813129425, + "learning_rate": 0.0002, + "loss": 0.9097, + "step": 11900 + }, + { + "epoch": 0.7816242821985234, + "grad_norm": 0.39528653025627136, + "learning_rate": 0.0002, + "loss": 0.8866, + "step": 11910 + }, + { + "epoch": 0.7822805578342904, + "grad_norm": 0.34876471757888794, + "learning_rate": 0.0002, + "loss": 0.91, + "step": 11920 + }, + { + "epoch": 0.7829368334700574, + "grad_norm": 0.44766634702682495, + "learning_rate": 0.0002, + "loss": 0.9329, + "step": 11930 + }, + { + "epoch": 0.7835931091058245, + "grad_norm": 0.42268314957618713, + "learning_rate": 0.0002, + "loss": 0.9687, + "step": 11940 + }, + { + "epoch": 0.7842493847415914, + "grad_norm": 0.377101868391037, + "learning_rate": 0.0002, + "loss": 0.911, + "step": 11950 + }, + { + "epoch": 0.7849056603773585, + "grad_norm": 0.4489518404006958, + "learning_rate": 0.0002, + "loss": 0.8711, + "step": 11960 + }, + { + "epoch": 0.7855619360131255, + "grad_norm": 0.4585464596748352, + "learning_rate": 0.0002, + "loss": 0.9177, + "step": 11970 + }, + { + "epoch": 0.7862182116488925, + "grad_norm": 0.41329675912857056, + "learning_rate": 0.0002, + "loss": 0.901, + "step": 11980 + }, + { + "epoch": 0.7868744872846596, + "grad_norm": 0.42738014459609985, + "learning_rate": 0.0002, + "loss": 0.8993, + "step": 11990 + }, + { + "epoch": 0.7875307629204266, + "grad_norm": 0.40386950969696045, + "learning_rate": 0.0002, + "loss": 0.9206, + "step": 12000 + }, + { + "epoch": 0.7881870385561937, + "grad_norm": 0.3859177827835083, + "learning_rate": 0.0002, + "loss": 0.8702, + "step": 12010 + }, + { + "epoch": 0.7888433141919606, + "grad_norm": 0.3537571430206299, + "learning_rate": 0.0002, + "loss": 0.87, + "step": 12020 + }, + { + "epoch": 0.7894995898277276, + "grad_norm": 0.39319470524787903, + "learning_rate": 0.0002, + "loss": 0.9106, + "step": 12030 + }, + { + "epoch": 0.7901558654634947, + "grad_norm": 0.32762688398361206, + "learning_rate": 0.0002, + "loss": 0.9197, + "step": 12040 + }, + { + "epoch": 0.7908121410992617, + "grad_norm": 0.34617939591407776, + "learning_rate": 0.0002, + "loss": 0.9218, + "step": 12050 + }, + { + "epoch": 0.7914684167350287, + "grad_norm": 0.4012453258037567, + "learning_rate": 0.0002, + "loss": 0.9099, + "step": 12060 + }, + { + "epoch": 0.7921246923707957, + "grad_norm": 0.4265747666358948, + "learning_rate": 0.0002, + "loss": 0.9857, + "step": 12070 + }, + { + "epoch": 0.7927809680065627, + "grad_norm": 0.42164385318756104, + "learning_rate": 0.0002, + "loss": 0.9169, + "step": 12080 + }, + { + "epoch": 0.7934372436423298, + "grad_norm": 0.4054335951805115, + "learning_rate": 0.0002, + "loss": 0.8952, + "step": 12090 + }, + { + "epoch": 0.7940935192780968, + "grad_norm": 0.38486307859420776, + "learning_rate": 0.0002, + "loss": 0.9706, + "step": 12100 + }, + { + "epoch": 0.7947497949138638, + "grad_norm": 0.3842015266418457, + "learning_rate": 0.0002, + "loss": 0.8703, + "step": 12110 + }, + { + "epoch": 0.7954060705496309, + "grad_norm": 0.3772023320198059, + "learning_rate": 0.0002, + "loss": 0.8847, + "step": 12120 + }, + { + "epoch": 0.7960623461853978, + "grad_norm": 0.39477309584617615, + "learning_rate": 0.0002, + "loss": 0.9248, + "step": 12130 + }, + { + "epoch": 0.7967186218211649, + "grad_norm": 0.3458614945411682, + "learning_rate": 0.0002, + "loss": 0.8865, + "step": 12140 + }, + { + "epoch": 0.7973748974569319, + "grad_norm": 0.42238670587539673, + "learning_rate": 0.0002, + "loss": 0.884, + "step": 12150 + }, + { + "epoch": 0.7980311730926989, + "grad_norm": 0.3623220920562744, + "learning_rate": 0.0002, + "loss": 0.9561, + "step": 12160 + }, + { + "epoch": 0.798687448728466, + "grad_norm": 0.426715224981308, + "learning_rate": 0.0002, + "loss": 1.0033, + "step": 12170 + }, + { + "epoch": 0.799343724364233, + "grad_norm": 0.3558938205242157, + "learning_rate": 0.0002, + "loss": 0.9158, + "step": 12180 + }, + { + "epoch": 0.8, + "grad_norm": 0.426761269569397, + "learning_rate": 0.0002, + "loss": 0.9323, + "step": 12190 + }, + { + "epoch": 0.800656275635767, + "grad_norm": 0.42333319783210754, + "learning_rate": 0.0002, + "loss": 0.9256, + "step": 12200 + }, + { + "epoch": 0.801312551271534, + "grad_norm": 0.42534688115119934, + "learning_rate": 0.0002, + "loss": 0.9307, + "step": 12210 + }, + { + "epoch": 0.8019688269073011, + "grad_norm": 0.37565773725509644, + "learning_rate": 0.0002, + "loss": 0.8999, + "step": 12220 + }, + { + "epoch": 0.8026251025430681, + "grad_norm": 0.3591224253177643, + "learning_rate": 0.0002, + "loss": 0.8531, + "step": 12230 + }, + { + "epoch": 0.8032813781788352, + "grad_norm": 0.35887411236763, + "learning_rate": 0.0002, + "loss": 0.8817, + "step": 12240 + }, + { + "epoch": 0.8039376538146021, + "grad_norm": 0.3895672559738159, + "learning_rate": 0.0002, + "loss": 0.8562, + "step": 12250 + }, + { + "epoch": 0.8045939294503691, + "grad_norm": 0.3483835756778717, + "learning_rate": 0.0002, + "loss": 0.8885, + "step": 12260 + }, + { + "epoch": 0.8052502050861362, + "grad_norm": 0.37694090604782104, + "learning_rate": 0.0002, + "loss": 0.9343, + "step": 12270 + }, + { + "epoch": 0.8059064807219032, + "grad_norm": 0.4011424779891968, + "learning_rate": 0.0002, + "loss": 0.9315, + "step": 12280 + }, + { + "epoch": 0.8065627563576703, + "grad_norm": 0.3254278004169464, + "learning_rate": 0.0002, + "loss": 0.9341, + "step": 12290 + }, + { + "epoch": 0.8072190319934373, + "grad_norm": 0.3868531286716461, + "learning_rate": 0.0002, + "loss": 0.9025, + "step": 12300 + }, + { + "epoch": 0.8078753076292042, + "grad_norm": 0.44830775260925293, + "learning_rate": 0.0002, + "loss": 0.8959, + "step": 12310 + }, + { + "epoch": 0.8085315832649713, + "grad_norm": 0.38808006048202515, + "learning_rate": 0.0002, + "loss": 0.9639, + "step": 12320 + }, + { + "epoch": 0.8091878589007383, + "grad_norm": 0.42684856057167053, + "learning_rate": 0.0002, + "loss": 0.9119, + "step": 12330 + }, + { + "epoch": 0.8098441345365053, + "grad_norm": 0.310099333524704, + "learning_rate": 0.0002, + "loss": 0.9231, + "step": 12340 + }, + { + "epoch": 0.8105004101722724, + "grad_norm": 0.5252287983894348, + "learning_rate": 0.0002, + "loss": 0.8607, + "step": 12350 + }, + { + "epoch": 0.8111566858080393, + "grad_norm": 0.41571253538131714, + "learning_rate": 0.0002, + "loss": 0.8559, + "step": 12360 + }, + { + "epoch": 0.8118129614438064, + "grad_norm": 0.4471582770347595, + "learning_rate": 0.0002, + "loss": 0.8626, + "step": 12370 + }, + { + "epoch": 0.8124692370795734, + "grad_norm": 0.3977353274822235, + "learning_rate": 0.0002, + "loss": 0.923, + "step": 12380 + }, + { + "epoch": 0.8131255127153404, + "grad_norm": 0.3274862766265869, + "learning_rate": 0.0002, + "loss": 0.8792, + "step": 12390 + }, + { + "epoch": 0.8137817883511075, + "grad_norm": 0.4114132821559906, + "learning_rate": 0.0002, + "loss": 0.907, + "step": 12400 + }, + { + "epoch": 0.8144380639868745, + "grad_norm": 0.40929168462753296, + "learning_rate": 0.0002, + "loss": 0.9324, + "step": 12410 + }, + { + "epoch": 0.8150943396226416, + "grad_norm": 0.32346615195274353, + "learning_rate": 0.0002, + "loss": 0.8745, + "step": 12420 + }, + { + "epoch": 0.8157506152584085, + "grad_norm": 0.36344656348228455, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 12430 + }, + { + "epoch": 0.8164068908941755, + "grad_norm": 0.3749464452266693, + "learning_rate": 0.0002, + "loss": 0.872, + "step": 12440 + }, + { + "epoch": 0.8170631665299426, + "grad_norm": 0.4742373526096344, + "learning_rate": 0.0002, + "loss": 0.9128, + "step": 12450 + }, + { + "epoch": 0.8177194421657096, + "grad_norm": 0.3863218128681183, + "learning_rate": 0.0002, + "loss": 0.8972, + "step": 12460 + }, + { + "epoch": 0.8183757178014767, + "grad_norm": 0.47756487131118774, + "learning_rate": 0.0002, + "loss": 0.9148, + "step": 12470 + }, + { + "epoch": 0.8190319934372436, + "grad_norm": 0.4045886993408203, + "learning_rate": 0.0002, + "loss": 0.8817, + "step": 12480 + }, + { + "epoch": 0.8196882690730106, + "grad_norm": 0.4119892418384552, + "learning_rate": 0.0002, + "loss": 0.866, + "step": 12490 + }, + { + "epoch": 0.8203445447087777, + "grad_norm": 0.3714514672756195, + "learning_rate": 0.0002, + "loss": 0.866, + "step": 12500 + }, + { + "epoch": 0.8210008203445447, + "grad_norm": 0.38410791754722595, + "learning_rate": 0.0002, + "loss": 0.8943, + "step": 12510 + }, + { + "epoch": 0.8216570959803118, + "grad_norm": 0.3178478479385376, + "learning_rate": 0.0002, + "loss": 0.8857, + "step": 12520 + }, + { + "epoch": 0.8223133716160788, + "grad_norm": 0.4347972869873047, + "learning_rate": 0.0002, + "loss": 0.9356, + "step": 12530 + }, + { + "epoch": 0.8229696472518457, + "grad_norm": 0.4276008903980255, + "learning_rate": 0.0002, + "loss": 0.8926, + "step": 12540 + }, + { + "epoch": 0.8236259228876128, + "grad_norm": 0.36309465765953064, + "learning_rate": 0.0002, + "loss": 0.9213, + "step": 12550 + }, + { + "epoch": 0.8242821985233798, + "grad_norm": 0.45721492171287537, + "learning_rate": 0.0002, + "loss": 0.8847, + "step": 12560 + }, + { + "epoch": 0.8249384741591469, + "grad_norm": 0.37675052881240845, + "learning_rate": 0.0002, + "loss": 0.8655, + "step": 12570 + }, + { + "epoch": 0.8255947497949139, + "grad_norm": 0.41907957196235657, + "learning_rate": 0.0002, + "loss": 0.8717, + "step": 12580 + }, + { + "epoch": 0.8262510254306809, + "grad_norm": 0.36430326104164124, + "learning_rate": 0.0002, + "loss": 0.9465, + "step": 12590 + }, + { + "epoch": 0.8269073010664479, + "grad_norm": 0.45015767216682434, + "learning_rate": 0.0002, + "loss": 0.9244, + "step": 12600 + }, + { + "epoch": 0.8275635767022149, + "grad_norm": 0.369115948677063, + "learning_rate": 0.0002, + "loss": 0.9076, + "step": 12610 + }, + { + "epoch": 0.8282198523379819, + "grad_norm": 0.4821915030479431, + "learning_rate": 0.0002, + "loss": 0.92, + "step": 12620 + }, + { + "epoch": 0.828876127973749, + "grad_norm": 0.4291541874408722, + "learning_rate": 0.0002, + "loss": 0.8813, + "step": 12630 + }, + { + "epoch": 0.829532403609516, + "grad_norm": 0.3816904127597809, + "learning_rate": 0.0002, + "loss": 0.9196, + "step": 12640 + }, + { + "epoch": 0.8301886792452831, + "grad_norm": 0.3915407657623291, + "learning_rate": 0.0002, + "loss": 0.8821, + "step": 12650 + }, + { + "epoch": 0.83084495488105, + "grad_norm": 0.35964086651802063, + "learning_rate": 0.0002, + "loss": 0.907, + "step": 12660 + }, + { + "epoch": 0.831501230516817, + "grad_norm": 0.3201609253883362, + "learning_rate": 0.0002, + "loss": 0.8512, + "step": 12670 + }, + { + "epoch": 0.8321575061525841, + "grad_norm": 0.44153767824172974, + "learning_rate": 0.0002, + "loss": 0.9085, + "step": 12680 + }, + { + "epoch": 0.8328137817883511, + "grad_norm": 0.37000054121017456, + "learning_rate": 0.0002, + "loss": 0.859, + "step": 12690 + }, + { + "epoch": 0.8334700574241182, + "grad_norm": 0.39843010902404785, + "learning_rate": 0.0002, + "loss": 0.9413, + "step": 12700 + }, + { + "epoch": 0.8341263330598851, + "grad_norm": 0.3594053387641907, + "learning_rate": 0.0002, + "loss": 0.8787, + "step": 12710 + }, + { + "epoch": 0.8347826086956521, + "grad_norm": 0.38574180006980896, + "learning_rate": 0.0002, + "loss": 0.8542, + "step": 12720 + }, + { + "epoch": 0.8354388843314192, + "grad_norm": 0.4101716876029968, + "learning_rate": 0.0002, + "loss": 0.8746, + "step": 12730 + }, + { + "epoch": 0.8360951599671862, + "grad_norm": 0.3665215075016022, + "learning_rate": 0.0002, + "loss": 0.8442, + "step": 12740 + }, + { + "epoch": 0.8367514356029533, + "grad_norm": 0.39471596479415894, + "learning_rate": 0.0002, + "loss": 0.8898, + "step": 12750 + }, + { + "epoch": 0.8374077112387203, + "grad_norm": 0.35624340176582336, + "learning_rate": 0.0002, + "loss": 0.9026, + "step": 12760 + }, + { + "epoch": 0.8380639868744872, + "grad_norm": 0.3838249742984772, + "learning_rate": 0.0002, + "loss": 0.917, + "step": 12770 + }, + { + "epoch": 0.8387202625102543, + "grad_norm": 0.408368319272995, + "learning_rate": 0.0002, + "loss": 0.874, + "step": 12780 + }, + { + "epoch": 0.8393765381460213, + "grad_norm": 0.46758291125297546, + "learning_rate": 0.0002, + "loss": 0.934, + "step": 12790 + }, + { + "epoch": 0.8400328137817884, + "grad_norm": 0.35787731409072876, + "learning_rate": 0.0002, + "loss": 0.8603, + "step": 12800 + }, + { + "epoch": 0.8406890894175554, + "grad_norm": 0.39618661999702454, + "learning_rate": 0.0002, + "loss": 0.9026, + "step": 12810 + }, + { + "epoch": 0.8413453650533224, + "grad_norm": 0.44962066411972046, + "learning_rate": 0.0002, + "loss": 0.9307, + "step": 12820 + }, + { + "epoch": 0.8420016406890894, + "grad_norm": 0.36435529589653015, + "learning_rate": 0.0002, + "loss": 0.8756, + "step": 12830 + }, + { + "epoch": 0.8426579163248564, + "grad_norm": 0.37484753131866455, + "learning_rate": 0.0002, + "loss": 0.93, + "step": 12840 + }, + { + "epoch": 0.8433141919606235, + "grad_norm": 0.36679843068122864, + "learning_rate": 0.0002, + "loss": 0.8655, + "step": 12850 + }, + { + "epoch": 0.8439704675963905, + "grad_norm": 0.3948156237602234, + "learning_rate": 0.0002, + "loss": 0.9339, + "step": 12860 + }, + { + "epoch": 0.8446267432321575, + "grad_norm": 0.3789501488208771, + "learning_rate": 0.0002, + "loss": 0.8915, + "step": 12870 + }, + { + "epoch": 0.8452830188679246, + "grad_norm": 0.45156800746917725, + "learning_rate": 0.0002, + "loss": 0.916, + "step": 12880 + }, + { + "epoch": 0.8459392945036915, + "grad_norm": 0.37762370705604553, + "learning_rate": 0.0002, + "loss": 0.8762, + "step": 12890 + }, + { + "epoch": 0.8465955701394585, + "grad_norm": 0.41654065251350403, + "learning_rate": 0.0002, + "loss": 0.8922, + "step": 12900 + }, + { + "epoch": 0.8472518457752256, + "grad_norm": 0.35026174783706665, + "learning_rate": 0.0002, + "loss": 0.9136, + "step": 12910 + }, + { + "epoch": 0.8479081214109926, + "grad_norm": 0.5227314829826355, + "learning_rate": 0.0002, + "loss": 0.8443, + "step": 12920 + }, + { + "epoch": 0.8485643970467597, + "grad_norm": 0.365063339471817, + "learning_rate": 0.0002, + "loss": 0.8533, + "step": 12930 + }, + { + "epoch": 0.8492206726825267, + "grad_norm": 0.40983277559280396, + "learning_rate": 0.0002, + "loss": 0.9012, + "step": 12940 + }, + { + "epoch": 0.8498769483182936, + "grad_norm": 0.36414071917533875, + "learning_rate": 0.0002, + "loss": 0.8788, + "step": 12950 + }, + { + "epoch": 0.8505332239540607, + "grad_norm": 0.3703882694244385, + "learning_rate": 0.0002, + "loss": 0.8981, + "step": 12960 + }, + { + "epoch": 0.8511894995898277, + "grad_norm": 0.3218643367290497, + "learning_rate": 0.0002, + "loss": 0.9256, + "step": 12970 + }, + { + "epoch": 0.8518457752255948, + "grad_norm": 0.38008660078048706, + "learning_rate": 0.0002, + "loss": 0.8525, + "step": 12980 + }, + { + "epoch": 0.8525020508613618, + "grad_norm": 0.4357127547264099, + "learning_rate": 0.0002, + "loss": 0.8651, + "step": 12990 + }, + { + "epoch": 0.8531583264971287, + "grad_norm": 0.36831775307655334, + "learning_rate": 0.0002, + "loss": 0.8592, + "step": 13000 + }, + { + "epoch": 0.8538146021328958, + "grad_norm": 0.4546806812286377, + "learning_rate": 0.0002, + "loss": 0.8772, + "step": 13010 + }, + { + "epoch": 0.8544708777686628, + "grad_norm": 0.3474937379360199, + "learning_rate": 0.0002, + "loss": 0.865, + "step": 13020 + }, + { + "epoch": 0.8551271534044299, + "grad_norm": 0.438834547996521, + "learning_rate": 0.0002, + "loss": 0.9098, + "step": 13030 + }, + { + "epoch": 0.8557834290401969, + "grad_norm": 0.3401171565055847, + "learning_rate": 0.0002, + "loss": 0.9573, + "step": 13040 + }, + { + "epoch": 0.8564397046759639, + "grad_norm": 0.3887326419353485, + "learning_rate": 0.0002, + "loss": 0.8286, + "step": 13050 + }, + { + "epoch": 0.857095980311731, + "grad_norm": 0.3489287197589874, + "learning_rate": 0.0002, + "loss": 0.8695, + "step": 13060 + }, + { + "epoch": 0.8577522559474979, + "grad_norm": 0.481189489364624, + "learning_rate": 0.0002, + "loss": 0.9118, + "step": 13070 + }, + { + "epoch": 0.858408531583265, + "grad_norm": 0.4638312757015228, + "learning_rate": 0.0002, + "loss": 0.9116, + "step": 13080 + }, + { + "epoch": 0.859064807219032, + "grad_norm": 0.43477529287338257, + "learning_rate": 0.0002, + "loss": 0.8775, + "step": 13090 + }, + { + "epoch": 0.859721082854799, + "grad_norm": 0.43358466029167175, + "learning_rate": 0.0002, + "loss": 0.9232, + "step": 13100 + }, + { + "epoch": 0.8603773584905661, + "grad_norm": 0.385527104139328, + "learning_rate": 0.0002, + "loss": 0.8771, + "step": 13110 + }, + { + "epoch": 0.861033634126333, + "grad_norm": 0.37878429889678955, + "learning_rate": 0.0002, + "loss": 0.8708, + "step": 13120 + }, + { + "epoch": 0.8616899097621001, + "grad_norm": 0.409476637840271, + "learning_rate": 0.0002, + "loss": 0.83, + "step": 13130 + }, + { + "epoch": 0.8623461853978671, + "grad_norm": 0.3888716697692871, + "learning_rate": 0.0002, + "loss": 0.9149, + "step": 13140 + }, + { + "epoch": 0.8630024610336341, + "grad_norm": 0.38093528151512146, + "learning_rate": 0.0002, + "loss": 0.8261, + "step": 13150 + }, + { + "epoch": 0.8636587366694012, + "grad_norm": 0.45613282918930054, + "learning_rate": 0.0002, + "loss": 0.9163, + "step": 13160 + }, + { + "epoch": 0.8643150123051682, + "grad_norm": 0.41665518283843994, + "learning_rate": 0.0002, + "loss": 0.8967, + "step": 13170 + }, + { + "epoch": 0.8649712879409351, + "grad_norm": 0.4322538673877716, + "learning_rate": 0.0002, + "loss": 0.8692, + "step": 13180 + }, + { + "epoch": 0.8656275635767022, + "grad_norm": 0.3452875018119812, + "learning_rate": 0.0002, + "loss": 0.8429, + "step": 13190 + }, + { + "epoch": 0.8662838392124692, + "grad_norm": 0.36628788709640503, + "learning_rate": 0.0002, + "loss": 0.8872, + "step": 13200 + }, + { + "epoch": 0.8669401148482363, + "grad_norm": 0.4329487383365631, + "learning_rate": 0.0002, + "loss": 0.9141, + "step": 13210 + }, + { + "epoch": 0.8675963904840033, + "grad_norm": 0.36800137162208557, + "learning_rate": 0.0002, + "loss": 0.8586, + "step": 13220 + }, + { + "epoch": 0.8682526661197703, + "grad_norm": 0.5235224366188049, + "learning_rate": 0.0002, + "loss": 0.9133, + "step": 13230 + }, + { + "epoch": 0.8689089417555373, + "grad_norm": 0.40818873047828674, + "learning_rate": 0.0002, + "loss": 0.8826, + "step": 13240 + }, + { + "epoch": 0.8695652173913043, + "grad_norm": 0.46719685196876526, + "learning_rate": 0.0002, + "loss": 0.8869, + "step": 13250 + }, + { + "epoch": 0.8702214930270714, + "grad_norm": 0.34422767162323, + "learning_rate": 0.0002, + "loss": 0.9426, + "step": 13260 + }, + { + "epoch": 0.8708777686628384, + "grad_norm": 0.37454837560653687, + "learning_rate": 0.0002, + "loss": 0.8921, + "step": 13270 + }, + { + "epoch": 0.8715340442986054, + "grad_norm": 0.39750349521636963, + "learning_rate": 0.0002, + "loss": 0.89, + "step": 13280 + }, + { + "epoch": 0.8721903199343725, + "grad_norm": 0.3864808678627014, + "learning_rate": 0.0002, + "loss": 0.9256, + "step": 13290 + }, + { + "epoch": 0.8728465955701394, + "grad_norm": 0.36088764667510986, + "learning_rate": 0.0002, + "loss": 0.8663, + "step": 13300 + }, + { + "epoch": 0.8735028712059065, + "grad_norm": 0.384287029504776, + "learning_rate": 0.0002, + "loss": 0.8334, + "step": 13310 + }, + { + "epoch": 0.8741591468416735, + "grad_norm": 0.3988962173461914, + "learning_rate": 0.0002, + "loss": 0.8697, + "step": 13320 + }, + { + "epoch": 0.8748154224774405, + "grad_norm": 0.40126702189445496, + "learning_rate": 0.0002, + "loss": 0.8924, + "step": 13330 + }, + { + "epoch": 0.8754716981132076, + "grad_norm": 0.3931732475757599, + "learning_rate": 0.0002, + "loss": 0.8766, + "step": 13340 + }, + { + "epoch": 0.8761279737489746, + "grad_norm": 0.35348305106163025, + "learning_rate": 0.0002, + "loss": 0.9249, + "step": 13350 + }, + { + "epoch": 0.8767842493847416, + "grad_norm": 0.3603714108467102, + "learning_rate": 0.0002, + "loss": 0.9412, + "step": 13360 + }, + { + "epoch": 0.8774405250205086, + "grad_norm": 0.3853464126586914, + "learning_rate": 0.0002, + "loss": 0.8432, + "step": 13370 + }, + { + "epoch": 0.8780968006562756, + "grad_norm": 0.44406014680862427, + "learning_rate": 0.0002, + "loss": 0.9035, + "step": 13380 + }, + { + "epoch": 0.8787530762920427, + "grad_norm": 0.3563307225704193, + "learning_rate": 0.0002, + "loss": 0.8903, + "step": 13390 + }, + { + "epoch": 0.8794093519278097, + "grad_norm": 0.4507097005844116, + "learning_rate": 0.0002, + "loss": 0.9004, + "step": 13400 + }, + { + "epoch": 0.8800656275635766, + "grad_norm": 0.4069702923297882, + "learning_rate": 0.0002, + "loss": 0.9792, + "step": 13410 + }, + { + "epoch": 0.8807219031993437, + "grad_norm": 0.5091678500175476, + "learning_rate": 0.0002, + "loss": 0.8857, + "step": 13420 + }, + { + "epoch": 0.8813781788351107, + "grad_norm": 0.3784930408000946, + "learning_rate": 0.0002, + "loss": 0.9499, + "step": 13430 + }, + { + "epoch": 0.8820344544708778, + "grad_norm": 0.3937450051307678, + "learning_rate": 0.0002, + "loss": 0.9475, + "step": 13440 + }, + { + "epoch": 0.8826907301066448, + "grad_norm": 0.39602798223495483, + "learning_rate": 0.0002, + "loss": 0.9411, + "step": 13450 + }, + { + "epoch": 0.8833470057424118, + "grad_norm": 0.42562225461006165, + "learning_rate": 0.0002, + "loss": 0.947, + "step": 13460 + }, + { + "epoch": 0.8840032813781789, + "grad_norm": 0.3933939039707184, + "learning_rate": 0.0002, + "loss": 0.9748, + "step": 13470 + }, + { + "epoch": 0.8846595570139458, + "grad_norm": 0.4387489855289459, + "learning_rate": 0.0002, + "loss": 0.9206, + "step": 13480 + }, + { + "epoch": 0.8853158326497129, + "grad_norm": 0.3655209243297577, + "learning_rate": 0.0002, + "loss": 0.8417, + "step": 13490 + }, + { + "epoch": 0.8859721082854799, + "grad_norm": 0.40750762820243835, + "learning_rate": 0.0002, + "loss": 0.8601, + "step": 13500 + }, + { + "epoch": 0.8866283839212469, + "grad_norm": 0.5716604590415955, + "learning_rate": 0.0002, + "loss": 0.9343, + "step": 13510 + }, + { + "epoch": 0.887284659557014, + "grad_norm": 0.3286498785018921, + "learning_rate": 0.0002, + "loss": 0.8634, + "step": 13520 + }, + { + "epoch": 0.887940935192781, + "grad_norm": 0.4093165993690491, + "learning_rate": 0.0002, + "loss": 0.8875, + "step": 13530 + }, + { + "epoch": 0.888597210828548, + "grad_norm": 0.37128645181655884, + "learning_rate": 0.0002, + "loss": 0.9056, + "step": 13540 + }, + { + "epoch": 0.889253486464315, + "grad_norm": 0.9630060195922852, + "learning_rate": 0.0002, + "loss": 0.9356, + "step": 13550 + }, + { + "epoch": 0.889909762100082, + "grad_norm": 0.4119563698768616, + "learning_rate": 0.0002, + "loss": 0.9598, + "step": 13560 + }, + { + "epoch": 0.8905660377358491, + "grad_norm": 0.42105695605278015, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 13570 + }, + { + "epoch": 0.8912223133716161, + "grad_norm": 0.35517653822898865, + "learning_rate": 0.0002, + "loss": 0.9091, + "step": 13580 + }, + { + "epoch": 0.8918785890073831, + "grad_norm": 0.4380730986595154, + "learning_rate": 0.0002, + "loss": 0.9975, + "step": 13590 + }, + { + "epoch": 0.8925348646431501, + "grad_norm": 0.3875235915184021, + "learning_rate": 0.0002, + "loss": 0.8874, + "step": 13600 + }, + { + "epoch": 0.8931911402789171, + "grad_norm": 0.3194465935230255, + "learning_rate": 0.0002, + "loss": 0.8813, + "step": 13610 + }, + { + "epoch": 0.8938474159146842, + "grad_norm": 0.4448561370372772, + "learning_rate": 0.0002, + "loss": 0.8671, + "step": 13620 + }, + { + "epoch": 0.8945036915504512, + "grad_norm": 0.4257558286190033, + "learning_rate": 0.0002, + "loss": 0.88, + "step": 13630 + }, + { + "epoch": 0.8951599671862183, + "grad_norm": 0.3831070065498352, + "learning_rate": 0.0002, + "loss": 0.8844, + "step": 13640 + }, + { + "epoch": 0.8958162428219852, + "grad_norm": 0.3765697479248047, + "learning_rate": 0.0002, + "loss": 0.8836, + "step": 13650 + }, + { + "epoch": 0.8964725184577522, + "grad_norm": 0.3621887266635895, + "learning_rate": 0.0002, + "loss": 0.9015, + "step": 13660 + }, + { + "epoch": 0.8971287940935193, + "grad_norm": 0.4219911992549896, + "learning_rate": 0.0002, + "loss": 0.9147, + "step": 13670 + }, + { + "epoch": 0.8977850697292863, + "grad_norm": 0.550961971282959, + "learning_rate": 0.0002, + "loss": 0.878, + "step": 13680 + }, + { + "epoch": 0.8984413453650533, + "grad_norm": 0.41570132970809937, + "learning_rate": 0.0002, + "loss": 0.8807, + "step": 13690 + }, + { + "epoch": 0.8990976210008204, + "grad_norm": 0.34862181544303894, + "learning_rate": 0.0002, + "loss": 0.953, + "step": 13700 + }, + { + "epoch": 0.8997538966365873, + "grad_norm": 0.3568742871284485, + "learning_rate": 0.0002, + "loss": 0.8738, + "step": 13710 + }, + { + "epoch": 0.9004101722723544, + "grad_norm": 0.3789973258972168, + "learning_rate": 0.0002, + "loss": 0.9346, + "step": 13720 + }, + { + "epoch": 0.9010664479081214, + "grad_norm": 0.3775809109210968, + "learning_rate": 0.0002, + "loss": 0.8225, + "step": 13730 + }, + { + "epoch": 0.9017227235438884, + "grad_norm": 0.33509477972984314, + "learning_rate": 0.0002, + "loss": 0.9238, + "step": 13740 + }, + { + "epoch": 0.9023789991796555, + "grad_norm": 0.4410351514816284, + "learning_rate": 0.0002, + "loss": 0.915, + "step": 13750 + }, + { + "epoch": 0.9030352748154225, + "grad_norm": 0.44638893008232117, + "learning_rate": 0.0002, + "loss": 0.9066, + "step": 13760 + }, + { + "epoch": 0.9036915504511895, + "grad_norm": 0.38487187027931213, + "learning_rate": 0.0002, + "loss": 0.883, + "step": 13770 + }, + { + "epoch": 0.9043478260869565, + "grad_norm": 0.385796457529068, + "learning_rate": 0.0002, + "loss": 0.8886, + "step": 13780 + }, + { + "epoch": 0.9050041017227235, + "grad_norm": 0.4261656403541565, + "learning_rate": 0.0002, + "loss": 0.8781, + "step": 13790 + }, + { + "epoch": 0.9056603773584906, + "grad_norm": 0.38487741351127625, + "learning_rate": 0.0002, + "loss": 0.9237, + "step": 13800 + }, + { + "epoch": 0.9063166529942576, + "grad_norm": 0.40660005807876587, + "learning_rate": 0.0002, + "loss": 0.9357, + "step": 13810 + }, + { + "epoch": 0.9069729286300247, + "grad_norm": 0.4127330780029297, + "learning_rate": 0.0002, + "loss": 0.8943, + "step": 13820 + }, + { + "epoch": 0.9076292042657916, + "grad_norm": 0.4300757944583893, + "learning_rate": 0.0002, + "loss": 0.8981, + "step": 13830 + }, + { + "epoch": 0.9082854799015586, + "grad_norm": 0.3994467854499817, + "learning_rate": 0.0002, + "loss": 0.8956, + "step": 13840 + }, + { + "epoch": 0.9089417555373257, + "grad_norm": 0.4332261383533478, + "learning_rate": 0.0002, + "loss": 0.8949, + "step": 13850 + }, + { + "epoch": 0.9095980311730927, + "grad_norm": 0.3849696218967438, + "learning_rate": 0.0002, + "loss": 0.8897, + "step": 13860 + }, + { + "epoch": 0.9102543068088598, + "grad_norm": 0.39346274733543396, + "learning_rate": 0.0002, + "loss": 0.8897, + "step": 13870 + }, + { + "epoch": 0.9109105824446267, + "grad_norm": 0.39518049359321594, + "learning_rate": 0.0002, + "loss": 0.8847, + "step": 13880 + }, + { + "epoch": 0.9115668580803937, + "grad_norm": 0.4449180066585541, + "learning_rate": 0.0002, + "loss": 0.8982, + "step": 13890 + }, + { + "epoch": 0.9122231337161608, + "grad_norm": 0.41052138805389404, + "learning_rate": 0.0002, + "loss": 0.9057, + "step": 13900 + }, + { + "epoch": 0.9128794093519278, + "grad_norm": 0.36827564239501953, + "learning_rate": 0.0002, + "loss": 0.905, + "step": 13910 + }, + { + "epoch": 0.9135356849876949, + "grad_norm": 0.3875851631164551, + "learning_rate": 0.0002, + "loss": 0.9484, + "step": 13920 + }, + { + "epoch": 0.9141919606234619, + "grad_norm": 0.402854859828949, + "learning_rate": 0.0002, + "loss": 0.9125, + "step": 13930 + }, + { + "epoch": 0.9148482362592288, + "grad_norm": 0.3584592938423157, + "learning_rate": 0.0002, + "loss": 0.9014, + "step": 13940 + }, + { + "epoch": 0.9155045118949959, + "grad_norm": 0.3516979515552521, + "learning_rate": 0.0002, + "loss": 0.8432, + "step": 13950 + }, + { + "epoch": 0.9161607875307629, + "grad_norm": 0.4411509335041046, + "learning_rate": 0.0002, + "loss": 0.8747, + "step": 13960 + }, + { + "epoch": 0.9168170631665299, + "grad_norm": 0.47830596566200256, + "learning_rate": 0.0002, + "loss": 0.8758, + "step": 13970 + }, + { + "epoch": 0.917473338802297, + "grad_norm": 0.3669400215148926, + "learning_rate": 0.0002, + "loss": 0.915, + "step": 13980 + }, + { + "epoch": 0.918129614438064, + "grad_norm": 0.4361213147640228, + "learning_rate": 0.0002, + "loss": 0.9265, + "step": 13990 + }, + { + "epoch": 0.918785890073831, + "grad_norm": 0.4224131405353546, + "learning_rate": 0.0002, + "loss": 0.9106, + "step": 14000 + }, + { + "epoch": 0.919442165709598, + "grad_norm": 0.5785587430000305, + "learning_rate": 0.0002, + "loss": 0.9138, + "step": 14010 + }, + { + "epoch": 0.920098441345365, + "grad_norm": 0.40788379311561584, + "learning_rate": 0.0002, + "loss": 0.9078, + "step": 14020 + }, + { + "epoch": 0.9207547169811321, + "grad_norm": 0.40879732370376587, + "learning_rate": 0.0002, + "loss": 0.9241, + "step": 14030 + }, + { + "epoch": 0.9214109926168991, + "grad_norm": 0.4031982421875, + "learning_rate": 0.0002, + "loss": 0.8565, + "step": 14040 + }, + { + "epoch": 0.9220672682526662, + "grad_norm": 0.3457014560699463, + "learning_rate": 0.0002, + "loss": 0.8224, + "step": 14050 + }, + { + "epoch": 0.9227235438884331, + "grad_norm": 0.38608697056770325, + "learning_rate": 0.0002, + "loss": 0.89, + "step": 14060 + }, + { + "epoch": 0.9233798195242001, + "grad_norm": 0.39772507548332214, + "learning_rate": 0.0002, + "loss": 0.8637, + "step": 14070 + }, + { + "epoch": 0.9240360951599672, + "grad_norm": 0.3684687316417694, + "learning_rate": 0.0002, + "loss": 0.9102, + "step": 14080 + }, + { + "epoch": 0.9246923707957342, + "grad_norm": 0.4479428827762604, + "learning_rate": 0.0002, + "loss": 0.886, + "step": 14090 + }, + { + "epoch": 0.9253486464315013, + "grad_norm": 0.39918506145477295, + "learning_rate": 0.0002, + "loss": 0.857, + "step": 14100 + }, + { + "epoch": 0.9260049220672683, + "grad_norm": 0.4163782000541687, + "learning_rate": 0.0002, + "loss": 0.8765, + "step": 14110 + }, + { + "epoch": 0.9266611977030352, + "grad_norm": 0.40232136845588684, + "learning_rate": 0.0002, + "loss": 0.9022, + "step": 14120 + }, + { + "epoch": 0.9273174733388023, + "grad_norm": 0.36188459396362305, + "learning_rate": 0.0002, + "loss": 0.8815, + "step": 14130 + }, + { + "epoch": 0.9279737489745693, + "grad_norm": 0.42056623101234436, + "learning_rate": 0.0002, + "loss": 0.9072, + "step": 14140 + }, + { + "epoch": 0.9286300246103364, + "grad_norm": 0.3129708468914032, + "learning_rate": 0.0002, + "loss": 0.8678, + "step": 14150 + }, + { + "epoch": 0.9292863002461034, + "grad_norm": 0.4068623185157776, + "learning_rate": 0.0002, + "loss": 0.8895, + "step": 14160 + }, + { + "epoch": 0.9299425758818703, + "grad_norm": 0.43788865208625793, + "learning_rate": 0.0002, + "loss": 0.8939, + "step": 14170 + }, + { + "epoch": 0.9305988515176374, + "grad_norm": 0.35850921273231506, + "learning_rate": 0.0002, + "loss": 0.8954, + "step": 14180 + }, + { + "epoch": 0.9312551271534044, + "grad_norm": 0.41121411323547363, + "learning_rate": 0.0002, + "loss": 0.9001, + "step": 14190 + }, + { + "epoch": 0.9319114027891715, + "grad_norm": 0.4249218702316284, + "learning_rate": 0.0002, + "loss": 0.866, + "step": 14200 + }, + { + "epoch": 0.9325676784249385, + "grad_norm": 0.3779831826686859, + "learning_rate": 0.0002, + "loss": 0.9195, + "step": 14210 + }, + { + "epoch": 0.9332239540607055, + "grad_norm": 0.36781951785087585, + "learning_rate": 0.0002, + "loss": 0.8436, + "step": 14220 + }, + { + "epoch": 0.9338802296964726, + "grad_norm": 0.36741000413894653, + "learning_rate": 0.0002, + "loss": 0.8976, + "step": 14230 + }, + { + "epoch": 0.9345365053322395, + "grad_norm": 0.43414175510406494, + "learning_rate": 0.0002, + "loss": 0.9137, + "step": 14240 + }, + { + "epoch": 0.9351927809680065, + "grad_norm": 0.4406278431415558, + "learning_rate": 0.0002, + "loss": 0.8648, + "step": 14250 + }, + { + "epoch": 0.9358490566037736, + "grad_norm": 0.386152982711792, + "learning_rate": 0.0002, + "loss": 0.8705, + "step": 14260 + }, + { + "epoch": 0.9365053322395406, + "grad_norm": 0.3971618711948395, + "learning_rate": 0.0002, + "loss": 0.8697, + "step": 14270 + }, + { + "epoch": 0.9371616078753077, + "grad_norm": 0.3366684317588806, + "learning_rate": 0.0002, + "loss": 0.8925, + "step": 14280 + }, + { + "epoch": 0.9378178835110746, + "grad_norm": 0.42566195130348206, + "learning_rate": 0.0002, + "loss": 0.8702, + "step": 14290 + }, + { + "epoch": 0.9384741591468416, + "grad_norm": 0.3734602928161621, + "learning_rate": 0.0002, + "loss": 0.8767, + "step": 14300 + }, + { + "epoch": 0.9391304347826087, + "grad_norm": 0.4279540479183197, + "learning_rate": 0.0002, + "loss": 0.8889, + "step": 14310 + }, + { + "epoch": 0.9397867104183757, + "grad_norm": 0.43050041794776917, + "learning_rate": 0.0002, + "loss": 0.9319, + "step": 14320 + }, + { + "epoch": 0.9404429860541428, + "grad_norm": 0.41909968852996826, + "learning_rate": 0.0002, + "loss": 0.91, + "step": 14330 + }, + { + "epoch": 0.9410992616899098, + "grad_norm": 0.39651772379875183, + "learning_rate": 0.0002, + "loss": 0.8782, + "step": 14340 + }, + { + "epoch": 0.9417555373256767, + "grad_norm": 0.4171423614025116, + "learning_rate": 0.0002, + "loss": 0.8868, + "step": 14350 + }, + { + "epoch": 0.9424118129614438, + "grad_norm": 0.44906023144721985, + "learning_rate": 0.0002, + "loss": 0.8751, + "step": 14360 + }, + { + "epoch": 0.9430680885972108, + "grad_norm": 0.4213627576828003, + "learning_rate": 0.0002, + "loss": 0.9048, + "step": 14370 + }, + { + "epoch": 0.9437243642329779, + "grad_norm": 0.38457417488098145, + "learning_rate": 0.0002, + "loss": 0.9257, + "step": 14380 + }, + { + "epoch": 0.9443806398687449, + "grad_norm": 0.43104225397109985, + "learning_rate": 0.0002, + "loss": 0.8421, + "step": 14390 + }, + { + "epoch": 0.9450369155045119, + "grad_norm": 0.40090736746788025, + "learning_rate": 0.0002, + "loss": 0.865, + "step": 14400 + }, + { + "epoch": 0.9456931911402789, + "grad_norm": 0.36180031299591064, + "learning_rate": 0.0002, + "loss": 0.8746, + "step": 14410 + }, + { + "epoch": 0.9463494667760459, + "grad_norm": 0.4608926475048065, + "learning_rate": 0.0002, + "loss": 0.9297, + "step": 14420 + }, + { + "epoch": 0.947005742411813, + "grad_norm": 0.44056418538093567, + "learning_rate": 0.0002, + "loss": 0.9131, + "step": 14430 + }, + { + "epoch": 0.94766201804758, + "grad_norm": 0.334051251411438, + "learning_rate": 0.0002, + "loss": 0.8368, + "step": 14440 + }, + { + "epoch": 0.948318293683347, + "grad_norm": 0.45580169558525085, + "learning_rate": 0.0002, + "loss": 0.8557, + "step": 14450 + }, + { + "epoch": 0.9489745693191141, + "grad_norm": 0.3898446261882782, + "learning_rate": 0.0002, + "loss": 0.8828, + "step": 14460 + }, + { + "epoch": 0.949630844954881, + "grad_norm": 0.465259850025177, + "learning_rate": 0.0002, + "loss": 0.9436, + "step": 14470 + }, + { + "epoch": 0.9502871205906481, + "grad_norm": 0.3595256805419922, + "learning_rate": 0.0002, + "loss": 0.8907, + "step": 14480 + }, + { + "epoch": 0.9509433962264151, + "grad_norm": 0.3710390031337738, + "learning_rate": 0.0002, + "loss": 0.8593, + "step": 14490 + }, + { + "epoch": 0.9515996718621821, + "grad_norm": 0.4002886712551117, + "learning_rate": 0.0002, + "loss": 0.8373, + "step": 14500 + }, + { + "epoch": 0.9522559474979492, + "grad_norm": 0.4221348464488983, + "learning_rate": 0.0002, + "loss": 0.9091, + "step": 14510 + }, + { + "epoch": 0.9529122231337162, + "grad_norm": 0.4163874089717865, + "learning_rate": 0.0002, + "loss": 0.9205, + "step": 14520 + }, + { + "epoch": 0.9535684987694831, + "grad_norm": 0.40220746397972107, + "learning_rate": 0.0002, + "loss": 0.9125, + "step": 14530 + }, + { + "epoch": 0.9542247744052502, + "grad_norm": 0.45836037397384644, + "learning_rate": 0.0002, + "loss": 0.854, + "step": 14540 + }, + { + "epoch": 0.9548810500410172, + "grad_norm": 0.420478492975235, + "learning_rate": 0.0002, + "loss": 0.911, + "step": 14550 + }, + { + "epoch": 0.9555373256767843, + "grad_norm": 0.45627933740615845, + "learning_rate": 0.0002, + "loss": 0.8932, + "step": 14560 + }, + { + "epoch": 0.9561936013125513, + "grad_norm": 0.41290518641471863, + "learning_rate": 0.0002, + "loss": 0.8926, + "step": 14570 + }, + { + "epoch": 0.9568498769483182, + "grad_norm": 0.46660760045051575, + "learning_rate": 0.0002, + "loss": 0.8936, + "step": 14580 + }, + { + "epoch": 0.9575061525840853, + "grad_norm": 0.45327240228652954, + "learning_rate": 0.0002, + "loss": 0.9132, + "step": 14590 + }, + { + "epoch": 0.9581624282198523, + "grad_norm": 0.41973528265953064, + "learning_rate": 0.0002, + "loss": 0.8601, + "step": 14600 + }, + { + "epoch": 0.9588187038556194, + "grad_norm": 0.4022239148616791, + "learning_rate": 0.0002, + "loss": 0.8278, + "step": 14610 + }, + { + "epoch": 0.9594749794913864, + "grad_norm": 0.3226695954799652, + "learning_rate": 0.0002, + "loss": 0.8514, + "step": 14620 + }, + { + "epoch": 0.9601312551271534, + "grad_norm": 0.4233718812465668, + "learning_rate": 0.0002, + "loss": 0.8526, + "step": 14630 + }, + { + "epoch": 0.9607875307629204, + "grad_norm": 0.37561315298080444, + "learning_rate": 0.0002, + "loss": 0.9009, + "step": 14640 + }, + { + "epoch": 0.9614438063986874, + "grad_norm": 0.44843146204948425, + "learning_rate": 0.0002, + "loss": 0.8854, + "step": 14650 + }, + { + "epoch": 0.9621000820344545, + "grad_norm": 0.40062573552131653, + "learning_rate": 0.0002, + "loss": 0.8863, + "step": 14660 + }, + { + "epoch": 0.9627563576702215, + "grad_norm": 0.4720284938812256, + "learning_rate": 0.0002, + "loss": 0.8912, + "step": 14670 + }, + { + "epoch": 0.9634126333059885, + "grad_norm": 0.43565067648887634, + "learning_rate": 0.0002, + "loss": 0.89, + "step": 14680 + }, + { + "epoch": 0.9640689089417556, + "grad_norm": 0.41181448101997375, + "learning_rate": 0.0002, + "loss": 0.8612, + "step": 14690 + }, + { + "epoch": 0.9647251845775225, + "grad_norm": 0.4344978630542755, + "learning_rate": 0.0002, + "loss": 0.9464, + "step": 14700 + }, + { + "epoch": 0.9653814602132896, + "grad_norm": 0.42558008432388306, + "learning_rate": 0.0002, + "loss": 0.855, + "step": 14710 + }, + { + "epoch": 0.9660377358490566, + "grad_norm": 0.39161136746406555, + "learning_rate": 0.0002, + "loss": 0.8945, + "step": 14720 + }, + { + "epoch": 0.9666940114848236, + "grad_norm": 0.3784191608428955, + "learning_rate": 0.0002, + "loss": 0.8587, + "step": 14730 + }, + { + "epoch": 0.9673502871205907, + "grad_norm": 0.39039477705955505, + "learning_rate": 0.0002, + "loss": 0.8566, + "step": 14740 + }, + { + "epoch": 0.9680065627563577, + "grad_norm": 0.5566018223762512, + "learning_rate": 0.0002, + "loss": 0.9006, + "step": 14750 + }, + { + "epoch": 0.9686628383921247, + "grad_norm": 0.38877877593040466, + "learning_rate": 0.0002, + "loss": 0.9222, + "step": 14760 + }, + { + "epoch": 0.9693191140278917, + "grad_norm": 0.33369940519332886, + "learning_rate": 0.0002, + "loss": 0.8811, + "step": 14770 + }, + { + "epoch": 0.9699753896636587, + "grad_norm": 0.5109888315200806, + "learning_rate": 0.0002, + "loss": 0.8743, + "step": 14780 + }, + { + "epoch": 0.9706316652994258, + "grad_norm": 0.5364375114440918, + "learning_rate": 0.0002, + "loss": 0.8883, + "step": 14790 + }, + { + "epoch": 0.9712879409351928, + "grad_norm": 0.3702435791492462, + "learning_rate": 0.0002, + "loss": 0.8696, + "step": 14800 + }, + { + "epoch": 0.9719442165709598, + "grad_norm": 0.5094677805900574, + "learning_rate": 0.0002, + "loss": 0.9144, + "step": 14810 + }, + { + "epoch": 0.9726004922067268, + "grad_norm": 0.3565915524959564, + "learning_rate": 0.0002, + "loss": 0.8823, + "step": 14820 + }, + { + "epoch": 0.9732567678424938, + "grad_norm": 0.44756242632865906, + "learning_rate": 0.0002, + "loss": 0.8964, + "step": 14830 + }, + { + "epoch": 0.9739130434782609, + "grad_norm": 0.4272070527076721, + "learning_rate": 0.0002, + "loss": 0.8524, + "step": 14840 + }, + { + "epoch": 0.9745693191140279, + "grad_norm": 0.4303967356681824, + "learning_rate": 0.0002, + "loss": 0.8551, + "step": 14850 + }, + { + "epoch": 0.9752255947497949, + "grad_norm": 0.4528018534183502, + "learning_rate": 0.0002, + "loss": 0.9303, + "step": 14860 + }, + { + "epoch": 0.975881870385562, + "grad_norm": 0.5745970606803894, + "learning_rate": 0.0002, + "loss": 0.8465, + "step": 14870 + }, + { + "epoch": 0.9765381460213289, + "grad_norm": 0.43847736716270447, + "learning_rate": 0.0002, + "loss": 0.9496, + "step": 14880 + }, + { + "epoch": 0.977194421657096, + "grad_norm": 0.4512104094028473, + "learning_rate": 0.0002, + "loss": 0.8647, + "step": 14890 + }, + { + "epoch": 0.977850697292863, + "grad_norm": 0.4573594331741333, + "learning_rate": 0.0002, + "loss": 0.9712, + "step": 14900 + }, + { + "epoch": 0.97850697292863, + "grad_norm": 0.4297037720680237, + "learning_rate": 0.0002, + "loss": 0.898, + "step": 14910 + }, + { + "epoch": 0.9791632485643971, + "grad_norm": 0.3769957721233368, + "learning_rate": 0.0002, + "loss": 0.9115, + "step": 14920 + }, + { + "epoch": 0.979819524200164, + "grad_norm": 0.3524457514286041, + "learning_rate": 0.0002, + "loss": 0.8956, + "step": 14930 + }, + { + "epoch": 0.9804757998359311, + "grad_norm": 0.3965851664543152, + "learning_rate": 0.0002, + "loss": 0.9254, + "step": 14940 + }, + { + "epoch": 0.9811320754716981, + "grad_norm": 0.3593656122684479, + "learning_rate": 0.0002, + "loss": 0.8869, + "step": 14950 + }, + { + "epoch": 0.9817883511074651, + "grad_norm": 0.35391807556152344, + "learning_rate": 0.0002, + "loss": 0.8859, + "step": 14960 + }, + { + "epoch": 0.9824446267432322, + "grad_norm": 0.40651339292526245, + "learning_rate": 0.0002, + "loss": 0.9111, + "step": 14970 + }, + { + "epoch": 0.9831009023789992, + "grad_norm": 0.4370724558830261, + "learning_rate": 0.0002, + "loss": 0.8714, + "step": 14980 + }, + { + "epoch": 0.9837571780147663, + "grad_norm": 0.37859413027763367, + "learning_rate": 0.0002, + "loss": 0.8776, + "step": 14990 + }, + { + "epoch": 0.9844134536505332, + "grad_norm": 0.4329196512699127, + "learning_rate": 0.0002, + "loss": 0.8744, + "step": 15000 + }, + { + "epoch": 0.9850697292863002, + "grad_norm": 0.4119299054145813, + "learning_rate": 0.0002, + "loss": 0.8384, + "step": 15010 + }, + { + "epoch": 0.9857260049220673, + "grad_norm": 0.4084014892578125, + "learning_rate": 0.0002, + "loss": 0.8885, + "step": 15020 + }, + { + "epoch": 0.9863822805578343, + "grad_norm": 0.34770357608795166, + "learning_rate": 0.0002, + "loss": 0.9255, + "step": 15030 + }, + { + "epoch": 0.9870385561936013, + "grad_norm": 0.429995596408844, + "learning_rate": 0.0002, + "loss": 0.9074, + "step": 15040 + }, + { + "epoch": 0.9876948318293683, + "grad_norm": 0.4588816165924072, + "learning_rate": 0.0002, + "loss": 0.8251, + "step": 15050 + }, + { + "epoch": 0.9883511074651353, + "grad_norm": 0.47414910793304443, + "learning_rate": 0.0002, + "loss": 0.895, + "step": 15060 + }, + { + "epoch": 0.9890073831009024, + "grad_norm": 0.3370365798473358, + "learning_rate": 0.0002, + "loss": 0.8688, + "step": 15070 + }, + { + "epoch": 0.9896636587366694, + "grad_norm": 0.3697716295719147, + "learning_rate": 0.0002, + "loss": 0.8775, + "step": 15080 + }, + { + "epoch": 0.9903199343724364, + "grad_norm": 0.31965479254722595, + "learning_rate": 0.0002, + "loss": 0.8949, + "step": 15090 + }, + { + "epoch": 0.9909762100082035, + "grad_norm": 0.5081075429916382, + "learning_rate": 0.0002, + "loss": 0.9519, + "step": 15100 + }, + { + "epoch": 0.9916324856439704, + "grad_norm": 0.44397613406181335, + "learning_rate": 0.0002, + "loss": 0.9012, + "step": 15110 + }, + { + "epoch": 0.9922887612797375, + "grad_norm": 0.30696988105773926, + "learning_rate": 0.0002, + "loss": 0.8327, + "step": 15120 + }, + { + "epoch": 0.9929450369155045, + "grad_norm": 0.4071432650089264, + "learning_rate": 0.0002, + "loss": 0.8785, + "step": 15130 + }, + { + "epoch": 0.9936013125512715, + "grad_norm": 0.5356084108352661, + "learning_rate": 0.0002, + "loss": 0.8879, + "step": 15140 + }, + { + "epoch": 0.9942575881870386, + "grad_norm": 0.36654597520828247, + "learning_rate": 0.0002, + "loss": 0.8593, + "step": 15150 + }, + { + "epoch": 0.9949138638228056, + "grad_norm": 0.38214483857154846, + "learning_rate": 0.0002, + "loss": 0.92, + "step": 15160 + }, + { + "epoch": 0.9955701394585726, + "grad_norm": 0.4340892434120178, + "learning_rate": 0.0002, + "loss": 0.9101, + "step": 15170 + }, + { + "epoch": 0.9962264150943396, + "grad_norm": 0.41310828924179077, + "learning_rate": 0.0002, + "loss": 0.9049, + "step": 15180 + }, + { + "epoch": 0.9968826907301066, + "grad_norm": 0.4932044744491577, + "learning_rate": 0.0002, + "loss": 0.8557, + "step": 15190 + }, + { + "epoch": 0.9975389663658737, + "grad_norm": 0.45371273159980774, + "learning_rate": 0.0002, + "loss": 0.8989, + "step": 15200 + }, + { + "epoch": 0.9981952420016407, + "grad_norm": 0.42956778407096863, + "learning_rate": 0.0002, + "loss": 0.9003, + "step": 15210 + }, + { + "epoch": 0.9988515176374078, + "grad_norm": 0.4343477487564087, + "learning_rate": 0.0002, + "loss": 0.8763, + "step": 15220 + }, + { + "epoch": 0.9995077932731747, + "grad_norm": 0.4425382912158966, + "learning_rate": 0.0002, + "loss": 0.8832, + "step": 15230 + }, + { + "epoch": 0.9999671862182117, + "eval_loss": 1.055190086364746, + "eval_runtime": 110.4581, + "eval_samples_per_second": 6.564, + "eval_steps_per_second": 0.824, + "step": 15237 + } + ], + "logging_steps": 10, + "max_steps": 121896, + "num_input_tokens_seen": 0, + "num_train_epochs": 8, + "save_steps": 200, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 7.82655074009088e+17, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/training_args.bin b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..33b12debaac07d7469e3dd63cd83a73ab48adf47 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe28e20a8c72ab0a47191422d1378f55aaa44a84900b7a9377d408d348d306e6 +size 5560 diff --git a/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/training_log.jsonl b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/training_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..206ba0d460b961bd136272c62fa1db52149b9411 --- /dev/null +++ b/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/training_log.jsonl @@ -0,0 +1,2 @@ +{"epoch": 0.9999671862182117, "step": 15237, "epoch_duration": 41000.40024757385, "total_accumulated_duration": 41000.40024757385, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7887.97119140625}, "peak_memory_usage": {"GPU_0": 11696.9921875}, "avg_memory_reserved": {"GPU_0": 12786.0}, "peak_memory_reserved": {"GPU_0": 12786.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "N/A", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6059, "grad_norm": 1.0711857080459595, "learning_rate": 0.0002, "epoch": 0.0006562756357670221, "step": 10}, {"loss": 1.7643, "grad_norm": 0.935492992401123, "learning_rate": 0.0002, "epoch": 0.0013125512715340443, "step": 20}, {"loss": 1.2573, "grad_norm": 0.908809244632721, "learning_rate": 0.0002, "epoch": 0.0019688269073010667, "step": 30}, {"loss": 1.1011, "grad_norm": 0.3497907221317291, "learning_rate": 0.0002, "epoch": 0.0026251025430680886, "step": 40}, {"loss": 1.0866, "grad_norm": 0.32830339670181274, "learning_rate": 0.0002, "epoch": 0.003281378178835111, "step": 50}, {"loss": 1.0104, "grad_norm": 0.2850394546985626, "learning_rate": 0.0002, "epoch": 0.003937653814602133, "step": 60}, {"loss": 1.082, "grad_norm": 0.3804827928543091, "learning_rate": 0.0002, "epoch": 0.004593929450369155, "step": 70}, {"loss": 1.052, "grad_norm": 0.23506930470466614, "learning_rate": 0.0002, "epoch": 0.005250205086136177, "step": 80}, {"loss": 1.0593, "grad_norm": 0.2644859552383423, "learning_rate": 0.0002, "epoch": 0.005906480721903199, "step": 90}, {"loss": 0.9978, "grad_norm": 0.36523646116256714, "learning_rate": 0.0002, "epoch": 0.006562756357670222, "step": 100}, {"loss": 0.9958, "grad_norm": 0.3195570111274719, "learning_rate": 0.0002, "epoch": 0.007219031993437244, "step": 110}, {"loss": 1.0041, "grad_norm": 0.2886694371700287, "learning_rate": 0.0002, "epoch": 0.007875307629204267, "step": 120}, {"loss": 1.0623, "grad_norm": 0.3913154900074005, "learning_rate": 0.0002, "epoch": 0.008531583264971289, "step": 130}, {"loss": 1.1128, "grad_norm": 0.3181937336921692, "learning_rate": 0.0002, "epoch": 0.00918785890073831, "step": 140}, {"loss": 0.9989, "grad_norm": 0.2629619538784027, "learning_rate": 0.0002, "epoch": 0.009844134536505332, "step": 150}, {"loss": 1.0226, "grad_norm": 0.30438563227653503, "learning_rate": 0.0002, "epoch": 0.010500410172272354, "step": 160}, {"loss": 1.0321, "grad_norm": 0.2876931130886078, "learning_rate": 0.0002, "epoch": 0.011156685808039376, "step": 170}, {"loss": 0.9824, "grad_norm": 0.29188141226768494, "learning_rate": 0.0002, "epoch": 0.011812961443806398, "step": 180}, {"loss": 0.9439, "grad_norm": 0.2645126283168793, "learning_rate": 0.0002, "epoch": 0.01246923707957342, "step": 190}, {"loss": 1.0273, "grad_norm": 0.26031428575515747, "learning_rate": 0.0002, "epoch": 0.013125512715340444, "step": 200}, {"loss": 1.0518, "grad_norm": 0.25812748074531555, "learning_rate": 0.0002, "epoch": 0.013781788351107466, "step": 210}, {"loss": 0.9502, "grad_norm": 0.24913132190704346, "learning_rate": 0.0002, "epoch": 0.014438063986874488, "step": 220}, {"loss": 1.0131, "grad_norm": 0.30332663655281067, "learning_rate": 0.0002, "epoch": 0.01509433962264151, "step": 230}, {"loss": 1.0459, "grad_norm": 0.25207284092903137, "learning_rate": 0.0002, "epoch": 0.015750615258408533, "step": 240}, {"loss": 0.9798, "grad_norm": 0.26384010910987854, "learning_rate": 0.0002, "epoch": 0.016406890894175553, "step": 250}, {"loss": 1.061, "grad_norm": 0.28651612997055054, "learning_rate": 0.0002, "epoch": 0.017063166529942577, "step": 260}, {"loss": 0.991, "grad_norm": 0.2879799008369446, "learning_rate": 0.0002, "epoch": 0.017719442165709597, "step": 270}, {"loss": 0.9643, "grad_norm": 0.28661131858825684, "learning_rate": 0.0002, "epoch": 0.01837571780147662, "step": 280}, {"loss": 0.9903, "grad_norm": 0.265348345041275, "learning_rate": 0.0002, "epoch": 0.01903199343724364, "step": 290}, {"loss": 1.011, "grad_norm": 0.24186863005161285, "learning_rate": 0.0002, "epoch": 0.019688269073010665, "step": 300}, {"loss": 1.0245, "grad_norm": 0.25996068120002747, "learning_rate": 0.0002, "epoch": 0.020344544708777685, "step": 310}, {"loss": 0.9987, "grad_norm": 0.29615098237991333, "learning_rate": 0.0002, "epoch": 0.02100082034454471, "step": 320}, {"loss": 1.04, "grad_norm": 0.2429388016462326, "learning_rate": 0.0002, "epoch": 0.021657095980311732, "step": 330}, {"loss": 0.9703, "grad_norm": 0.26882505416870117, "learning_rate": 0.0002, "epoch": 0.022313371616078752, "step": 340}, {"loss": 0.9686, "grad_norm": 0.283328652381897, "learning_rate": 0.0002, "epoch": 0.022969647251845776, "step": 350}, {"loss": 0.952, "grad_norm": 0.3115910589694977, "learning_rate": 0.0002, "epoch": 0.023625922887612796, "step": 360}, {"loss": 1.0112, "grad_norm": 0.27969497442245483, "learning_rate": 0.0002, "epoch": 0.02428219852337982, "step": 370}, {"loss": 1.0618, "grad_norm": 0.30471885204315186, "learning_rate": 0.0002, "epoch": 0.02493847415914684, "step": 380}, {"loss": 1.0189, "grad_norm": 0.3183926045894623, "learning_rate": 0.0002, "epoch": 0.025594749794913864, "step": 390}, {"loss": 1.0148, "grad_norm": 0.27311646938323975, "learning_rate": 0.0002, "epoch": 0.026251025430680888, "step": 400}, {"loss": 0.9672, "grad_norm": 0.29148945212364197, "learning_rate": 0.0002, "epoch": 0.026907301066447908, "step": 410}, {"loss": 0.9473, "grad_norm": 0.2386617809534073, "learning_rate": 0.0002, "epoch": 0.02756357670221493, "step": 420}, {"loss": 1.0722, "grad_norm": 0.2546529471874237, "learning_rate": 0.0002, "epoch": 0.02821985233798195, "step": 430}, {"loss": 1.0017, "grad_norm": 0.27932611107826233, "learning_rate": 0.0002, "epoch": 0.028876127973748975, "step": 440}, {"loss": 0.9988, "grad_norm": 0.31259334087371826, "learning_rate": 0.0002, "epoch": 0.029532403609515995, "step": 450}, {"loss": 1.0404, "grad_norm": 0.2675893008708954, "learning_rate": 0.0002, "epoch": 0.03018867924528302, "step": 460}, {"loss": 1.0018, "grad_norm": 0.24047039449214935, "learning_rate": 0.0002, "epoch": 0.03084495488105004, "step": 470}, {"loss": 1.0685, "grad_norm": 0.2637856900691986, "learning_rate": 0.0002, "epoch": 0.031501230516817066, "step": 480}, {"loss": 0.9879, "grad_norm": 0.3064589500427246, "learning_rate": 0.0002, "epoch": 0.03215750615258409, "step": 490}, {"loss": 0.9657, "grad_norm": 0.25345391035079956, "learning_rate": 0.0002, "epoch": 0.03281378178835111, "step": 500}, {"loss": 0.9518, "grad_norm": 0.3100789785385132, "learning_rate": 0.0002, "epoch": 0.03347005742411813, "step": 510}, {"loss": 1.0108, "grad_norm": 0.3312002420425415, "learning_rate": 0.0002, "epoch": 0.034126333059885154, "step": 520}, {"loss": 1.0157, "grad_norm": 0.23432421684265137, "learning_rate": 0.0002, "epoch": 0.034782608695652174, "step": 530}, {"loss": 1.0093, "grad_norm": 0.3079119622707367, "learning_rate": 0.0002, "epoch": 0.035438884331419195, "step": 540}, {"loss": 0.9534, "grad_norm": 0.2555035650730133, "learning_rate": 0.0002, "epoch": 0.03609515996718622, "step": 550}, {"loss": 0.9648, "grad_norm": 0.3530837893486023, "learning_rate": 0.0002, "epoch": 0.03675143560295324, "step": 560}, {"loss": 1.0432, "grad_norm": 0.26614823937416077, "learning_rate": 0.0002, "epoch": 0.03740771123872026, "step": 570}, {"loss": 1.0108, "grad_norm": 0.2618412971496582, "learning_rate": 0.0002, "epoch": 0.03806398687448728, "step": 580}, {"loss": 0.9834, "grad_norm": 0.26110127568244934, "learning_rate": 0.0002, "epoch": 0.03872026251025431, "step": 590}, {"loss": 0.9766, "grad_norm": 0.3050612211227417, "learning_rate": 0.0002, "epoch": 0.03937653814602133, "step": 600}, {"loss": 1.0195, "grad_norm": 0.27394089102745056, "learning_rate": 0.0002, "epoch": 0.04003281378178835, "step": 610}, {"loss": 0.9776, "grad_norm": 0.25288277864456177, "learning_rate": 0.0002, "epoch": 0.04068908941755537, "step": 620}, {"loss": 0.9199, "grad_norm": 0.2835882902145386, "learning_rate": 0.0002, "epoch": 0.0413453650533224, "step": 630}, {"loss": 0.9605, "grad_norm": 0.2621902823448181, "learning_rate": 0.0002, "epoch": 0.04200164068908942, "step": 640}, {"loss": 0.9494, "grad_norm": 0.23218439519405365, "learning_rate": 0.0002, "epoch": 0.04265791632485644, "step": 650}, {"loss": 0.994, "grad_norm": 0.26911118626594543, "learning_rate": 0.0002, "epoch": 0.043314191960623465, "step": 660}, {"loss": 0.972, "grad_norm": 0.25920751690864563, "learning_rate": 0.0002, "epoch": 0.043970467596390485, "step": 670}, {"loss": 0.9774, "grad_norm": 0.2772065997123718, "learning_rate": 0.0002, "epoch": 0.044626743232157505, "step": 680}, {"loss": 0.9114, "grad_norm": 0.275421142578125, "learning_rate": 0.0002, "epoch": 0.045283018867924525, "step": 690}, {"loss": 1.028, "grad_norm": 0.23931777477264404, "learning_rate": 0.0002, "epoch": 0.04593929450369155, "step": 700}, {"loss": 0.9309, "grad_norm": 0.3031066060066223, "learning_rate": 0.0002, "epoch": 0.04659557013945857, "step": 710}, {"loss": 0.9511, "grad_norm": 0.2655068039894104, "learning_rate": 0.0002, "epoch": 0.04725184577522559, "step": 720}, {"loss": 0.9012, "grad_norm": 0.26064610481262207, "learning_rate": 0.0002, "epoch": 0.04790812141099262, "step": 730}, {"loss": 0.9809, "grad_norm": 0.2934698760509491, "learning_rate": 0.0002, "epoch": 0.04856439704675964, "step": 740}, {"loss": 0.8859, "grad_norm": 0.2672717571258545, "learning_rate": 0.0002, "epoch": 0.04922067268252666, "step": 750}, {"loss": 1.0251, "grad_norm": 0.27246803045272827, "learning_rate": 0.0002, "epoch": 0.04987694831829368, "step": 760}, {"loss": 0.977, "grad_norm": 0.27560853958129883, "learning_rate": 0.0002, "epoch": 0.05053322395406071, "step": 770}, {"loss": 0.9735, "grad_norm": 0.2449599653482437, "learning_rate": 0.0002, "epoch": 0.05118949958982773, "step": 780}, {"loss": 0.9765, "grad_norm": 0.2873939871788025, "learning_rate": 0.0002, "epoch": 0.05184577522559475, "step": 790}, {"loss": 0.9647, "grad_norm": 0.2875595688819885, "learning_rate": 0.0002, "epoch": 0.052502050861361775, "step": 800}, {"loss": 1.0379, "grad_norm": 0.4079909324645996, "learning_rate": 0.0002, "epoch": 0.053158326497128795, "step": 810}, {"loss": 0.9782, "grad_norm": 0.2733079195022583, "learning_rate": 0.0002, "epoch": 0.053814602132895815, "step": 820}, {"loss": 0.98, "grad_norm": 0.2624184489250183, "learning_rate": 0.0002, "epoch": 0.054470877768662836, "step": 830}, {"loss": 1.0212, "grad_norm": 0.279851496219635, "learning_rate": 0.0002, "epoch": 0.05512715340442986, "step": 840}, {"loss": 0.8994, "grad_norm": 0.2573111951351166, "learning_rate": 0.0002, "epoch": 0.05578342904019688, "step": 850}, {"loss": 0.9823, "grad_norm": 0.28704535961151123, "learning_rate": 0.0002, "epoch": 0.0564397046759639, "step": 860}, {"loss": 0.9744, "grad_norm": 0.2664150297641754, "learning_rate": 0.0002, "epoch": 0.05709598031173093, "step": 870}, {"loss": 0.9379, "grad_norm": 0.2858041822910309, "learning_rate": 0.0002, "epoch": 0.05775225594749795, "step": 880}, {"loss": 0.9453, "grad_norm": 0.2394150048494339, "learning_rate": 0.0002, "epoch": 0.05840853158326497, "step": 890}, {"loss": 0.9372, "grad_norm": 0.30714845657348633, "learning_rate": 0.0002, "epoch": 0.05906480721903199, "step": 900}, {"loss": 0.9677, "grad_norm": 0.2740330994129181, "learning_rate": 0.0002, "epoch": 0.05972108285479902, "step": 910}, {"loss": 1.0112, "grad_norm": 0.33505478501319885, "learning_rate": 0.0002, "epoch": 0.06037735849056604, "step": 920}, {"loss": 0.986, "grad_norm": 0.2945438325405121, "learning_rate": 0.0002, "epoch": 0.06103363412633306, "step": 930}, {"loss": 0.9498, "grad_norm": 0.24749146401882172, "learning_rate": 0.0002, "epoch": 0.06168990976210008, "step": 940}, {"loss": 1.0357, "grad_norm": 0.3315669894218445, "learning_rate": 0.0002, "epoch": 0.062346185397867106, "step": 950}, {"loss": 0.9698, "grad_norm": 0.27253520488739014, "learning_rate": 0.0002, "epoch": 0.06300246103363413, "step": 960}, {"loss": 0.9544, "grad_norm": 0.26884031295776367, "learning_rate": 0.0002, "epoch": 0.06365873666940115, "step": 970}, {"loss": 0.9697, "grad_norm": 0.286920964717865, "learning_rate": 0.0002, "epoch": 0.06431501230516817, "step": 980}, {"loss": 0.9704, "grad_norm": 0.28334400057792664, "learning_rate": 0.0002, "epoch": 0.0649712879409352, "step": 990}, {"loss": 0.917, "grad_norm": 0.2672102749347687, "learning_rate": 0.0002, "epoch": 0.06562756357670221, "step": 1000}, {"loss": 1.0062, "grad_norm": 0.3247123062610626, "learning_rate": 0.0002, "epoch": 0.06628383921246923, "step": 1010}, {"loss": 0.9716, "grad_norm": 0.259440541267395, "learning_rate": 0.0002, "epoch": 0.06694011484823625, "step": 1020}, {"loss": 0.9569, "grad_norm": 0.2795625329017639, "learning_rate": 0.0002, "epoch": 0.06759639048400329, "step": 1030}, {"loss": 0.9842, "grad_norm": 0.2784935534000397, "learning_rate": 0.0002, "epoch": 0.06825266611977031, "step": 1040}, {"loss": 0.9171, "grad_norm": 0.24605989456176758, "learning_rate": 0.0002, "epoch": 0.06890894175553733, "step": 1050}, {"loss": 0.9538, "grad_norm": 0.25421491265296936, "learning_rate": 0.0002, "epoch": 0.06956521739130435, "step": 1060}, {"loss": 1.0142, "grad_norm": 0.2693536579608917, "learning_rate": 0.0002, "epoch": 0.07022149302707137, "step": 1070}, {"loss": 0.9436, "grad_norm": 0.28166458010673523, "learning_rate": 0.0002, "epoch": 0.07087776866283839, "step": 1080}, {"loss": 0.9187, "grad_norm": 0.2752484679222107, "learning_rate": 0.0002, "epoch": 0.07153404429860541, "step": 1090}, {"loss": 1.0069, "grad_norm": 0.24141381680965424, "learning_rate": 0.0002, "epoch": 0.07219031993437244, "step": 1100}, {"loss": 0.9282, "grad_norm": 0.2347770482301712, "learning_rate": 0.0002, "epoch": 0.07284659557013946, "step": 1110}, {"loss": 0.978, "grad_norm": 0.29999610781669617, "learning_rate": 0.0002, "epoch": 0.07350287120590648, "step": 1120}, {"loss": 0.9763, "grad_norm": 0.2811068892478943, "learning_rate": 0.0002, "epoch": 0.0741591468416735, "step": 1130}, {"loss": 0.978, "grad_norm": 0.24790801107883453, "learning_rate": 0.0002, "epoch": 0.07481542247744052, "step": 1140}, {"loss": 1.001, "grad_norm": 0.3251914978027344, "learning_rate": 0.0002, "epoch": 0.07547169811320754, "step": 1150}, {"loss": 1.0407, "grad_norm": 0.2616347074508667, "learning_rate": 0.0002, "epoch": 0.07612797374897456, "step": 1160}, {"loss": 0.978, "grad_norm": 0.2568797171115875, "learning_rate": 0.0002, "epoch": 0.07678424938474158, "step": 1170}, {"loss": 0.9851, "grad_norm": 0.2693248689174652, "learning_rate": 0.0002, "epoch": 0.07744052502050862, "step": 1180}, {"loss": 0.996, "grad_norm": 0.3270677924156189, "learning_rate": 0.0002, "epoch": 0.07809680065627564, "step": 1190}, {"loss": 0.9751, "grad_norm": 0.2481861710548401, "learning_rate": 0.0002, "epoch": 0.07875307629204266, "step": 1200}, {"loss": 1.0047, "grad_norm": 0.2612398564815521, "learning_rate": 0.0002, "epoch": 0.07940935192780968, "step": 1210}, {"loss": 0.9789, "grad_norm": 0.26566916704177856, "learning_rate": 0.0002, "epoch": 0.0800656275635767, "step": 1220}, {"loss": 1.0468, "grad_norm": 0.28026407957077026, "learning_rate": 0.0002, "epoch": 0.08072190319934372, "step": 1230}, {"loss": 0.9651, "grad_norm": 0.25609225034713745, "learning_rate": 0.0002, "epoch": 0.08137817883511074, "step": 1240}, {"loss": 0.9648, "grad_norm": 0.29063138365745544, "learning_rate": 0.0002, "epoch": 0.08203445447087777, "step": 1250}, {"loss": 0.9302, "grad_norm": 0.25998231768608093, "learning_rate": 0.0002, "epoch": 0.0826907301066448, "step": 1260}, {"loss": 0.9733, "grad_norm": 0.295261412858963, "learning_rate": 0.0002, "epoch": 0.08334700574241181, "step": 1270}, {"loss": 0.9775, "grad_norm": 0.2479529082775116, "learning_rate": 0.0002, "epoch": 0.08400328137817883, "step": 1280}, {"loss": 0.9891, "grad_norm": 0.25226080417633057, "learning_rate": 0.0002, "epoch": 0.08465955701394585, "step": 1290}, {"loss": 0.9693, "grad_norm": 0.290462851524353, "learning_rate": 0.0002, "epoch": 0.08531583264971287, "step": 1300}, {"loss": 1.0319, "grad_norm": 0.2832583785057068, "learning_rate": 0.0002, "epoch": 0.0859721082854799, "step": 1310}, {"loss": 1.0105, "grad_norm": 0.2515616714954376, "learning_rate": 0.0002, "epoch": 0.08662838392124693, "step": 1320}, {"loss": 0.9717, "grad_norm": 0.28894907236099243, "learning_rate": 0.0002, "epoch": 0.08728465955701395, "step": 1330}, {"loss": 0.9736, "grad_norm": 0.2881310284137726, "learning_rate": 0.0002, "epoch": 0.08794093519278097, "step": 1340}, {"loss": 1.0107, "grad_norm": 0.2654068171977997, "learning_rate": 0.0002, "epoch": 0.08859721082854799, "step": 1350}, {"loss": 1.0249, "grad_norm": 0.2929916977882385, "learning_rate": 0.0002, "epoch": 0.08925348646431501, "step": 1360}, {"loss": 0.9605, "grad_norm": 0.27295321226119995, "learning_rate": 0.0002, "epoch": 0.08990976210008203, "step": 1370}, {"loss": 0.9709, "grad_norm": 0.27720171213150024, "learning_rate": 0.0002, "epoch": 0.09056603773584905, "step": 1380}, {"loss": 1.0449, "grad_norm": 0.2784966826438904, "learning_rate": 0.0002, "epoch": 0.09122231337161608, "step": 1390}, {"loss": 0.9442, "grad_norm": 0.25110408663749695, "learning_rate": 0.0002, "epoch": 0.0918785890073831, "step": 1400}, {"loss": 0.9552, "grad_norm": 0.2501158118247986, "learning_rate": 0.0002, "epoch": 0.09253486464315012, "step": 1410}, {"loss": 0.9493, "grad_norm": 0.3097717761993408, "learning_rate": 0.0002, "epoch": 0.09319114027891715, "step": 1420}, {"loss": 0.9635, "grad_norm": 0.257308691740036, "learning_rate": 0.0002, "epoch": 0.09384741591468417, "step": 1430}, {"loss": 0.9749, "grad_norm": 0.2885759472846985, "learning_rate": 0.0002, "epoch": 0.09450369155045119, "step": 1440}, {"loss": 0.9592, "grad_norm": 0.2938264012336731, "learning_rate": 0.0002, "epoch": 0.0951599671862182, "step": 1450}, {"loss": 1.0326, "grad_norm": 0.2543550729751587, "learning_rate": 0.0002, "epoch": 0.09581624282198524, "step": 1460}, {"loss": 0.9433, "grad_norm": 0.3020402789115906, "learning_rate": 0.0002, "epoch": 0.09647251845775226, "step": 1470}, {"loss": 0.9132, "grad_norm": 0.2818080186843872, "learning_rate": 0.0002, "epoch": 0.09712879409351928, "step": 1480}, {"loss": 0.9217, "grad_norm": 0.2420656979084015, "learning_rate": 0.0002, "epoch": 0.0977850697292863, "step": 1490}, {"loss": 0.938, "grad_norm": 0.31427690386772156, "learning_rate": 0.0002, "epoch": 0.09844134536505332, "step": 1500}, {"loss": 0.9234, "grad_norm": 0.27618250250816345, "learning_rate": 0.0002, "epoch": 0.09909762100082034, "step": 1510}, {"loss": 0.9019, "grad_norm": 0.26401254534721375, "learning_rate": 0.0002, "epoch": 0.09975389663658736, "step": 1520}, {"loss": 0.9992, "grad_norm": 0.23575739562511444, "learning_rate": 0.0002, "epoch": 0.1004101722723544, "step": 1530}, {"loss": 0.9315, "grad_norm": 0.3076087534427643, "learning_rate": 0.0002, "epoch": 0.10106644790812142, "step": 1540}, {"loss": 1.0138, "grad_norm": 0.28179317712783813, "learning_rate": 0.0002, "epoch": 0.10172272354388844, "step": 1550}, {"loss": 0.9587, "grad_norm": 0.28638190031051636, "learning_rate": 0.0002, "epoch": 0.10237899917965546, "step": 1560}, {"loss": 0.9576, "grad_norm": 0.24788478016853333, "learning_rate": 0.0002, "epoch": 0.10303527481542248, "step": 1570}, {"loss": 0.9523, "grad_norm": 0.2987830340862274, "learning_rate": 0.0002, "epoch": 0.1036915504511895, "step": 1580}, {"loss": 0.9782, "grad_norm": 0.3252484202384949, "learning_rate": 0.0002, "epoch": 0.10434782608695652, "step": 1590}, {"loss": 0.9378, "grad_norm": 0.2763068377971649, "learning_rate": 0.0002, "epoch": 0.10500410172272355, "step": 1600}, {"loss": 0.9491, "grad_norm": 0.3219335973262787, "learning_rate": 0.0002, "epoch": 0.10566037735849057, "step": 1610}, {"loss": 0.9181, "grad_norm": 0.2708939015865326, "learning_rate": 0.0002, "epoch": 0.10631665299425759, "step": 1620}, {"loss": 1.0008, "grad_norm": 0.29320818185806274, "learning_rate": 0.0002, "epoch": 0.10697292863002461, "step": 1630}, {"loss": 0.9593, "grad_norm": 0.2731851041316986, "learning_rate": 0.0002, "epoch": 0.10762920426579163, "step": 1640}, {"loss": 0.9915, "grad_norm": 0.29563331604003906, "learning_rate": 0.0002, "epoch": 0.10828547990155865, "step": 1650}, {"loss": 0.9543, "grad_norm": 0.33387669920921326, "learning_rate": 0.0002, "epoch": 0.10894175553732567, "step": 1660}, {"loss": 0.941, "grad_norm": 0.3157867193222046, "learning_rate": 0.0002, "epoch": 0.1095980311730927, "step": 1670}, {"loss": 0.9107, "grad_norm": 0.24688409268856049, "learning_rate": 0.0002, "epoch": 0.11025430680885973, "step": 1680}, {"loss": 0.9392, "grad_norm": 0.2868671417236328, "learning_rate": 0.0002, "epoch": 0.11091058244462675, "step": 1690}, {"loss": 0.9476, "grad_norm": 0.2555101811885834, "learning_rate": 0.0002, "epoch": 0.11156685808039377, "step": 1700}, {"loss": 0.9261, "grad_norm": 0.3224661946296692, "learning_rate": 0.0002, "epoch": 0.11222313371616079, "step": 1710}, {"loss": 0.9372, "grad_norm": 0.28927505016326904, "learning_rate": 0.0002, "epoch": 0.1128794093519278, "step": 1720}, {"loss": 0.9237, "grad_norm": 0.25575608015060425, "learning_rate": 0.0002, "epoch": 0.11353568498769483, "step": 1730}, {"loss": 0.9526, "grad_norm": 0.27490100264549255, "learning_rate": 0.0002, "epoch": 0.11419196062346186, "step": 1740}, {"loss": 1.0311, "grad_norm": 0.28235989809036255, "learning_rate": 0.0002, "epoch": 0.11484823625922888, "step": 1750}, {"loss": 0.9112, "grad_norm": 0.2636060416698456, "learning_rate": 0.0002, "epoch": 0.1155045118949959, "step": 1760}, {"loss": 0.9249, "grad_norm": 0.2642059624195099, "learning_rate": 0.0002, "epoch": 0.11616078753076292, "step": 1770}, {"loss": 0.964, "grad_norm": 0.261807918548584, "learning_rate": 0.0002, "epoch": 0.11681706316652994, "step": 1780}, {"loss": 0.9473, "grad_norm": 0.24618281424045563, "learning_rate": 0.0002, "epoch": 0.11747333880229696, "step": 1790}, {"loss": 0.9452, "grad_norm": 0.2700425982475281, "learning_rate": 0.0002, "epoch": 0.11812961443806398, "step": 1800}, {"loss": 0.9687, "grad_norm": 0.22687454521656036, "learning_rate": 0.0002, "epoch": 0.11878589007383102, "step": 1810}, {"loss": 0.9099, "grad_norm": 0.2843499481678009, "learning_rate": 0.0002, "epoch": 0.11944216570959804, "step": 1820}, {"loss": 0.9295, "grad_norm": 0.23511677980422974, "learning_rate": 0.0002, "epoch": 0.12009844134536506, "step": 1830}, {"loss": 0.998, "grad_norm": 0.34054702520370483, "learning_rate": 0.0002, "epoch": 0.12075471698113208, "step": 1840}, {"loss": 0.9343, "grad_norm": 0.32845374941825867, "learning_rate": 0.0002, "epoch": 0.1214109926168991, "step": 1850}, {"loss": 0.983, "grad_norm": 0.2843034267425537, "learning_rate": 0.0002, "epoch": 0.12206726825266612, "step": 1860}, {"loss": 0.963, "grad_norm": 0.2824552357196808, "learning_rate": 0.0002, "epoch": 0.12272354388843314, "step": 1870}, {"loss": 0.9875, "grad_norm": 0.26878267526626587, "learning_rate": 0.0002, "epoch": 0.12337981952420016, "step": 1880}, {"loss": 0.9784, "grad_norm": 0.31207871437072754, "learning_rate": 0.0002, "epoch": 0.12403609515996719, "step": 1890}, {"loss": 0.9497, "grad_norm": 0.2617819905281067, "learning_rate": 0.0002, "epoch": 0.12469237079573421, "step": 1900}, {"loss": 0.8948, "grad_norm": 0.2652885615825653, "learning_rate": 0.0002, "epoch": 0.12534864643150123, "step": 1910}, {"loss": 0.9534, "grad_norm": 0.29473352432250977, "learning_rate": 0.0002, "epoch": 0.12600492206726827, "step": 1920}, {"loss": 0.9695, "grad_norm": 0.3101664185523987, "learning_rate": 0.0002, "epoch": 0.12666119770303527, "step": 1930}, {"loss": 0.9928, "grad_norm": 0.2714068293571472, "learning_rate": 0.0002, "epoch": 0.1273174733388023, "step": 1940}, {"loss": 1.0293, "grad_norm": 0.2684655487537384, "learning_rate": 0.0002, "epoch": 0.1279737489745693, "step": 1950}, {"loss": 0.9236, "grad_norm": 0.2543509900569916, "learning_rate": 0.0002, "epoch": 0.12863002461033635, "step": 1960}, {"loss": 0.9598, "grad_norm": 0.25268790125846863, "learning_rate": 0.0002, "epoch": 0.12928630024610335, "step": 1970}, {"loss": 0.967, "grad_norm": 0.27713078260421753, "learning_rate": 0.0002, "epoch": 0.1299425758818704, "step": 1980}, {"loss": 0.9411, "grad_norm": 0.3185126483440399, "learning_rate": 0.0002, "epoch": 0.13059885151763742, "step": 1990}, {"loss": 0.9864, "grad_norm": 0.26718857884407043, "learning_rate": 0.0002, "epoch": 0.13125512715340443, "step": 2000}, {"loss": 0.9728, "grad_norm": 0.2843841314315796, "learning_rate": 0.0002, "epoch": 0.13191140278917146, "step": 2010}, {"loss": 0.993, "grad_norm": 0.3013055622577667, "learning_rate": 0.0002, "epoch": 0.13256767842493847, "step": 2020}, {"loss": 0.9437, "grad_norm": 0.27985867857933044, "learning_rate": 0.0002, "epoch": 0.1332239540607055, "step": 2030}, {"loss": 0.905, "grad_norm": 0.2501908242702484, "learning_rate": 0.0002, "epoch": 0.1338802296964725, "step": 2040}, {"loss": 0.9603, "grad_norm": 0.441340833902359, "learning_rate": 0.0002, "epoch": 0.13453650533223954, "step": 2050}, {"loss": 0.9166, "grad_norm": 2.568060874938965, "learning_rate": 0.0002, "epoch": 0.13519278096800658, "step": 2060}, {"loss": 0.9803, "grad_norm": 0.2505454421043396, "learning_rate": 0.0002, "epoch": 0.13584905660377358, "step": 2070}, {"loss": 1.0159, "grad_norm": 0.33997446298599243, "learning_rate": 0.0002, "epoch": 0.13650533223954062, "step": 2080}, {"loss": 0.908, "grad_norm": 0.28161343932151794, "learning_rate": 0.0002, "epoch": 0.13716160787530762, "step": 2090}, {"loss": 0.9709, "grad_norm": 0.28142687678337097, "learning_rate": 0.0002, "epoch": 0.13781788351107466, "step": 2100}, {"loss": 0.9716, "grad_norm": 0.3137170076370239, "learning_rate": 0.0002, "epoch": 0.13847415914684166, "step": 2110}, {"loss": 0.9698, "grad_norm": 0.3072240352630615, "learning_rate": 0.0002, "epoch": 0.1391304347826087, "step": 2120}, {"loss": 0.9979, "grad_norm": 0.24328380823135376, "learning_rate": 0.0002, "epoch": 0.13978671041837573, "step": 2130}, {"loss": 0.9549, "grad_norm": 0.3065047860145569, "learning_rate": 0.0002, "epoch": 0.14044298605414274, "step": 2140}, {"loss": 0.9897, "grad_norm": 0.34212175011634827, "learning_rate": 0.0002, "epoch": 0.14109926168990977, "step": 2150}, {"loss": 0.9601, "grad_norm": 0.27491796016693115, "learning_rate": 0.0002, "epoch": 0.14175553732567678, "step": 2160}, {"loss": 0.9928, "grad_norm": 0.26518693566322327, "learning_rate": 0.0002, "epoch": 0.1424118129614438, "step": 2170}, {"loss": 0.9385, "grad_norm": 0.28350934386253357, "learning_rate": 0.0002, "epoch": 0.14306808859721082, "step": 2180}, {"loss": 0.9878, "grad_norm": 0.3287768065929413, "learning_rate": 0.0002, "epoch": 0.14372436423297785, "step": 2190}, {"loss": 0.9192, "grad_norm": 0.26362666487693787, "learning_rate": 0.0002, "epoch": 0.1443806398687449, "step": 2200}, {"loss": 0.9804, "grad_norm": 0.31169235706329346, "learning_rate": 0.0002, "epoch": 0.1450369155045119, "step": 2210}, {"loss": 0.9857, "grad_norm": 0.258667528629303, "learning_rate": 0.0002, "epoch": 0.14569319114027893, "step": 2220}, {"loss": 0.9898, "grad_norm": 0.32289111614227295, "learning_rate": 0.0002, "epoch": 0.14634946677604593, "step": 2230}, {"loss": 1.0051, "grad_norm": 0.3344270586967468, "learning_rate": 0.0002, "epoch": 0.14700574241181297, "step": 2240}, {"loss": 0.9337, "grad_norm": 0.3001033067703247, "learning_rate": 0.0002, "epoch": 0.14766201804757997, "step": 2250}, {"loss": 0.9788, "grad_norm": 0.2718261778354645, "learning_rate": 0.0002, "epoch": 0.148318293683347, "step": 2260}, {"loss": 0.9801, "grad_norm": 0.3059164583683014, "learning_rate": 0.0002, "epoch": 0.14897456931911401, "step": 2270}, {"loss": 0.8914, "grad_norm": 0.2939850389957428, "learning_rate": 0.0002, "epoch": 0.14963084495488105, "step": 2280}, {"loss": 0.951, "grad_norm": 0.2803564965724945, "learning_rate": 0.0002, "epoch": 0.15028712059064808, "step": 2290}, {"loss": 0.9131, "grad_norm": 0.2843068242073059, "learning_rate": 0.0002, "epoch": 0.1509433962264151, "step": 2300}, {"loss": 0.9721, "grad_norm": 0.28517085313796997, "learning_rate": 0.0002, "epoch": 0.15159967186218212, "step": 2310}, {"loss": 0.9402, "grad_norm": 0.291030615568161, "learning_rate": 0.0002, "epoch": 0.15225594749794913, "step": 2320}, {"loss": 0.9509, "grad_norm": 0.2712008059024811, "learning_rate": 0.0002, "epoch": 0.15291222313371616, "step": 2330}, {"loss": 0.9406, "grad_norm": 0.3357657790184021, "learning_rate": 0.0002, "epoch": 0.15356849876948317, "step": 2340}, {"loss": 0.961, "grad_norm": 0.28412291407585144, "learning_rate": 0.0002, "epoch": 0.1542247744052502, "step": 2350}, {"loss": 0.9254, "grad_norm": 0.3217862844467163, "learning_rate": 0.0002, "epoch": 0.15488105004101724, "step": 2360}, {"loss": 0.9172, "grad_norm": 0.32076528668403625, "learning_rate": 0.0002, "epoch": 0.15553732567678424, "step": 2370}, {"loss": 0.9459, "grad_norm": 0.3490257263183594, "learning_rate": 0.0002, "epoch": 0.15619360131255128, "step": 2380}, {"loss": 0.9051, "grad_norm": 0.28253331780433655, "learning_rate": 0.0002, "epoch": 0.15684987694831828, "step": 2390}, {"loss": 0.9597, "grad_norm": 0.2875654399394989, "learning_rate": 0.0002, "epoch": 0.15750615258408532, "step": 2400}, {"loss": 0.9164, "grad_norm": 0.3347921669483185, "learning_rate": 0.0002, "epoch": 0.15816242821985232, "step": 2410}, {"loss": 1.0023, "grad_norm": 0.2624322474002838, "learning_rate": 0.0002, "epoch": 0.15881870385561936, "step": 2420}, {"loss": 0.9751, "grad_norm": 0.29683780670166016, "learning_rate": 0.0002, "epoch": 0.1594749794913864, "step": 2430}, {"loss": 0.967, "grad_norm": 0.29237303137779236, "learning_rate": 0.0002, "epoch": 0.1601312551271534, "step": 2440}, {"loss": 0.9754, "grad_norm": 0.3541257977485657, "learning_rate": 0.0002, "epoch": 0.16078753076292043, "step": 2450}, {"loss": 0.8977, "grad_norm": 0.28203415870666504, "learning_rate": 0.0002, "epoch": 0.16144380639868744, "step": 2460}, {"loss": 0.9489, "grad_norm": 0.322329044342041, "learning_rate": 0.0002, "epoch": 0.16210008203445447, "step": 2470}, {"loss": 0.9224, "grad_norm": 0.27731558680534363, "learning_rate": 0.0002, "epoch": 0.16275635767022148, "step": 2480}, {"loss": 0.9773, "grad_norm": 0.30348825454711914, "learning_rate": 0.0002, "epoch": 0.16341263330598851, "step": 2490}, {"loss": 0.9318, "grad_norm": 0.29504773020744324, "learning_rate": 0.0002, "epoch": 0.16406890894175555, "step": 2500}, {"loss": 0.8931, "grad_norm": 0.2977028489112854, "learning_rate": 0.0002, "epoch": 0.16472518457752255, "step": 2510}, {"loss": 0.953, "grad_norm": 0.3172457218170166, "learning_rate": 0.0002, "epoch": 0.1653814602132896, "step": 2520}, {"loss": 0.9786, "grad_norm": 0.3320612907409668, "learning_rate": 0.0002, "epoch": 0.1660377358490566, "step": 2530}, {"loss": 0.895, "grad_norm": 0.2473023235797882, "learning_rate": 0.0002, "epoch": 0.16669401148482363, "step": 2540}, {"loss": 0.9692, "grad_norm": 0.3250006139278412, "learning_rate": 0.0002, "epoch": 0.16735028712059064, "step": 2550}, {"loss": 0.956, "grad_norm": 0.2940689027309418, "learning_rate": 0.0002, "epoch": 0.16800656275635767, "step": 2560}, {"loss": 0.8926, "grad_norm": 0.34614887833595276, "learning_rate": 0.0002, "epoch": 0.1686628383921247, "step": 2570}, {"loss": 1.0153, "grad_norm": 0.3513164520263672, "learning_rate": 0.0002, "epoch": 0.1693191140278917, "step": 2580}, {"loss": 0.9608, "grad_norm": 0.2530531585216522, "learning_rate": 0.0002, "epoch": 0.16997538966365874, "step": 2590}, {"loss": 0.9876, "grad_norm": 0.3028896152973175, "learning_rate": 0.0002, "epoch": 0.17063166529942575, "step": 2600}, {"loss": 0.9415, "grad_norm": 0.304739773273468, "learning_rate": 0.0002, "epoch": 0.17128794093519278, "step": 2610}, {"loss": 0.9857, "grad_norm": 0.3360660672187805, "learning_rate": 0.0002, "epoch": 0.1719442165709598, "step": 2620}, {"loss": 0.9173, "grad_norm": 0.24901753664016724, "learning_rate": 0.0002, "epoch": 0.17260049220672682, "step": 2630}, {"loss": 0.9351, "grad_norm": 0.32212400436401367, "learning_rate": 0.0002, "epoch": 0.17325676784249386, "step": 2640}, {"loss": 0.9284, "grad_norm": 0.2780437767505646, "learning_rate": 0.0002, "epoch": 0.17391304347826086, "step": 2650}, {"loss": 0.9368, "grad_norm": 0.25902262330055237, "learning_rate": 0.0002, "epoch": 0.1745693191140279, "step": 2660}, {"loss": 0.9378, "grad_norm": 0.29171422123908997, "learning_rate": 0.0002, "epoch": 0.1752255947497949, "step": 2670}, {"loss": 0.9525, "grad_norm": 0.2586783766746521, "learning_rate": 0.0002, "epoch": 0.17588187038556194, "step": 2680}, {"loss": 0.9367, "grad_norm": 0.25609949231147766, "learning_rate": 0.0002, "epoch": 0.17653814602132895, "step": 2690}, {"loss": 0.8675, "grad_norm": 0.29789483547210693, "learning_rate": 0.0002, "epoch": 0.17719442165709598, "step": 2700}, {"loss": 0.9918, "grad_norm": 0.2938411235809326, "learning_rate": 0.0002, "epoch": 0.177850697292863, "step": 2710}, {"loss": 0.9824, "grad_norm": 0.31436532735824585, "learning_rate": 0.0002, "epoch": 0.17850697292863002, "step": 2720}, {"loss": 1.0094, "grad_norm": 0.31310203671455383, "learning_rate": 0.0002, "epoch": 0.17916324856439705, "step": 2730}, {"loss": 0.9105, "grad_norm": 0.2695367634296417, "learning_rate": 0.0002, "epoch": 0.17981952420016406, "step": 2740}, {"loss": 0.9216, "grad_norm": 0.32590436935424805, "learning_rate": 0.0002, "epoch": 0.1804757998359311, "step": 2750}, {"loss": 0.923, "grad_norm": 0.32683756947517395, "learning_rate": 0.0002, "epoch": 0.1811320754716981, "step": 2760}, {"loss": 0.938, "grad_norm": 0.33043643832206726, "learning_rate": 0.0002, "epoch": 0.18178835110746513, "step": 2770}, {"loss": 0.937, "grad_norm": 0.3137816786766052, "learning_rate": 0.0002, "epoch": 0.18244462674323217, "step": 2780}, {"loss": 0.8923, "grad_norm": 0.33037737011909485, "learning_rate": 0.0002, "epoch": 0.18310090237899918, "step": 2790}, {"loss": 0.9179, "grad_norm": 0.27679184079170227, "learning_rate": 0.0002, "epoch": 0.1837571780147662, "step": 2800}, {"loss": 0.9047, "grad_norm": 0.42975902557373047, "learning_rate": 0.0002, "epoch": 0.18441345365053322, "step": 2810}, {"loss": 0.9431, "grad_norm": 0.2915041744709015, "learning_rate": 0.0002, "epoch": 0.18506972928630025, "step": 2820}, {"loss": 0.9573, "grad_norm": 0.315197229385376, "learning_rate": 0.0002, "epoch": 0.18572600492206726, "step": 2830}, {"loss": 0.9909, "grad_norm": 0.31129002571105957, "learning_rate": 0.0002, "epoch": 0.1863822805578343, "step": 2840}, {"loss": 0.9222, "grad_norm": 0.2783232033252716, "learning_rate": 0.0002, "epoch": 0.18703855619360132, "step": 2850}, {"loss": 0.9347, "grad_norm": 0.34501492977142334, "learning_rate": 0.0002, "epoch": 0.18769483182936833, "step": 2860}, {"loss": 0.949, "grad_norm": 0.32616767287254333, "learning_rate": 0.0002, "epoch": 0.18835110746513536, "step": 2870}, {"loss": 0.9909, "grad_norm": 0.25213682651519775, "learning_rate": 0.0002, "epoch": 0.18900738310090237, "step": 2880}, {"loss": 0.892, "grad_norm": 0.2745118737220764, "learning_rate": 0.0002, "epoch": 0.1896636587366694, "step": 2890}, {"loss": 0.9737, "grad_norm": 0.26175656914711, "learning_rate": 0.0002, "epoch": 0.1903199343724364, "step": 2900}, {"loss": 0.8853, "grad_norm": 0.3019673228263855, "learning_rate": 0.0002, "epoch": 0.19097621000820345, "step": 2910}, {"loss": 1.0016, "grad_norm": 0.30918899178504944, "learning_rate": 0.0002, "epoch": 0.19163248564397048, "step": 2920}, {"loss": 0.9301, "grad_norm": 0.320893794298172, "learning_rate": 0.0002, "epoch": 0.19228876127973749, "step": 2930}, {"loss": 0.9592, "grad_norm": 0.2736368477344513, "learning_rate": 0.0002, "epoch": 0.19294503691550452, "step": 2940}, {"loss": 0.9334, "grad_norm": 0.28817251324653625, "learning_rate": 0.0002, "epoch": 0.19360131255127153, "step": 2950}, {"loss": 0.9484, "grad_norm": 0.26863837242126465, "learning_rate": 0.0002, "epoch": 0.19425758818703856, "step": 2960}, {"loss": 0.9466, "grad_norm": 0.28012585639953613, "learning_rate": 0.0002, "epoch": 0.19491386382280557, "step": 2970}, {"loss": 0.9623, "grad_norm": 0.2929932773113251, "learning_rate": 0.0002, "epoch": 0.1955701394585726, "step": 2980}, {"loss": 0.9406, "grad_norm": 0.29878300428390503, "learning_rate": 0.0002, "epoch": 0.19622641509433963, "step": 2990}, {"loss": 0.9714, "grad_norm": 0.2573733329772949, "learning_rate": 0.0002, "epoch": 0.19688269073010664, "step": 3000}, {"loss": 0.9763, "grad_norm": 0.31736820936203003, "learning_rate": 0.0002, "epoch": 0.19753896636587367, "step": 3010}, {"loss": 1.0111, "grad_norm": 0.2864682972431183, "learning_rate": 0.0002, "epoch": 0.19819524200164068, "step": 3020}, {"loss": 0.9193, "grad_norm": 0.3161790668964386, "learning_rate": 0.0002, "epoch": 0.19885151763740772, "step": 3030}, {"loss": 0.9298, "grad_norm": 0.37538009881973267, "learning_rate": 0.0002, "epoch": 0.19950779327317472, "step": 3040}, {"loss": 0.9329, "grad_norm": 0.2850522994995117, "learning_rate": 0.0002, "epoch": 0.20016406890894176, "step": 3050}, {"loss": 1.0777, "grad_norm": 0.36680465936660767, "learning_rate": 0.0002, "epoch": 0.2008203445447088, "step": 3060}, {"loss": 0.9042, "grad_norm": 0.2879831790924072, "learning_rate": 0.0002, "epoch": 0.2014766201804758, "step": 3070}, {"loss": 0.8925, "grad_norm": 0.29652512073516846, "learning_rate": 0.0002, "epoch": 0.20213289581624283, "step": 3080}, {"loss": 0.9928, "grad_norm": 0.3131853938102722, "learning_rate": 0.0002, "epoch": 0.20278917145200984, "step": 3090}, {"loss": 0.9968, "grad_norm": 0.34605351090431213, "learning_rate": 0.0002, "epoch": 0.20344544708777687, "step": 3100}, {"loss": 0.9701, "grad_norm": 0.30542251467704773, "learning_rate": 0.0002, "epoch": 0.20410172272354388, "step": 3110}, {"loss": 0.9483, "grad_norm": 0.28131186962127686, "learning_rate": 0.0002, "epoch": 0.2047579983593109, "step": 3120}, {"loss": 0.9332, "grad_norm": 0.257859468460083, "learning_rate": 0.0002, "epoch": 0.20541427399507795, "step": 3130}, {"loss": 0.9771, "grad_norm": 0.32994887232780457, "learning_rate": 0.0002, "epoch": 0.20607054963084495, "step": 3140}, {"loss": 0.9368, "grad_norm": 0.26293760538101196, "learning_rate": 0.0002, "epoch": 0.20672682526661199, "step": 3150}, {"loss": 0.9801, "grad_norm": 0.29514846205711365, "learning_rate": 0.0002, "epoch": 0.207383100902379, "step": 3160}, {"loss": 0.9879, "grad_norm": 0.36102691292762756, "learning_rate": 0.0002, "epoch": 0.20803937653814603, "step": 3170}, {"loss": 0.9322, "grad_norm": 0.3298998475074768, "learning_rate": 0.0002, "epoch": 0.20869565217391303, "step": 3180}, {"loss": 0.9456, "grad_norm": 0.29835769534111023, "learning_rate": 0.0002, "epoch": 0.20935192780968007, "step": 3190}, {"loss": 1.0199, "grad_norm": 0.3438013792037964, "learning_rate": 0.0002, "epoch": 0.2100082034454471, "step": 3200}, {"loss": 0.9104, "grad_norm": 0.2945845127105713, "learning_rate": 0.0002, "epoch": 0.2106644790812141, "step": 3210}, {"loss": 0.9231, "grad_norm": 0.3173643946647644, "learning_rate": 0.0002, "epoch": 0.21132075471698114, "step": 3220}, {"loss": 0.892, "grad_norm": 0.30580341815948486, "learning_rate": 0.0002, "epoch": 0.21197703035274815, "step": 3230}, {"loss": 0.9458, "grad_norm": 0.32913172245025635, "learning_rate": 0.0002, "epoch": 0.21263330598851518, "step": 3240}, {"loss": 0.9475, "grad_norm": 0.2739659249782562, "learning_rate": 0.0002, "epoch": 0.2132895816242822, "step": 3250}, {"loss": 0.9185, "grad_norm": 0.3016273081302643, "learning_rate": 0.0002, "epoch": 0.21394585726004922, "step": 3260}, {"loss": 0.9819, "grad_norm": 0.2818678319454193, "learning_rate": 0.0002, "epoch": 0.21460213289581626, "step": 3270}, {"loss": 0.9535, "grad_norm": 0.3243506848812103, "learning_rate": 0.0002, "epoch": 0.21525840853158326, "step": 3280}, {"loss": 0.9355, "grad_norm": 0.38120919466018677, "learning_rate": 0.0002, "epoch": 0.2159146841673503, "step": 3290}, {"loss": 0.9556, "grad_norm": 0.3041105568408966, "learning_rate": 0.0002, "epoch": 0.2165709598031173, "step": 3300}, {"loss": 0.9172, "grad_norm": 0.2648089528083801, "learning_rate": 0.0002, "epoch": 0.21722723543888434, "step": 3310}, {"loss": 0.9567, "grad_norm": 0.324095219373703, "learning_rate": 0.0002, "epoch": 0.21788351107465134, "step": 3320}, {"loss": 0.9586, "grad_norm": 0.2796897888183594, "learning_rate": 0.0002, "epoch": 0.21853978671041838, "step": 3330}, {"loss": 0.9505, "grad_norm": 0.30163177847862244, "learning_rate": 0.0002, "epoch": 0.2191960623461854, "step": 3340}, {"loss": 0.9515, "grad_norm": 0.29213520884513855, "learning_rate": 0.0002, "epoch": 0.21985233798195242, "step": 3350}, {"loss": 0.9692, "grad_norm": 0.28203412890434265, "learning_rate": 0.0002, "epoch": 0.22050861361771945, "step": 3360}, {"loss": 0.94, "grad_norm": 0.31636562943458557, "learning_rate": 0.0002, "epoch": 0.22116488925348646, "step": 3370}, {"loss": 0.901, "grad_norm": 0.3153952956199646, "learning_rate": 0.0002, "epoch": 0.2218211648892535, "step": 3380}, {"loss": 0.8994, "grad_norm": 0.2793780267238617, "learning_rate": 0.0002, "epoch": 0.2224774405250205, "step": 3390}, {"loss": 0.9424, "grad_norm": 0.36783504486083984, "learning_rate": 0.0002, "epoch": 0.22313371616078753, "step": 3400}, {"loss": 0.9837, "grad_norm": 0.31803956627845764, "learning_rate": 0.0002, "epoch": 0.22378999179655457, "step": 3410}, {"loss": 0.9234, "grad_norm": 0.31517738103866577, "learning_rate": 0.0002, "epoch": 0.22444626743232157, "step": 3420}, {"loss": 0.9336, "grad_norm": 0.4124458432197571, "learning_rate": 0.0002, "epoch": 0.2251025430680886, "step": 3430}, {"loss": 0.8587, "grad_norm": 0.29330259561538696, "learning_rate": 0.0002, "epoch": 0.2257588187038556, "step": 3440}, {"loss": 0.925, "grad_norm": 0.281255304813385, "learning_rate": 0.0002, "epoch": 0.22641509433962265, "step": 3450}, {"loss": 0.9797, "grad_norm": 0.3765242397785187, "learning_rate": 0.0002, "epoch": 0.22707136997538965, "step": 3460}, {"loss": 1.0201, "grad_norm": 0.33658838272094727, "learning_rate": 0.0002, "epoch": 0.2277276456111567, "step": 3470}, {"loss": 0.9255, "grad_norm": 0.2809208035469055, "learning_rate": 0.0002, "epoch": 0.22838392124692372, "step": 3480}, {"loss": 0.8948, "grad_norm": 0.3064846098423004, "learning_rate": 0.0002, "epoch": 0.22904019688269073, "step": 3490}, {"loss": 0.8805, "grad_norm": 0.31982484459877014, "learning_rate": 0.0002, "epoch": 0.22969647251845776, "step": 3500}, {"loss": 0.927, "grad_norm": 0.27903324365615845, "learning_rate": 0.0002, "epoch": 0.23035274815422477, "step": 3510}, {"loss": 0.9103, "grad_norm": 0.35411202907562256, "learning_rate": 0.0002, "epoch": 0.2310090237899918, "step": 3520}, {"loss": 0.9087, "grad_norm": 0.2796868681907654, "learning_rate": 0.0002, "epoch": 0.2316652994257588, "step": 3530}, {"loss": 0.9861, "grad_norm": 0.3428329825401306, "learning_rate": 0.0002, "epoch": 0.23232157506152584, "step": 3540}, {"loss": 0.9025, "grad_norm": 0.30563032627105713, "learning_rate": 0.0002, "epoch": 0.23297785069729288, "step": 3550}, {"loss": 0.9132, "grad_norm": 0.2954406142234802, "learning_rate": 0.0002, "epoch": 0.23363412633305988, "step": 3560}, {"loss": 0.9444, "grad_norm": 0.3328028917312622, "learning_rate": 0.0002, "epoch": 0.23429040196882692, "step": 3570}, {"loss": 0.9161, "grad_norm": 0.32020696997642517, "learning_rate": 0.0002, "epoch": 0.23494667760459392, "step": 3580}, {"loss": 0.9193, "grad_norm": 0.2774750292301178, "learning_rate": 0.0002, "epoch": 0.23560295324036096, "step": 3590}, {"loss": 0.8935, "grad_norm": 0.24560679495334625, "learning_rate": 0.0002, "epoch": 0.23625922887612796, "step": 3600}, {"loss": 0.9335, "grad_norm": 0.3278765082359314, "learning_rate": 0.0002, "epoch": 0.236915504511895, "step": 3610}, {"loss": 0.8958, "grad_norm": 0.4417719542980194, "learning_rate": 0.0002, "epoch": 0.23757178014766203, "step": 3620}, {"loss": 0.9683, "grad_norm": 0.3339618742465973, "learning_rate": 0.0002, "epoch": 0.23822805578342904, "step": 3630}, {"loss": 0.9536, "grad_norm": 0.2925402522087097, "learning_rate": 0.0002, "epoch": 0.23888433141919607, "step": 3640}, {"loss": 0.9695, "grad_norm": 0.3060242831707001, "learning_rate": 0.0002, "epoch": 0.23954060705496308, "step": 3650}, {"loss": 0.9573, "grad_norm": 0.3095077872276306, "learning_rate": 0.0002, "epoch": 0.2401968826907301, "step": 3660}, {"loss": 0.9023, "grad_norm": 0.31151828169822693, "learning_rate": 0.0002, "epoch": 0.24085315832649712, "step": 3670}, {"loss": 0.9642, "grad_norm": 0.293610543012619, "learning_rate": 0.0002, "epoch": 0.24150943396226415, "step": 3680}, {"loss": 0.9449, "grad_norm": 0.28868040442466736, "learning_rate": 0.0002, "epoch": 0.2421657095980312, "step": 3690}, {"loss": 0.9258, "grad_norm": 0.3123609721660614, "learning_rate": 0.0002, "epoch": 0.2428219852337982, "step": 3700}, {"loss": 0.9013, "grad_norm": 0.29155978560447693, "learning_rate": 0.0002, "epoch": 0.24347826086956523, "step": 3710}, {"loss": 0.8977, "grad_norm": 0.36799871921539307, "learning_rate": 0.0002, "epoch": 0.24413453650533223, "step": 3720}, {"loss": 1.0157, "grad_norm": 0.30915290117263794, "learning_rate": 0.0002, "epoch": 0.24479081214109927, "step": 3730}, {"loss": 0.9137, "grad_norm": 0.2889885902404785, "learning_rate": 0.0002, "epoch": 0.24544708777686627, "step": 3740}, {"loss": 0.9279, "grad_norm": 0.30213671922683716, "learning_rate": 0.0002, "epoch": 0.2461033634126333, "step": 3750}, {"loss": 0.9202, "grad_norm": 0.3242565393447876, "learning_rate": 0.0002, "epoch": 0.24675963904840031, "step": 3760}, {"loss": 0.9323, "grad_norm": 0.27826303243637085, "learning_rate": 0.0002, "epoch": 0.24741591468416735, "step": 3770}, {"loss": 0.933, "grad_norm": 0.2813768982887268, "learning_rate": 0.0002, "epoch": 0.24807219031993438, "step": 3780}, {"loss": 0.9046, "grad_norm": 0.34054139256477356, "learning_rate": 0.0002, "epoch": 0.2487284659557014, "step": 3790}, {"loss": 0.9252, "grad_norm": 0.2804257869720459, "learning_rate": 0.0002, "epoch": 0.24938474159146842, "step": 3800}, {"loss": 0.8305, "grad_norm": 0.2920171618461609, "learning_rate": 0.0002, "epoch": 0.25004101722723543, "step": 3810}, {"loss": 0.883, "grad_norm": 0.2967351973056793, "learning_rate": 0.0002, "epoch": 0.25069729286300246, "step": 3820}, {"loss": 0.8767, "grad_norm": 0.27417901158332825, "learning_rate": 0.0002, "epoch": 0.2513535684987695, "step": 3830}, {"loss": 0.9459, "grad_norm": 0.30916500091552734, "learning_rate": 0.0002, "epoch": 0.25200984413453653, "step": 3840}, {"loss": 0.9774, "grad_norm": 0.3281477391719818, "learning_rate": 0.0002, "epoch": 0.2526661197703035, "step": 3850}, {"loss": 0.8779, "grad_norm": 0.3417379856109619, "learning_rate": 0.0002, "epoch": 0.25332239540607054, "step": 3860}, {"loss": 0.9155, "grad_norm": 0.3119784891605377, "learning_rate": 0.0002, "epoch": 0.2539786710418376, "step": 3870}, {"loss": 0.9466, "grad_norm": 0.3053247332572937, "learning_rate": 0.0002, "epoch": 0.2546349466776046, "step": 3880}, {"loss": 0.8882, "grad_norm": 0.3155108094215393, "learning_rate": 0.0002, "epoch": 0.2552912223133716, "step": 3890}, {"loss": 0.9464, "grad_norm": 0.3240940272808075, "learning_rate": 0.0002, "epoch": 0.2559474979491386, "step": 3900}, {"loss": 0.9765, "grad_norm": 0.32433149218559265, "learning_rate": 0.0002, "epoch": 0.25660377358490566, "step": 3910}, {"loss": 0.9714, "grad_norm": 0.2982637286186218, "learning_rate": 0.0002, "epoch": 0.2572600492206727, "step": 3920}, {"loss": 0.8758, "grad_norm": 0.34742113947868347, "learning_rate": 0.0002, "epoch": 0.2579163248564397, "step": 3930}, {"loss": 0.9335, "grad_norm": 0.3415607213973999, "learning_rate": 0.0002, "epoch": 0.2585726004922067, "step": 3940}, {"loss": 0.9016, "grad_norm": 0.2852018475532532, "learning_rate": 0.0002, "epoch": 0.25922887612797374, "step": 3950}, {"loss": 0.9972, "grad_norm": 0.2924928665161133, "learning_rate": 0.0002, "epoch": 0.2598851517637408, "step": 3960}, {"loss": 0.9524, "grad_norm": 0.29387834668159485, "learning_rate": 0.0002, "epoch": 0.2605414273995078, "step": 3970}, {"loss": 0.9197, "grad_norm": 0.32400810718536377, "learning_rate": 0.0002, "epoch": 0.26119770303527484, "step": 3980}, {"loss": 0.9886, "grad_norm": 0.35629674792289734, "learning_rate": 0.0002, "epoch": 0.2618539786710418, "step": 3990}, {"loss": 0.9002, "grad_norm": 0.40292513370513916, "learning_rate": 0.0002, "epoch": 0.26251025430680885, "step": 4000}, {"loss": 0.9561, "grad_norm": 0.3452664613723755, "learning_rate": 0.0002, "epoch": 0.2631665299425759, "step": 4010}, {"loss": 0.9231, "grad_norm": 0.2956177592277527, "learning_rate": 0.0002, "epoch": 0.2638228055783429, "step": 4020}, {"loss": 0.9269, "grad_norm": 0.3159438669681549, "learning_rate": 0.0002, "epoch": 0.2644790812141099, "step": 4030}, {"loss": 0.9682, "grad_norm": 0.25804826617240906, "learning_rate": 0.0002, "epoch": 0.26513535684987694, "step": 4040}, {"loss": 0.9681, "grad_norm": 0.3446296751499176, "learning_rate": 0.0002, "epoch": 0.26579163248564397, "step": 4050}, {"loss": 0.9451, "grad_norm": 0.32568585872650146, "learning_rate": 0.0002, "epoch": 0.266447908121411, "step": 4060}, {"loss": 0.907, "grad_norm": 0.2872511148452759, "learning_rate": 0.0002, "epoch": 0.26710418375717804, "step": 4070}, {"loss": 0.9365, "grad_norm": 0.327495276927948, "learning_rate": 0.0002, "epoch": 0.267760459392945, "step": 4080}, {"loss": 0.9778, "grad_norm": 0.40853601694107056, "learning_rate": 0.0002, "epoch": 0.26841673502871205, "step": 4090}, {"loss": 0.9232, "grad_norm": 0.32097089290618896, "learning_rate": 0.0002, "epoch": 0.2690730106644791, "step": 4100}, {"loss": 0.9622, "grad_norm": 0.32956209778785706, "learning_rate": 0.0002, "epoch": 0.2697292863002461, "step": 4110}, {"loss": 0.911, "grad_norm": 0.30057013034820557, "learning_rate": 0.0002, "epoch": 0.27038556193601315, "step": 4120}, {"loss": 0.9045, "grad_norm": 0.30630001425743103, "learning_rate": 0.0002, "epoch": 0.27104183757178013, "step": 4130}, {"loss": 0.8914, "grad_norm": 0.31977957487106323, "learning_rate": 0.0002, "epoch": 0.27169811320754716, "step": 4140}, {"loss": 0.9619, "grad_norm": 0.29783955216407776, "learning_rate": 0.0002, "epoch": 0.2723543888433142, "step": 4150}, {"loss": 1.0299, "grad_norm": 0.37371826171875, "learning_rate": 0.0002, "epoch": 0.27301066447908123, "step": 4160}, {"loss": 0.9072, "grad_norm": 0.27656471729278564, "learning_rate": 0.0002, "epoch": 0.2736669401148482, "step": 4170}, {"loss": 0.873, "grad_norm": 0.29685157537460327, "learning_rate": 0.0002, "epoch": 0.27432321575061525, "step": 4180}, {"loss": 1.0057, "grad_norm": 0.3376981317996979, "learning_rate": 0.0002, "epoch": 0.2749794913863823, "step": 4190}, {"loss": 0.9671, "grad_norm": 0.2933194041252136, "learning_rate": 0.0002, "epoch": 0.2756357670221493, "step": 4200}, {"loss": 0.9359, "grad_norm": 0.27594974637031555, "learning_rate": 0.0002, "epoch": 0.27629204265791635, "step": 4210}, {"loss": 0.9336, "grad_norm": 0.29267510771751404, "learning_rate": 0.0002, "epoch": 0.2769483182936833, "step": 4220}, {"loss": 0.9241, "grad_norm": 0.3266076445579529, "learning_rate": 0.0002, "epoch": 0.27760459392945036, "step": 4230}, {"loss": 0.933, "grad_norm": 0.3626921474933624, "learning_rate": 0.0002, "epoch": 0.2782608695652174, "step": 4240}, {"loss": 0.9533, "grad_norm": 0.3043886423110962, "learning_rate": 0.0002, "epoch": 0.27891714520098443, "step": 4250}, {"loss": 0.902, "grad_norm": 0.28676939010620117, "learning_rate": 0.0002, "epoch": 0.27957342083675146, "step": 4260}, {"loss": 0.9565, "grad_norm": 0.32501107454299927, "learning_rate": 0.0002, "epoch": 0.28022969647251844, "step": 4270}, {"loss": 1.0009, "grad_norm": 0.3533550202846527, "learning_rate": 0.0002, "epoch": 0.2808859721082855, "step": 4280}, {"loss": 0.9128, "grad_norm": 0.28312650322914124, "learning_rate": 0.0002, "epoch": 0.2815422477440525, "step": 4290}, {"loss": 0.9626, "grad_norm": 0.2910906672477722, "learning_rate": 0.0002, "epoch": 0.28219852337981954, "step": 4300}, {"loss": 0.8803, "grad_norm": 0.27612248063087463, "learning_rate": 0.0002, "epoch": 0.2828547990155865, "step": 4310}, {"loss": 0.9451, "grad_norm": 0.416061133146286, "learning_rate": 0.0002, "epoch": 0.28351107465135356, "step": 4320}, {"loss": 0.9197, "grad_norm": 0.36900338530540466, "learning_rate": 0.0002, "epoch": 0.2841673502871206, "step": 4330}, {"loss": 0.9623, "grad_norm": 0.32224977016448975, "learning_rate": 0.0002, "epoch": 0.2848236259228876, "step": 4340}, {"loss": 0.9264, "grad_norm": 0.2805930972099304, "learning_rate": 0.0002, "epoch": 0.28547990155865466, "step": 4350}, {"loss": 0.9486, "grad_norm": 0.286539226770401, "learning_rate": 0.0002, "epoch": 0.28613617719442164, "step": 4360}, {"loss": 0.9225, "grad_norm": 0.3326348066329956, "learning_rate": 0.0002, "epoch": 0.28679245283018867, "step": 4370}, {"loss": 0.8774, "grad_norm": 0.29906603693962097, "learning_rate": 0.0002, "epoch": 0.2874487284659557, "step": 4380}, {"loss": 0.9582, "grad_norm": 0.33906206488609314, "learning_rate": 0.0002, "epoch": 0.28810500410172274, "step": 4390}, {"loss": 0.8987, "grad_norm": 0.36083030700683594, "learning_rate": 0.0002, "epoch": 0.2887612797374898, "step": 4400}, {"loss": 0.9189, "grad_norm": 0.30945461988449097, "learning_rate": 0.0002, "epoch": 0.28941755537325675, "step": 4410}, {"loss": 0.9454, "grad_norm": 0.29673129320144653, "learning_rate": 0.0002, "epoch": 0.2900738310090238, "step": 4420}, {"loss": 0.9454, "grad_norm": 0.29949837923049927, "learning_rate": 0.0002, "epoch": 0.2907301066447908, "step": 4430}, {"loss": 0.9026, "grad_norm": 0.29319390654563904, "learning_rate": 0.0002, "epoch": 0.29138638228055785, "step": 4440}, {"loss": 0.9318, "grad_norm": 0.38954150676727295, "learning_rate": 0.0002, "epoch": 0.29204265791632483, "step": 4450}, {"loss": 1.0558, "grad_norm": 0.32702240347862244, "learning_rate": 0.0002, "epoch": 0.29269893355209187, "step": 4460}, {"loss": 0.9684, "grad_norm": 0.3480510413646698, "learning_rate": 0.0002, "epoch": 0.2933552091878589, "step": 4470}, {"loss": 0.9267, "grad_norm": 0.34290337562561035, "learning_rate": 0.0002, "epoch": 0.29401148482362593, "step": 4480}, {"loss": 0.9561, "grad_norm": 0.3169049620628357, "learning_rate": 0.0002, "epoch": 0.29466776045939297, "step": 4490}, {"loss": 0.9386, "grad_norm": 0.3368853032588959, "learning_rate": 0.0002, "epoch": 0.29532403609515995, "step": 4500}, {"loss": 0.8803, "grad_norm": 0.30214768648147583, "learning_rate": 0.0002, "epoch": 0.295980311730927, "step": 4510}, {"loss": 0.9485, "grad_norm": 0.33051690459251404, "learning_rate": 0.0002, "epoch": 0.296636587366694, "step": 4520}, {"loss": 0.9498, "grad_norm": 0.37137898802757263, "learning_rate": 0.0002, "epoch": 0.29729286300246105, "step": 4530}, {"loss": 0.9518, "grad_norm": 0.3780321180820465, "learning_rate": 0.0002, "epoch": 0.29794913863822803, "step": 4540}, {"loss": 0.8912, "grad_norm": 0.3048851788043976, "learning_rate": 0.0002, "epoch": 0.29860541427399506, "step": 4550}, {"loss": 1.0054, "grad_norm": 0.36871910095214844, "learning_rate": 0.0002, "epoch": 0.2992616899097621, "step": 4560}, {"loss": 0.9172, "grad_norm": 0.3335227966308594, "learning_rate": 0.0002, "epoch": 0.29991796554552913, "step": 4570}, {"loss": 0.9368, "grad_norm": 0.3256683349609375, "learning_rate": 0.0002, "epoch": 0.30057424118129616, "step": 4580}, {"loss": 0.9641, "grad_norm": 0.34209194779396057, "learning_rate": 0.0002, "epoch": 0.30123051681706314, "step": 4590}, {"loss": 0.9128, "grad_norm": 0.278749018907547, "learning_rate": 0.0002, "epoch": 0.3018867924528302, "step": 4600}, {"loss": 0.931, "grad_norm": 0.28081533312797546, "learning_rate": 0.0002, "epoch": 0.3025430680885972, "step": 4610}, {"loss": 0.9084, "grad_norm": 0.35687389969825745, "learning_rate": 0.0002, "epoch": 0.30319934372436425, "step": 4620}, {"loss": 0.8927, "grad_norm": 0.367288738489151, "learning_rate": 0.0002, "epoch": 0.3038556193601313, "step": 4630}, {"loss": 0.9175, "grad_norm": 0.40433239936828613, "learning_rate": 0.0002, "epoch": 0.30451189499589826, "step": 4640}, {"loss": 0.9115, "grad_norm": 0.31489279866218567, "learning_rate": 0.0002, "epoch": 0.3051681706316653, "step": 4650}, {"loss": 0.9843, "grad_norm": 0.30823174118995667, "learning_rate": 0.0002, "epoch": 0.3058244462674323, "step": 4660}, {"loss": 0.964, "grad_norm": 0.3204546570777893, "learning_rate": 0.0002, "epoch": 0.30648072190319936, "step": 4670}, {"loss": 0.9754, "grad_norm": 0.3188243508338928, "learning_rate": 0.0002, "epoch": 0.30713699753896634, "step": 4680}, {"loss": 0.8923, "grad_norm": 0.31931981444358826, "learning_rate": 0.0002, "epoch": 0.3077932731747334, "step": 4690}, {"loss": 0.9113, "grad_norm": 0.33908605575561523, "learning_rate": 0.0002, "epoch": 0.3084495488105004, "step": 4700}, {"loss": 0.9552, "grad_norm": 0.3362937569618225, "learning_rate": 0.0002, "epoch": 0.30910582444626744, "step": 4710}, {"loss": 0.9695, "grad_norm": 0.3304995000362396, "learning_rate": 0.0002, "epoch": 0.3097621000820345, "step": 4720}, {"loss": 0.9588, "grad_norm": 0.39165404438972473, "learning_rate": 0.0002, "epoch": 0.31041837571780145, "step": 4730}, {"loss": 0.9583, "grad_norm": 0.29097774624824524, "learning_rate": 0.0002, "epoch": 0.3110746513535685, "step": 4740}, {"loss": 0.9224, "grad_norm": 0.289065420627594, "learning_rate": 0.0002, "epoch": 0.3117309269893355, "step": 4750}, {"loss": 0.8905, "grad_norm": 0.32188501954078674, "learning_rate": 0.0002, "epoch": 0.31238720262510256, "step": 4760}, {"loss": 0.951, "grad_norm": 0.2738335430622101, "learning_rate": 0.0002, "epoch": 0.3130434782608696, "step": 4770}, {"loss": 0.9662, "grad_norm": 0.2811134457588196, "learning_rate": 0.0002, "epoch": 0.31369975389663657, "step": 4780}, {"loss": 0.9372, "grad_norm": 0.2986338138580322, "learning_rate": 0.0002, "epoch": 0.3143560295324036, "step": 4790}, {"loss": 0.9016, "grad_norm": 0.30152231454849243, "learning_rate": 0.0002, "epoch": 0.31501230516817064, "step": 4800}, {"loss": 0.9902, "grad_norm": 0.37527933716773987, "learning_rate": 0.0002, "epoch": 0.31566858080393767, "step": 4810}, {"loss": 0.9786, "grad_norm": 0.4206887483596802, "learning_rate": 0.0002, "epoch": 0.31632485643970465, "step": 4820}, {"loss": 0.9265, "grad_norm": 0.3060953617095947, "learning_rate": 0.0002, "epoch": 0.3169811320754717, "step": 4830}, {"loss": 0.874, "grad_norm": 0.28399568796157837, "learning_rate": 0.0002, "epoch": 0.3176374077112387, "step": 4840}, {"loss": 0.9287, "grad_norm": 0.30498018860816956, "learning_rate": 0.0002, "epoch": 0.31829368334700575, "step": 4850}, {"loss": 0.9682, "grad_norm": 0.3168811798095703, "learning_rate": 0.0002, "epoch": 0.3189499589827728, "step": 4860}, {"loss": 0.9916, "grad_norm": 0.33051279187202454, "learning_rate": 0.0002, "epoch": 0.31960623461853976, "step": 4870}, {"loss": 0.9427, "grad_norm": 0.34563300013542175, "learning_rate": 0.0002, "epoch": 0.3202625102543068, "step": 4880}, {"loss": 0.9018, "grad_norm": 0.28437602519989014, "learning_rate": 0.0002, "epoch": 0.32091878589007383, "step": 4890}, {"loss": 0.9357, "grad_norm": 0.3883301913738251, "learning_rate": 0.0002, "epoch": 0.32157506152584087, "step": 4900}, {"loss": 0.9034, "grad_norm": 0.2933879792690277, "learning_rate": 0.0002, "epoch": 0.3222313371616079, "step": 4910}, {"loss": 0.9332, "grad_norm": 0.3490493595600128, "learning_rate": 0.0002, "epoch": 0.3228876127973749, "step": 4920}, {"loss": 0.9424, "grad_norm": 0.29370415210723877, "learning_rate": 0.0002, "epoch": 0.3235438884331419, "step": 4930}, {"loss": 0.9751, "grad_norm": 0.32653453946113586, "learning_rate": 0.0002, "epoch": 0.32420016406890895, "step": 4940}, {"loss": 0.9278, "grad_norm": 0.32540783286094666, "learning_rate": 0.0002, "epoch": 0.324856439704676, "step": 4950}, {"loss": 0.9309, "grad_norm": 0.37576451897621155, "learning_rate": 0.0002, "epoch": 0.32551271534044296, "step": 4960}, {"loss": 0.9612, "grad_norm": 0.27892962098121643, "learning_rate": 0.0002, "epoch": 0.32616899097621, "step": 4970}, {"loss": 0.9765, "grad_norm": 0.38215309381484985, "learning_rate": 0.0002, "epoch": 0.32682526661197703, "step": 4980}, {"loss": 0.9226, "grad_norm": 0.4062992334365845, "learning_rate": 0.0002, "epoch": 0.32748154224774406, "step": 4990}, {"loss": 0.9157, "grad_norm": 0.3053376376628876, "learning_rate": 0.0002, "epoch": 0.3281378178835111, "step": 5000}, {"loss": 0.9117, "grad_norm": 0.3228836953639984, "learning_rate": 0.0002, "epoch": 0.3287940935192781, "step": 5010}, {"loss": 0.8644, "grad_norm": 0.27714234590530396, "learning_rate": 0.0002, "epoch": 0.3294503691550451, "step": 5020}, {"loss": 0.9034, "grad_norm": 0.3127416968345642, "learning_rate": 0.0002, "epoch": 0.33010664479081214, "step": 5030}, {"loss": 0.89, "grad_norm": 0.3653234839439392, "learning_rate": 0.0002, "epoch": 0.3307629204265792, "step": 5040}, {"loss": 0.9313, "grad_norm": 0.33517029881477356, "learning_rate": 0.0002, "epoch": 0.3314191960623462, "step": 5050}, {"loss": 0.9219, "grad_norm": 0.3412803113460541, "learning_rate": 0.0002, "epoch": 0.3320754716981132, "step": 5060}, {"loss": 0.9358, "grad_norm": 0.32958096265792847, "learning_rate": 0.0002, "epoch": 0.3327317473338802, "step": 5070}, {"loss": 0.9479, "grad_norm": 0.2991558015346527, "learning_rate": 0.0002, "epoch": 0.33338802296964726, "step": 5080}, {"loss": 0.9754, "grad_norm": 0.35870906710624695, "learning_rate": 0.0002, "epoch": 0.3340442986054143, "step": 5090}, {"loss": 0.9183, "grad_norm": 0.26751458644866943, "learning_rate": 0.0002, "epoch": 0.33470057424118127, "step": 5100}, {"loss": 0.9676, "grad_norm": 0.35588568449020386, "learning_rate": 0.0002, "epoch": 0.3353568498769483, "step": 5110}, {"loss": 0.8964, "grad_norm": 0.41509315371513367, "learning_rate": 0.0002, "epoch": 0.33601312551271534, "step": 5120}, {"loss": 0.9864, "grad_norm": 0.37643107771873474, "learning_rate": 0.0002, "epoch": 0.3366694011484824, "step": 5130}, {"loss": 0.9367, "grad_norm": 0.3062657117843628, "learning_rate": 0.0002, "epoch": 0.3373256767842494, "step": 5140}, {"loss": 0.9167, "grad_norm": 0.2778759300708771, "learning_rate": 0.0002, "epoch": 0.3379819524200164, "step": 5150}, {"loss": 0.9787, "grad_norm": 0.40632501244544983, "learning_rate": 0.0002, "epoch": 0.3386382280557834, "step": 5160}, {"loss": 0.8941, "grad_norm": 0.2996899485588074, "learning_rate": 0.0002, "epoch": 0.33929450369155045, "step": 5170}, {"loss": 0.9611, "grad_norm": 0.2869918942451477, "learning_rate": 0.0002, "epoch": 0.3399507793273175, "step": 5180}, {"loss": 0.9201, "grad_norm": 0.3303343951702118, "learning_rate": 0.0002, "epoch": 0.3406070549630845, "step": 5190}, {"loss": 0.9162, "grad_norm": 0.3582284450531006, "learning_rate": 0.0002, "epoch": 0.3412633305988515, "step": 5200}, {"loss": 0.9441, "grad_norm": 0.3600800037384033, "learning_rate": 0.0002, "epoch": 0.34191960623461853, "step": 5210}, {"loss": 0.9061, "grad_norm": 0.3147357106208801, "learning_rate": 0.0002, "epoch": 0.34257588187038557, "step": 5220}, {"loss": 0.9189, "grad_norm": 0.3327115774154663, "learning_rate": 0.0002, "epoch": 0.3432321575061526, "step": 5230}, {"loss": 0.9606, "grad_norm": 0.3514555096626282, "learning_rate": 0.0002, "epoch": 0.3438884331419196, "step": 5240}, {"loss": 0.8944, "grad_norm": 0.35748785734176636, "learning_rate": 0.0002, "epoch": 0.3445447087776866, "step": 5250}, {"loss": 0.9228, "grad_norm": 0.3179738223552704, "learning_rate": 0.0002, "epoch": 0.34520098441345365, "step": 5260}, {"loss": 0.9313, "grad_norm": 0.3399673104286194, "learning_rate": 0.0002, "epoch": 0.3458572600492207, "step": 5270}, {"loss": 0.9379, "grad_norm": 0.32293835282325745, "learning_rate": 0.0002, "epoch": 0.3465135356849877, "step": 5280}, {"loss": 0.9066, "grad_norm": 0.3028234839439392, "learning_rate": 0.0002, "epoch": 0.3471698113207547, "step": 5290}, {"loss": 0.929, "grad_norm": 0.3056369423866272, "learning_rate": 0.0002, "epoch": 0.34782608695652173, "step": 5300}, {"loss": 0.9597, "grad_norm": 0.36918318271636963, "learning_rate": 0.0002, "epoch": 0.34848236259228876, "step": 5310}, {"loss": 0.9365, "grad_norm": 0.2661174535751343, "learning_rate": 0.0002, "epoch": 0.3491386382280558, "step": 5320}, {"loss": 0.8832, "grad_norm": 0.294240266084671, "learning_rate": 0.0002, "epoch": 0.34979491386382283, "step": 5330}, {"loss": 0.9135, "grad_norm": 0.3509284555912018, "learning_rate": 0.0002, "epoch": 0.3504511894995898, "step": 5340}, {"loss": 0.8919, "grad_norm": 0.331851989030838, "learning_rate": 0.0002, "epoch": 0.35110746513535684, "step": 5350}, {"loss": 0.9357, "grad_norm": 0.39378881454467773, "learning_rate": 0.0002, "epoch": 0.3517637407711239, "step": 5360}, {"loss": 0.8844, "grad_norm": 0.3014044761657715, "learning_rate": 0.0002, "epoch": 0.3524200164068909, "step": 5370}, {"loss": 0.9234, "grad_norm": 0.3350718021392822, "learning_rate": 0.0002, "epoch": 0.3530762920426579, "step": 5380}, {"loss": 0.9119, "grad_norm": 0.29478874802589417, "learning_rate": 0.0002, "epoch": 0.3537325676784249, "step": 5390}, {"loss": 0.8781, "grad_norm": 0.3219947814941406, "learning_rate": 0.0002, "epoch": 0.35438884331419196, "step": 5400}, {"loss": 0.9527, "grad_norm": 0.31258803606033325, "learning_rate": 0.0002, "epoch": 0.355045118949959, "step": 5410}, {"loss": 0.9124, "grad_norm": 0.2951710820198059, "learning_rate": 0.0002, "epoch": 0.355701394585726, "step": 5420}, {"loss": 0.8997, "grad_norm": 0.3726331293582916, "learning_rate": 0.0002, "epoch": 0.356357670221493, "step": 5430}, {"loss": 0.9288, "grad_norm": 0.3571377098560333, "learning_rate": 0.0002, "epoch": 0.35701394585726004, "step": 5440}, {"loss": 0.9538, "grad_norm": 0.3004431128501892, "learning_rate": 0.0002, "epoch": 0.3576702214930271, "step": 5450}, {"loss": 0.9466, "grad_norm": 0.3258959949016571, "learning_rate": 0.0002, "epoch": 0.3583264971287941, "step": 5460}, {"loss": 0.8999, "grad_norm": 0.31265372037887573, "learning_rate": 0.0002, "epoch": 0.35898277276456114, "step": 5470}, {"loss": 0.9438, "grad_norm": 0.3120972514152527, "learning_rate": 0.0002, "epoch": 0.3596390484003281, "step": 5480}, {"loss": 0.9541, "grad_norm": 0.31449300050735474, "learning_rate": 0.0002, "epoch": 0.36029532403609515, "step": 5490}, {"loss": 0.9569, "grad_norm": 0.378205806016922, "learning_rate": 0.0002, "epoch": 0.3609515996718622, "step": 5500}, {"loss": 0.8964, "grad_norm": 0.34647420048713684, "learning_rate": 0.0002, "epoch": 0.3616078753076292, "step": 5510}, {"loss": 0.9965, "grad_norm": 0.34195807576179504, "learning_rate": 0.0002, "epoch": 0.3622641509433962, "step": 5520}, {"loss": 0.8916, "grad_norm": 0.35365694761276245, "learning_rate": 0.0002, "epoch": 0.36292042657916324, "step": 5530}, {"loss": 0.9429, "grad_norm": 0.3188548982143402, "learning_rate": 0.0002, "epoch": 0.36357670221493027, "step": 5540}, {"loss": 0.9258, "grad_norm": 0.35012900829315186, "learning_rate": 0.0002, "epoch": 0.3642329778506973, "step": 5550}, {"loss": 0.9338, "grad_norm": 0.3564446270465851, "learning_rate": 0.0002, "epoch": 0.36488925348646434, "step": 5560}, {"loss": 0.9531, "grad_norm": 0.29641905426979065, "learning_rate": 0.0002, "epoch": 0.3655455291222313, "step": 5570}, {"loss": 0.9507, "grad_norm": 0.3476453721523285, "learning_rate": 0.0002, "epoch": 0.36620180475799835, "step": 5580}, {"loss": 0.9509, "grad_norm": 0.32947802543640137, "learning_rate": 0.0002, "epoch": 0.3668580803937654, "step": 5590}, {"loss": 0.9577, "grad_norm": 0.28639012575149536, "learning_rate": 0.0002, "epoch": 0.3675143560295324, "step": 5600}, {"loss": 0.9196, "grad_norm": 0.35645192861557007, "learning_rate": 0.0002, "epoch": 0.36817063166529945, "step": 5610}, {"loss": 0.9544, "grad_norm": 0.3594934046268463, "learning_rate": 0.0002, "epoch": 0.36882690730106643, "step": 5620}, {"loss": 0.9081, "grad_norm": 0.3584282398223877, "learning_rate": 0.0002, "epoch": 0.36948318293683347, "step": 5630}, {"loss": 0.955, "grad_norm": 0.32094448804855347, "learning_rate": 0.0002, "epoch": 0.3701394585726005, "step": 5640}, {"loss": 0.9271, "grad_norm": 0.3229917287826538, "learning_rate": 0.0002, "epoch": 0.37079573420836753, "step": 5650}, {"loss": 0.933, "grad_norm": 0.3371448218822479, "learning_rate": 0.0002, "epoch": 0.3714520098441345, "step": 5660}, {"loss": 0.9189, "grad_norm": 0.38639435172080994, "learning_rate": 0.0002, "epoch": 0.37210828547990155, "step": 5670}, {"loss": 0.8987, "grad_norm": 0.37710604071617126, "learning_rate": 0.0002, "epoch": 0.3727645611156686, "step": 5680}, {"loss": 0.9133, "grad_norm": 0.28154921531677246, "learning_rate": 0.0002, "epoch": 0.3734208367514356, "step": 5690}, {"loss": 0.9104, "grad_norm": 0.35519927740097046, "learning_rate": 0.0002, "epoch": 0.37407711238720265, "step": 5700}, {"loss": 0.9187, "grad_norm": 0.3271195888519287, "learning_rate": 0.0002, "epoch": 0.3747333880229696, "step": 5710}, {"loss": 1.0015, "grad_norm": 0.30836206674575806, "learning_rate": 0.0002, "epoch": 0.37538966365873666, "step": 5720}, {"loss": 0.8888, "grad_norm": 0.36397358775138855, "learning_rate": 0.0002, "epoch": 0.3760459392945037, "step": 5730}, {"loss": 0.8962, "grad_norm": 0.31649520993232727, "learning_rate": 0.0002, "epoch": 0.37670221493027073, "step": 5740}, {"loss": 0.9667, "grad_norm": 0.3790256977081299, "learning_rate": 0.0002, "epoch": 0.37735849056603776, "step": 5750}, {"loss": 0.9548, "grad_norm": 0.34175634384155273, "learning_rate": 0.0002, "epoch": 0.37801476620180474, "step": 5760}, {"loss": 0.9031, "grad_norm": 0.4169650673866272, "learning_rate": 0.0002, "epoch": 0.3786710418375718, "step": 5770}, {"loss": 0.9279, "grad_norm": 0.3118833899497986, "learning_rate": 0.0002, "epoch": 0.3793273174733388, "step": 5780}, {"loss": 0.9204, "grad_norm": 0.3474520444869995, "learning_rate": 0.0002, "epoch": 0.37998359310910584, "step": 5790}, {"loss": 0.9845, "grad_norm": 0.40819892287254333, "learning_rate": 0.0002, "epoch": 0.3806398687448728, "step": 5800}, {"loss": 0.8863, "grad_norm": 0.34402596950531006, "learning_rate": 0.0002, "epoch": 0.38129614438063986, "step": 5810}, {"loss": 0.9356, "grad_norm": 0.31899532675743103, "learning_rate": 0.0002, "epoch": 0.3819524200164069, "step": 5820}, {"loss": 0.857, "grad_norm": 0.38860151171684265, "learning_rate": 0.0002, "epoch": 0.3826086956521739, "step": 5830}, {"loss": 0.9433, "grad_norm": 0.2960244417190552, "learning_rate": 0.0002, "epoch": 0.38326497128794096, "step": 5840}, {"loss": 0.9546, "grad_norm": 0.3614438772201538, "learning_rate": 0.0002, "epoch": 0.38392124692370794, "step": 5850}, {"loss": 0.9447, "grad_norm": 0.33000093698501587, "learning_rate": 0.0002, "epoch": 0.38457752255947497, "step": 5860}, {"loss": 0.964, "grad_norm": 0.3543432354927063, "learning_rate": 0.0002, "epoch": 0.385233798195242, "step": 5870}, {"loss": 0.9329, "grad_norm": 0.3281349837779999, "learning_rate": 0.0002, "epoch": 0.38589007383100904, "step": 5880}, {"loss": 0.9463, "grad_norm": 0.3775436580181122, "learning_rate": 0.0002, "epoch": 0.3865463494667761, "step": 5890}, {"loss": 0.9035, "grad_norm": 0.3037714660167694, "learning_rate": 0.0002, "epoch": 0.38720262510254305, "step": 5900}, {"loss": 0.8953, "grad_norm": 0.30885955691337585, "learning_rate": 0.0002, "epoch": 0.3878589007383101, "step": 5910}, {"loss": 0.8789, "grad_norm": 0.2839881181716919, "learning_rate": 0.0002, "epoch": 0.3885151763740771, "step": 5920}, {"loss": 0.9585, "grad_norm": 0.30544501543045044, "learning_rate": 0.0002, "epoch": 0.38917145200984415, "step": 5930}, {"loss": 0.9163, "grad_norm": 0.281946063041687, "learning_rate": 0.0002, "epoch": 0.38982772764561113, "step": 5940}, {"loss": 0.8999, "grad_norm": 0.44500306248664856, "learning_rate": 0.0002, "epoch": 0.39048400328137817, "step": 5950}, {"loss": 0.8928, "grad_norm": 0.324260950088501, "learning_rate": 0.0002, "epoch": 0.3911402789171452, "step": 5960}, {"loss": 0.8844, "grad_norm": 0.31535372138023376, "learning_rate": 0.0002, "epoch": 0.39179655455291224, "step": 5970}, {"loss": 0.9185, "grad_norm": 0.31681323051452637, "learning_rate": 0.0002, "epoch": 0.39245283018867927, "step": 5980}, {"loss": 0.9258, "grad_norm": 0.32668736577033997, "learning_rate": 0.0002, "epoch": 0.39310910582444625, "step": 5990}, {"loss": 0.9278, "grad_norm": 0.34531155228614807, "learning_rate": 0.0002, "epoch": 0.3937653814602133, "step": 6000}, {"loss": 0.8969, "grad_norm": 0.32312485575675964, "learning_rate": 0.0002, "epoch": 0.3944216570959803, "step": 6010}, {"loss": 0.9501, "grad_norm": 0.31243696808815, "learning_rate": 0.0002, "epoch": 0.39507793273174735, "step": 6020}, {"loss": 0.9174, "grad_norm": 0.3558828830718994, "learning_rate": 0.0002, "epoch": 0.39573420836751433, "step": 6030}, {"loss": 0.9486, "grad_norm": 0.3894706964492798, "learning_rate": 0.0002, "epoch": 0.39639048400328136, "step": 6040}, {"loss": 0.9549, "grad_norm": 0.36445918679237366, "learning_rate": 0.0002, "epoch": 0.3970467596390484, "step": 6050}, {"loss": 0.9268, "grad_norm": 0.34586355090141296, "learning_rate": 0.0002, "epoch": 0.39770303527481543, "step": 6060}, {"loss": 0.9256, "grad_norm": 0.4428504705429077, "learning_rate": 0.0002, "epoch": 0.39835931091058246, "step": 6070}, {"loss": 0.9706, "grad_norm": 0.32784751057624817, "learning_rate": 0.0002, "epoch": 0.39901558654634944, "step": 6080}, {"loss": 0.9107, "grad_norm": 0.30480754375457764, "learning_rate": 0.0002, "epoch": 0.3996718621821165, "step": 6090}, {"loss": 0.9431, "grad_norm": 0.3382161557674408, "learning_rate": 0.0002, "epoch": 0.4003281378178835, "step": 6100}, {"loss": 0.8669, "grad_norm": 0.34486305713653564, "learning_rate": 0.0002, "epoch": 0.40098441345365055, "step": 6110}, {"loss": 0.9174, "grad_norm": 0.3488329350948334, "learning_rate": 0.0002, "epoch": 0.4016406890894176, "step": 6120}, {"loss": 0.8907, "grad_norm": 0.2977526783943176, "learning_rate": 0.0002, "epoch": 0.40229696472518456, "step": 6130}, {"loss": 0.9088, "grad_norm": 0.27584630250930786, "learning_rate": 0.0002, "epoch": 0.4029532403609516, "step": 6140}, {"loss": 0.9062, "grad_norm": 0.30325573682785034, "learning_rate": 0.0002, "epoch": 0.4036095159967186, "step": 6150}, {"loss": 0.9626, "grad_norm": 0.3318769633769989, "learning_rate": 0.0002, "epoch": 0.40426579163248566, "step": 6160}, {"loss": 0.8849, "grad_norm": 0.27929258346557617, "learning_rate": 0.0002, "epoch": 0.40492206726825264, "step": 6170}, {"loss": 0.9118, "grad_norm": 0.3127989172935486, "learning_rate": 0.0002, "epoch": 0.4055783429040197, "step": 6180}, {"loss": 0.9659, "grad_norm": 0.34189802408218384, "learning_rate": 0.0002, "epoch": 0.4062346185397867, "step": 6190}, {"loss": 0.9041, "grad_norm": 0.2901368737220764, "learning_rate": 0.0002, "epoch": 0.40689089417555374, "step": 6200}, {"loss": 0.8863, "grad_norm": 0.2957440912723541, "learning_rate": 0.0002, "epoch": 0.4075471698113208, "step": 6210}, {"loss": 0.8872, "grad_norm": 0.35854384303092957, "learning_rate": 0.0002, "epoch": 0.40820344544708775, "step": 6220}, {"loss": 0.9913, "grad_norm": 0.3516639471054077, "learning_rate": 0.0002, "epoch": 0.4088597210828548, "step": 6230}, {"loss": 0.9526, "grad_norm": 0.33773764967918396, "learning_rate": 0.0002, "epoch": 0.4095159967186218, "step": 6240}, {"loss": 0.9116, "grad_norm": 0.3359714448451996, "learning_rate": 0.0002, "epoch": 0.41017227235438886, "step": 6250}, {"loss": 0.8789, "grad_norm": 0.37056079506874084, "learning_rate": 0.0002, "epoch": 0.4108285479901559, "step": 6260}, {"loss": 0.9262, "grad_norm": 0.37948423624038696, "learning_rate": 0.0002, "epoch": 0.41148482362592287, "step": 6270}, {"loss": 0.9333, "grad_norm": 0.30971240997314453, "learning_rate": 0.0002, "epoch": 0.4121410992616899, "step": 6280}, {"loss": 0.8636, "grad_norm": 0.3357695937156677, "learning_rate": 0.0002, "epoch": 0.41279737489745694, "step": 6290}, {"loss": 0.8857, "grad_norm": 0.2873750627040863, "learning_rate": 0.0002, "epoch": 0.41345365053322397, "step": 6300}, {"loss": 0.9401, "grad_norm": 0.31098586320877075, "learning_rate": 0.0002, "epoch": 0.41410992616899095, "step": 6310}, {"loss": 0.9113, "grad_norm": 0.3232232630252838, "learning_rate": 0.0002, "epoch": 0.414766201804758, "step": 6320}, {"loss": 0.9165, "grad_norm": 0.3094746172428131, "learning_rate": 0.0002, "epoch": 0.415422477440525, "step": 6330}, {"loss": 0.92, "grad_norm": 0.3032565414905548, "learning_rate": 0.0002, "epoch": 0.41607875307629205, "step": 6340}, {"loss": 0.9254, "grad_norm": 0.30271366238594055, "learning_rate": 0.0002, "epoch": 0.4167350287120591, "step": 6350}, {"loss": 0.9766, "grad_norm": 0.35404741764068604, "learning_rate": 0.0002, "epoch": 0.41739130434782606, "step": 6360}, {"loss": 0.889, "grad_norm": 0.33428773283958435, "learning_rate": 0.0002, "epoch": 0.4180475799835931, "step": 6370}, {"loss": 0.9284, "grad_norm": 0.3523367941379547, "learning_rate": 0.0002, "epoch": 0.41870385561936013, "step": 6380}, {"loss": 0.8831, "grad_norm": 0.28411954641342163, "learning_rate": 0.0002, "epoch": 0.41936013125512717, "step": 6390}, {"loss": 0.9429, "grad_norm": 0.3410111963748932, "learning_rate": 0.0002, "epoch": 0.4200164068908942, "step": 6400}, {"loss": 0.8956, "grad_norm": 0.33837398886680603, "learning_rate": 0.0002, "epoch": 0.4206726825266612, "step": 6410}, {"loss": 0.9169, "grad_norm": 0.38739341497421265, "learning_rate": 0.0002, "epoch": 0.4213289581624282, "step": 6420}, {"loss": 0.8943, "grad_norm": 0.28810667991638184, "learning_rate": 0.0002, "epoch": 0.42198523379819525, "step": 6430}, {"loss": 0.9505, "grad_norm": 0.3242197036743164, "learning_rate": 0.0002, "epoch": 0.4226415094339623, "step": 6440}, {"loss": 0.9409, "grad_norm": 0.39538851380348206, "learning_rate": 0.0002, "epoch": 0.42329778506972926, "step": 6450}, {"loss": 0.9152, "grad_norm": 0.36792051792144775, "learning_rate": 0.0002, "epoch": 0.4239540607054963, "step": 6460}, {"loss": 0.9203, "grad_norm": 0.31599462032318115, "learning_rate": 0.0002, "epoch": 0.42461033634126333, "step": 6470}, {"loss": 0.926, "grad_norm": 0.3358552157878876, "learning_rate": 0.0002, "epoch": 0.42526661197703036, "step": 6480}, {"loss": 0.9337, "grad_norm": 0.40409335494041443, "learning_rate": 0.0002, "epoch": 0.4259228876127974, "step": 6490}, {"loss": 0.901, "grad_norm": 0.3785623610019684, "learning_rate": 0.0002, "epoch": 0.4265791632485644, "step": 6500}, {"loss": 0.9281, "grad_norm": 0.43033692240715027, "learning_rate": 0.0002, "epoch": 0.4272354388843314, "step": 6510}, {"loss": 0.9332, "grad_norm": 0.3074797987937927, "learning_rate": 0.0002, "epoch": 0.42789171452009844, "step": 6520}, {"loss": 0.9161, "grad_norm": 0.3498784005641937, "learning_rate": 0.0002, "epoch": 0.4285479901558655, "step": 6530}, {"loss": 0.9829, "grad_norm": 0.40507981181144714, "learning_rate": 0.0002, "epoch": 0.4292042657916325, "step": 6540}, {"loss": 0.9269, "grad_norm": 0.35614013671875, "learning_rate": 0.0002, "epoch": 0.4298605414273995, "step": 6550}, {"loss": 0.9033, "grad_norm": 0.451616495847702, "learning_rate": 0.0002, "epoch": 0.4305168170631665, "step": 6560}, {"loss": 0.9723, "grad_norm": 0.3707262873649597, "learning_rate": 0.0002, "epoch": 0.43117309269893356, "step": 6570}, {"loss": 0.9512, "grad_norm": 0.3271283209323883, "learning_rate": 0.0002, "epoch": 0.4318293683347006, "step": 6580}, {"loss": 0.8529, "grad_norm": 0.38105711340904236, "learning_rate": 0.0002, "epoch": 0.43248564397046757, "step": 6590}, {"loss": 0.9208, "grad_norm": 0.345217227935791, "learning_rate": 0.0002, "epoch": 0.4331419196062346, "step": 6600}, {"loss": 0.8958, "grad_norm": 0.3507174551486969, "learning_rate": 0.0002, "epoch": 0.43379819524200164, "step": 6610}, {"loss": 0.9402, "grad_norm": 0.318654328584671, "learning_rate": 0.0002, "epoch": 0.4344544708777687, "step": 6620}, {"loss": 0.9068, "grad_norm": 0.3628910183906555, "learning_rate": 0.0002, "epoch": 0.4351107465135357, "step": 6630}, {"loss": 0.9122, "grad_norm": 0.4454437494277954, "learning_rate": 0.0002, "epoch": 0.4357670221493027, "step": 6640}, {"loss": 1.0092, "grad_norm": 0.36227893829345703, "learning_rate": 0.0002, "epoch": 0.4364232977850697, "step": 6650}, {"loss": 0.9289, "grad_norm": 0.33995604515075684, "learning_rate": 0.0002, "epoch": 0.43707957342083675, "step": 6660}, {"loss": 0.9032, "grad_norm": 0.3784409165382385, "learning_rate": 0.0002, "epoch": 0.4377358490566038, "step": 6670}, {"loss": 0.9399, "grad_norm": 0.39470377564430237, "learning_rate": 0.0002, "epoch": 0.4383921246923708, "step": 6680}, {"loss": 0.8795, "grad_norm": 0.34198591113090515, "learning_rate": 0.0002, "epoch": 0.4390484003281378, "step": 6690}, {"loss": 0.9665, "grad_norm": 0.35575005412101746, "learning_rate": 0.0002, "epoch": 0.43970467596390483, "step": 6700}, {"loss": 0.9133, "grad_norm": 0.40915772318840027, "learning_rate": 0.0002, "epoch": 0.44036095159967187, "step": 6710}, {"loss": 0.9112, "grad_norm": 0.3526946008205414, "learning_rate": 0.0002, "epoch": 0.4410172272354389, "step": 6720}, {"loss": 0.8886, "grad_norm": 0.30411285161972046, "learning_rate": 0.0002, "epoch": 0.4416735028712059, "step": 6730}, {"loss": 0.8969, "grad_norm": 0.2935996949672699, "learning_rate": 0.0002, "epoch": 0.4423297785069729, "step": 6740}, {"loss": 0.8864, "grad_norm": 0.3806987702846527, "learning_rate": 0.0002, "epoch": 0.44298605414273995, "step": 6750}, {"loss": 0.9267, "grad_norm": 0.3329904079437256, "learning_rate": 0.0002, "epoch": 0.443642329778507, "step": 6760}, {"loss": 0.9223, "grad_norm": 0.33832767605781555, "learning_rate": 0.0002, "epoch": 0.444298605414274, "step": 6770}, {"loss": 0.9127, "grad_norm": 0.3387627899646759, "learning_rate": 0.0002, "epoch": 0.444954881050041, "step": 6780}, {"loss": 0.8853, "grad_norm": 0.32709822058677673, "learning_rate": 0.0002, "epoch": 0.44561115668580803, "step": 6790}, {"loss": 0.9059, "grad_norm": 0.2914820909500122, "learning_rate": 0.0002, "epoch": 0.44626743232157506, "step": 6800}, {"loss": 0.9209, "grad_norm": 0.3288695216178894, "learning_rate": 0.0002, "epoch": 0.4469237079573421, "step": 6810}, {"loss": 0.9208, "grad_norm": 0.5003459453582764, "learning_rate": 0.0002, "epoch": 0.44757998359310913, "step": 6820}, {"loss": 0.9797, "grad_norm": 0.359402060508728, "learning_rate": 0.0002, "epoch": 0.4482362592288761, "step": 6830}, {"loss": 0.856, "grad_norm": 0.35508984327316284, "learning_rate": 0.0002, "epoch": 0.44889253486464314, "step": 6840}, {"loss": 0.9513, "grad_norm": 0.33327680826187134, "learning_rate": 0.0002, "epoch": 0.4495488105004102, "step": 6850}, {"loss": 0.965, "grad_norm": 0.4084452688694, "learning_rate": 0.0002, "epoch": 0.4502050861361772, "step": 6860}, {"loss": 0.899, "grad_norm": 0.30232545733451843, "learning_rate": 0.0002, "epoch": 0.4508613617719442, "step": 6870}, {"loss": 0.9294, "grad_norm": 0.3244702219963074, "learning_rate": 0.0002, "epoch": 0.4515176374077112, "step": 6880}, {"loss": 0.9312, "grad_norm": 0.32148104906082153, "learning_rate": 0.0002, "epoch": 0.45217391304347826, "step": 6890}, {"loss": 1.007, "grad_norm": 0.3831937313079834, "learning_rate": 0.0002, "epoch": 0.4528301886792453, "step": 6900}, {"loss": 0.9111, "grad_norm": 0.33381497859954834, "learning_rate": 0.0002, "epoch": 0.4534864643150123, "step": 6910}, {"loss": 0.8955, "grad_norm": 0.4074220359325409, "learning_rate": 0.0002, "epoch": 0.4541427399507793, "step": 6920}, {"loss": 0.9326, "grad_norm": 0.35602903366088867, "learning_rate": 0.0002, "epoch": 0.45479901558654634, "step": 6930}, {"loss": 0.9146, "grad_norm": 0.3411916196346283, "learning_rate": 0.0002, "epoch": 0.4554552912223134, "step": 6940}, {"loss": 0.9382, "grad_norm": 0.38504868745803833, "learning_rate": 0.0002, "epoch": 0.4561115668580804, "step": 6950}, {"loss": 0.9105, "grad_norm": 0.5072926878929138, "learning_rate": 0.0002, "epoch": 0.45676784249384744, "step": 6960}, {"loss": 0.9383, "grad_norm": 0.38257330656051636, "learning_rate": 0.0002, "epoch": 0.4574241181296144, "step": 6970}, {"loss": 0.8864, "grad_norm": 0.40472176671028137, "learning_rate": 0.0002, "epoch": 0.45808039376538146, "step": 6980}, {"loss": 0.949, "grad_norm": 0.34501466155052185, "learning_rate": 0.0002, "epoch": 0.4587366694011485, "step": 6990}, {"loss": 0.9717, "grad_norm": 0.32561028003692627, "learning_rate": 0.0002, "epoch": 0.4593929450369155, "step": 7000}, {"loss": 0.9655, "grad_norm": 0.3388477861881256, "learning_rate": 0.0002, "epoch": 0.4600492206726825, "step": 7010}, {"loss": 0.9039, "grad_norm": 0.38045769929885864, "learning_rate": 0.0002, "epoch": 0.46070549630844954, "step": 7020}, {"loss": 0.8879, "grad_norm": 0.32500898838043213, "learning_rate": 0.0002, "epoch": 0.46136177194421657, "step": 7030}, {"loss": 0.9872, "grad_norm": 0.3758746385574341, "learning_rate": 0.0002, "epoch": 0.4620180475799836, "step": 7040}, {"loss": 0.9043, "grad_norm": 0.39033588767051697, "learning_rate": 0.0002, "epoch": 0.46267432321575064, "step": 7050}, {"loss": 0.891, "grad_norm": 0.35141658782958984, "learning_rate": 0.0002, "epoch": 0.4633305988515176, "step": 7060}, {"loss": 0.9196, "grad_norm": 0.3715546727180481, "learning_rate": 0.0002, "epoch": 0.46398687448728465, "step": 7070}, {"loss": 0.9051, "grad_norm": 0.3062947988510132, "learning_rate": 0.0002, "epoch": 0.4646431501230517, "step": 7080}, {"loss": 0.9395, "grad_norm": 0.43001696467399597, "learning_rate": 0.0002, "epoch": 0.4652994257588187, "step": 7090}, {"loss": 0.9363, "grad_norm": 0.34747597575187683, "learning_rate": 0.0002, "epoch": 0.46595570139458575, "step": 7100}, {"loss": 0.9052, "grad_norm": 0.33514827489852905, "learning_rate": 0.0002, "epoch": 0.46661197703035273, "step": 7110}, {"loss": 0.8673, "grad_norm": 0.3500545918941498, "learning_rate": 0.0002, "epoch": 0.46726825266611977, "step": 7120}, {"loss": 0.9373, "grad_norm": 0.3959280252456665, "learning_rate": 0.0002, "epoch": 0.4679245283018868, "step": 7130}, {"loss": 0.868, "grad_norm": 0.3119291067123413, "learning_rate": 0.0002, "epoch": 0.46858080393765383, "step": 7140}, {"loss": 0.923, "grad_norm": 0.36544349789619446, "learning_rate": 0.0002, "epoch": 0.4692370795734208, "step": 7150}, {"loss": 0.9281, "grad_norm": 0.3375662565231323, "learning_rate": 0.0002, "epoch": 0.46989335520918785, "step": 7160}, {"loss": 0.8948, "grad_norm": 0.48938584327697754, "learning_rate": 0.0002, "epoch": 0.4705496308449549, "step": 7170}, {"loss": 0.9016, "grad_norm": 0.36706942319869995, "learning_rate": 0.0002, "epoch": 0.4712059064807219, "step": 7180}, {"loss": 0.906, "grad_norm": 0.43472692370414734, "learning_rate": 0.0002, "epoch": 0.47186218211648895, "step": 7190}, {"loss": 0.9114, "grad_norm": 0.3433953821659088, "learning_rate": 0.0002, "epoch": 0.4725184577522559, "step": 7200}, {"loss": 0.899, "grad_norm": 0.3224952518939972, "learning_rate": 0.0002, "epoch": 0.47317473338802296, "step": 7210}, {"loss": 0.9039, "grad_norm": 0.34640923142433167, "learning_rate": 0.0002, "epoch": 0.47383100902379, "step": 7220}, {"loss": 0.9372, "grad_norm": 0.36221131682395935, "learning_rate": 0.0002, "epoch": 0.47448728465955703, "step": 7230}, {"loss": 0.9328, "grad_norm": 0.3695160150527954, "learning_rate": 0.0002, "epoch": 0.47514356029532406, "step": 7240}, {"loss": 0.8726, "grad_norm": 0.3915520906448364, "learning_rate": 0.0002, "epoch": 0.47579983593109104, "step": 7250}, {"loss": 0.9305, "grad_norm": 0.34111252427101135, "learning_rate": 0.0002, "epoch": 0.4764561115668581, "step": 7260}, {"loss": 0.8853, "grad_norm": 0.3156047761440277, "learning_rate": 0.0002, "epoch": 0.4771123872026251, "step": 7270}, {"loss": 0.9442, "grad_norm": 0.349992573261261, "learning_rate": 0.0002, "epoch": 0.47776866283839214, "step": 7280}, {"loss": 0.8841, "grad_norm": 0.3627530634403229, "learning_rate": 0.0002, "epoch": 0.4784249384741591, "step": 7290}, {"loss": 0.937, "grad_norm": 0.39342308044433594, "learning_rate": 0.0002, "epoch": 0.47908121410992616, "step": 7300}, {"loss": 0.951, "grad_norm": 0.321122944355011, "learning_rate": 0.0002, "epoch": 0.4797374897456932, "step": 7310}, {"loss": 0.9085, "grad_norm": 0.37064728140830994, "learning_rate": 0.0002, "epoch": 0.4803937653814602, "step": 7320}, {"loss": 0.9479, "grad_norm": 0.348779559135437, "learning_rate": 0.0002, "epoch": 0.48105004101722726, "step": 7330}, {"loss": 0.8651, "grad_norm": 0.3484368324279785, "learning_rate": 0.0002, "epoch": 0.48170631665299424, "step": 7340}, {"loss": 0.9096, "grad_norm": 0.3343070149421692, "learning_rate": 0.0002, "epoch": 0.48236259228876127, "step": 7350}, {"loss": 0.9148, "grad_norm": 0.3310532867908478, "learning_rate": 0.0002, "epoch": 0.4830188679245283, "step": 7360}, {"loss": 0.8652, "grad_norm": 0.37587985396385193, "learning_rate": 0.0002, "epoch": 0.48367514356029534, "step": 7370}, {"loss": 0.9302, "grad_norm": 0.37325888872146606, "learning_rate": 0.0002, "epoch": 0.4843314191960624, "step": 7380}, {"loss": 0.9382, "grad_norm": 0.3869538903236389, "learning_rate": 0.0002, "epoch": 0.48498769483182935, "step": 7390}, {"loss": 0.8967, "grad_norm": 0.34064534306526184, "learning_rate": 0.0002, "epoch": 0.4856439704675964, "step": 7400}, {"loss": 0.8884, "grad_norm": 0.35124653577804565, "learning_rate": 0.0002, "epoch": 0.4863002461033634, "step": 7410}, {"loss": 0.9178, "grad_norm": 0.38339361548423767, "learning_rate": 0.0002, "epoch": 0.48695652173913045, "step": 7420}, {"loss": 0.8984, "grad_norm": 0.32823026180267334, "learning_rate": 0.0002, "epoch": 0.48761279737489743, "step": 7430}, {"loss": 0.9348, "grad_norm": 0.32759153842926025, "learning_rate": 0.0002, "epoch": 0.48826907301066447, "step": 7440}, {"loss": 0.9023, "grad_norm": 0.3855777680873871, "learning_rate": 0.0002, "epoch": 0.4889253486464315, "step": 7450}, {"loss": 0.9201, "grad_norm": 0.34890854358673096, "learning_rate": 0.0002, "epoch": 0.48958162428219854, "step": 7460}, {"loss": 0.8947, "grad_norm": 0.3580712378025055, "learning_rate": 0.0002, "epoch": 0.49023789991796557, "step": 7470}, {"loss": 0.8413, "grad_norm": 0.34897708892822266, "learning_rate": 0.0002, "epoch": 0.49089417555373255, "step": 7480}, {"loss": 0.8508, "grad_norm": 0.32964155077934265, "learning_rate": 0.0002, "epoch": 0.4915504511894996, "step": 7490}, {"loss": 0.9298, "grad_norm": 0.3805377185344696, "learning_rate": 0.0002, "epoch": 0.4922067268252666, "step": 7500}, {"loss": 0.9354, "grad_norm": 0.3312858045101166, "learning_rate": 0.0002, "epoch": 0.49286300246103365, "step": 7510}, {"loss": 0.8966, "grad_norm": 0.32818418741226196, "learning_rate": 0.0002, "epoch": 0.49351927809680063, "step": 7520}, {"loss": 0.9036, "grad_norm": 0.3721756339073181, "learning_rate": 0.0002, "epoch": 0.49417555373256766, "step": 7530}, {"loss": 0.9483, "grad_norm": 0.4272252321243286, "learning_rate": 0.0002, "epoch": 0.4948318293683347, "step": 7540}, {"loss": 0.9159, "grad_norm": 0.3034988343715668, "learning_rate": 0.0002, "epoch": 0.49548810500410173, "step": 7550}, {"loss": 0.8996, "grad_norm": 0.34006139636039734, "learning_rate": 0.0002, "epoch": 0.49614438063986877, "step": 7560}, {"loss": 0.9122, "grad_norm": 0.33074137568473816, "learning_rate": 0.0002, "epoch": 0.49680065627563574, "step": 7570}, {"loss": 0.877, "grad_norm": 0.3747742772102356, "learning_rate": 0.0002, "epoch": 0.4974569319114028, "step": 7580}, {"loss": 0.8738, "grad_norm": 0.29682815074920654, "learning_rate": 0.0002, "epoch": 0.4981132075471698, "step": 7590}, {"loss": 0.9669, "grad_norm": 0.36150112748146057, "learning_rate": 0.0002, "epoch": 0.49876948318293685, "step": 7600}, {"loss": 0.8936, "grad_norm": 0.3893260955810547, "learning_rate": 0.0002, "epoch": 0.4994257588187039, "step": 7610}, {"loss": 0.9273, "grad_norm": 0.358791708946228, "learning_rate": 0.0002, "epoch": 0.5000820344544709, "step": 7620}, {"loss": 0.9053, "grad_norm": 0.3799469470977783, "learning_rate": 0.0002, "epoch": 0.500738310090238, "step": 7630}, {"loss": 0.9655, "grad_norm": 0.35908520221710205, "learning_rate": 0.0002, "epoch": 0.5013945857260049, "step": 7640}, {"loss": 0.8993, "grad_norm": 0.33538103103637695, "learning_rate": 0.0002, "epoch": 0.5020508613617719, "step": 7650}, {"loss": 0.8812, "grad_norm": 0.2990545928478241, "learning_rate": 0.0002, "epoch": 0.502707136997539, "step": 7660}, {"loss": 0.8965, "grad_norm": 0.3342699408531189, "learning_rate": 0.0002, "epoch": 0.503363412633306, "step": 7670}, {"loss": 0.9476, "grad_norm": 0.3656594455242157, "learning_rate": 0.0002, "epoch": 0.5040196882690731, "step": 7680}, {"loss": 0.9225, "grad_norm": 0.315176784992218, "learning_rate": 0.0002, "epoch": 0.50467596390484, "step": 7690}, {"loss": 0.8998, "grad_norm": 0.3070623576641083, "learning_rate": 0.0002, "epoch": 0.505332239540607, "step": 7700}, {"loss": 0.8565, "grad_norm": 0.3741548955440521, "learning_rate": 0.0002, "epoch": 0.5059885151763741, "step": 7710}, {"loss": 0.8882, "grad_norm": 0.3047017753124237, "learning_rate": 0.0002, "epoch": 0.5066447908121411, "step": 7720}, {"loss": 0.9002, "grad_norm": 0.5524629354476929, "learning_rate": 0.0002, "epoch": 0.5073010664479081, "step": 7730}, {"loss": 0.901, "grad_norm": 0.39234456419944763, "learning_rate": 0.0002, "epoch": 0.5079573420836752, "step": 7740}, {"loss": 0.8948, "grad_norm": 0.3306262791156769, "learning_rate": 0.0002, "epoch": 0.5086136177194421, "step": 7750}, {"loss": 0.8627, "grad_norm": 0.3181036710739136, "learning_rate": 0.0002, "epoch": 0.5092698933552092, "step": 7760}, {"loss": 0.9068, "grad_norm": 0.3531821668148041, "learning_rate": 0.0002, "epoch": 0.5099261689909762, "step": 7770}, {"loss": 0.9452, "grad_norm": 0.39781567454338074, "learning_rate": 0.0002, "epoch": 0.5105824446267432, "step": 7780}, {"loss": 0.8829, "grad_norm": 0.32413530349731445, "learning_rate": 0.0002, "epoch": 0.5112387202625103, "step": 7790}, {"loss": 0.9162, "grad_norm": 0.33857491612434387, "learning_rate": 0.0002, "epoch": 0.5118949958982772, "step": 7800}, {"loss": 0.9183, "grad_norm": 0.33426207304000854, "learning_rate": 0.0002, "epoch": 0.5125512715340443, "step": 7810}, {"loss": 0.8487, "grad_norm": 0.31170961260795593, "learning_rate": 0.0002, "epoch": 0.5132075471698113, "step": 7820}, {"loss": 0.9425, "grad_norm": 0.3401695787906647, "learning_rate": 0.0002, "epoch": 0.5138638228055783, "step": 7830}, {"loss": 0.8852, "grad_norm": 0.4325079619884491, "learning_rate": 0.0002, "epoch": 0.5145200984413454, "step": 7840}, {"loss": 0.9165, "grad_norm": 0.33266621828079224, "learning_rate": 0.0002, "epoch": 0.5151763740771124, "step": 7850}, {"loss": 0.9178, "grad_norm": 0.353476345539093, "learning_rate": 0.0002, "epoch": 0.5158326497128795, "step": 7860}, {"loss": 0.9734, "grad_norm": 0.34789302945137024, "learning_rate": 0.0002, "epoch": 0.5164889253486464, "step": 7870}, {"loss": 0.8674, "grad_norm": 0.3378570079803467, "learning_rate": 0.0002, "epoch": 0.5171452009844134, "step": 7880}, {"loss": 0.9147, "grad_norm": 0.340122789144516, "learning_rate": 0.0002, "epoch": 0.5178014766201805, "step": 7890}, {"loss": 0.9627, "grad_norm": 0.39235031604766846, "learning_rate": 0.0002, "epoch": 0.5184577522559475, "step": 7900}, {"loss": 0.9003, "grad_norm": 0.31752005219459534, "learning_rate": 0.0002, "epoch": 0.5191140278917146, "step": 7910}, {"loss": 0.9353, "grad_norm": 0.3948894739151001, "learning_rate": 0.0002, "epoch": 0.5197703035274815, "step": 7920}, {"loss": 0.8697, "grad_norm": 0.29803305864334106, "learning_rate": 0.0002, "epoch": 0.5204265791632485, "step": 7930}, {"loss": 0.9912, "grad_norm": 0.34285855293273926, "learning_rate": 0.0002, "epoch": 0.5210828547990156, "step": 7940}, {"loss": 0.9241, "grad_norm": 0.40978604555130005, "learning_rate": 0.0002, "epoch": 0.5217391304347826, "step": 7950}, {"loss": 0.9609, "grad_norm": 0.35473906993865967, "learning_rate": 0.0002, "epoch": 0.5223954060705497, "step": 7960}, {"loss": 0.925, "grad_norm": 0.3896619379520416, "learning_rate": 0.0002, "epoch": 0.5230516817063167, "step": 7970}, {"loss": 0.8863, "grad_norm": 0.3622605502605438, "learning_rate": 0.0002, "epoch": 0.5237079573420836, "step": 7980}, {"loss": 0.939, "grad_norm": 0.4097590744495392, "learning_rate": 0.0002, "epoch": 0.5243642329778507, "step": 7990}, {"loss": 0.8895, "grad_norm": 0.31397542357444763, "learning_rate": 0.0002, "epoch": 0.5250205086136177, "step": 8000}, {"loss": 0.9185, "grad_norm": 0.45559775829315186, "learning_rate": 0.0002, "epoch": 0.5256767842493847, "step": 8010}, {"loss": 0.9318, "grad_norm": 0.3471437692642212, "learning_rate": 0.0002, "epoch": 0.5263330598851518, "step": 8020}, {"loss": 0.8684, "grad_norm": 0.3105354607105255, "learning_rate": 0.0002, "epoch": 0.5269893355209188, "step": 8030}, {"loss": 0.8633, "grad_norm": 0.42847758531570435, "learning_rate": 0.0002, "epoch": 0.5276456111566858, "step": 8040}, {"loss": 0.924, "grad_norm": 0.732471227645874, "learning_rate": 0.0002, "epoch": 0.5283018867924528, "step": 8050}, {"loss": 0.8332, "grad_norm": 0.3242183327674866, "learning_rate": 0.0002, "epoch": 0.5289581624282198, "step": 8060}, {"loss": 0.9531, "grad_norm": 0.36898288130760193, "learning_rate": 0.0002, "epoch": 0.5296144380639869, "step": 8070}, {"loss": 0.933, "grad_norm": 0.3850383162498474, "learning_rate": 0.0002, "epoch": 0.5302707136997539, "step": 8080}, {"loss": 0.9184, "grad_norm": 0.3375824987888336, "learning_rate": 0.0002, "epoch": 0.530926989335521, "step": 8090}, {"loss": 0.9284, "grad_norm": 0.4321442246437073, "learning_rate": 0.0002, "epoch": 0.5315832649712879, "step": 8100}, {"loss": 0.9389, "grad_norm": 0.3836155831813812, "learning_rate": 0.0002, "epoch": 0.5322395406070549, "step": 8110}, {"loss": 0.9117, "grad_norm": 0.36536362767219543, "learning_rate": 0.0002, "epoch": 0.532895816242822, "step": 8120}, {"loss": 0.8724, "grad_norm": 0.4364359676837921, "learning_rate": 0.0002, "epoch": 0.533552091878589, "step": 8130}, {"loss": 0.8866, "grad_norm": 0.413506418466568, "learning_rate": 0.0002, "epoch": 0.5342083675143561, "step": 8140}, {"loss": 0.8985, "grad_norm": 0.33120569586753845, "learning_rate": 0.0002, "epoch": 0.534864643150123, "step": 8150}, {"loss": 0.9553, "grad_norm": 0.3408608138561249, "learning_rate": 0.0002, "epoch": 0.53552091878589, "step": 8160}, {"loss": 0.888, "grad_norm": 0.30265191197395325, "learning_rate": 0.0002, "epoch": 0.5361771944216571, "step": 8170}, {"loss": 0.8799, "grad_norm": 0.40208759903907776, "learning_rate": 0.0002, "epoch": 0.5368334700574241, "step": 8180}, {"loss": 0.9343, "grad_norm": 0.38238683342933655, "learning_rate": 0.0002, "epoch": 0.5374897456931912, "step": 8190}, {"loss": 0.9051, "grad_norm": 0.3405042290687561, "learning_rate": 0.0002, "epoch": 0.5381460213289582, "step": 8200}, {"loss": 0.9355, "grad_norm": 0.3806214928627014, "learning_rate": 0.0002, "epoch": 0.5388022969647251, "step": 8210}, {"loss": 0.9148, "grad_norm": 0.3639737069606781, "learning_rate": 0.0002, "epoch": 0.5394585726004922, "step": 8220}, {"loss": 0.9297, "grad_norm": 0.3366181552410126, "learning_rate": 0.0002, "epoch": 0.5401148482362592, "step": 8230}, {"loss": 0.9439, "grad_norm": 0.34822607040405273, "learning_rate": 0.0002, "epoch": 0.5407711238720263, "step": 8240}, {"loss": 0.925, "grad_norm": 0.3682822287082672, "learning_rate": 0.0002, "epoch": 0.5414273995077933, "step": 8250}, {"loss": 0.9246, "grad_norm": 0.39051371812820435, "learning_rate": 0.0002, "epoch": 0.5420836751435603, "step": 8260}, {"loss": 0.9008, "grad_norm": 0.37731602787971497, "learning_rate": 0.0002, "epoch": 0.5427399507793274, "step": 8270}, {"loss": 0.8849, "grad_norm": 0.34222212433815, "learning_rate": 0.0002, "epoch": 0.5433962264150943, "step": 8280}, {"loss": 0.8956, "grad_norm": 0.3870106637477875, "learning_rate": 0.0002, "epoch": 0.5440525020508613, "step": 8290}, {"loss": 0.8974, "grad_norm": 0.35894039273262024, "learning_rate": 0.0002, "epoch": 0.5447087776866284, "step": 8300}, {"loss": 0.9664, "grad_norm": 0.38450875878334045, "learning_rate": 0.0002, "epoch": 0.5453650533223954, "step": 8310}, {"loss": 0.9024, "grad_norm": 0.3106869161128998, "learning_rate": 0.0002, "epoch": 0.5460213289581625, "step": 8320}, {"loss": 0.8578, "grad_norm": 0.35935118794441223, "learning_rate": 0.0002, "epoch": 0.5466776045939294, "step": 8330}, {"loss": 0.904, "grad_norm": 0.37256261706352234, "learning_rate": 0.0002, "epoch": 0.5473338802296964, "step": 8340}, {"loss": 0.9091, "grad_norm": 0.3807767629623413, "learning_rate": 0.0002, "epoch": 0.5479901558654635, "step": 8350}, {"loss": 0.9314, "grad_norm": 0.3551439046859741, "learning_rate": 0.0002, "epoch": 0.5486464315012305, "step": 8360}, {"loss": 0.9046, "grad_norm": 0.39783644676208496, "learning_rate": 0.0002, "epoch": 0.5493027071369976, "step": 8370}, {"loss": 0.9466, "grad_norm": 0.37729522585868835, "learning_rate": 0.0002, "epoch": 0.5499589827727646, "step": 8380}, {"loss": 0.9559, "grad_norm": 0.32453545928001404, "learning_rate": 0.0002, "epoch": 0.5506152584085315, "step": 8390}, {"loss": 0.9073, "grad_norm": 0.353837251663208, "learning_rate": 0.0002, "epoch": 0.5512715340442986, "step": 8400}, {"loss": 0.961, "grad_norm": 0.37711212038993835, "learning_rate": 0.0002, "epoch": 0.5519278096800656, "step": 8410}, {"loss": 0.9844, "grad_norm": 0.3966476619243622, "learning_rate": 0.0002, "epoch": 0.5525840853158327, "step": 8420}, {"loss": 0.9221, "grad_norm": 0.338074266910553, "learning_rate": 0.0002, "epoch": 0.5532403609515997, "step": 8430}, {"loss": 0.9101, "grad_norm": 0.3984934389591217, "learning_rate": 0.0002, "epoch": 0.5538966365873667, "step": 8440}, {"loss": 0.9177, "grad_norm": 0.3430469334125519, "learning_rate": 0.0002, "epoch": 0.5545529122231337, "step": 8450}, {"loss": 0.9128, "grad_norm": 0.34223586320877075, "learning_rate": 0.0002, "epoch": 0.5552091878589007, "step": 8460}, {"loss": 0.8961, "grad_norm": 0.4057091474533081, "learning_rate": 0.0002, "epoch": 0.5558654634946678, "step": 8470}, {"loss": 0.9121, "grad_norm": 0.39272257685661316, "learning_rate": 0.0002, "epoch": 0.5565217391304348, "step": 8480}, {"loss": 0.8941, "grad_norm": 0.34801942110061646, "learning_rate": 0.0002, "epoch": 0.5571780147662018, "step": 8490}, {"loss": 0.8771, "grad_norm": 0.5177333354949951, "learning_rate": 0.0002, "epoch": 0.5578342904019689, "step": 8500}, {"loss": 0.9289, "grad_norm": 0.4139311909675598, "learning_rate": 0.0002, "epoch": 0.5584905660377358, "step": 8510}, {"loss": 0.9391, "grad_norm": 0.3526647388935089, "learning_rate": 0.0002, "epoch": 0.5591468416735029, "step": 8520}, {"loss": 0.9354, "grad_norm": 0.3475663661956787, "learning_rate": 0.0002, "epoch": 0.5598031173092699, "step": 8530}, {"loss": 0.9623, "grad_norm": 0.3338899314403534, "learning_rate": 0.0002, "epoch": 0.5604593929450369, "step": 8540}, {"loss": 0.9156, "grad_norm": 0.392212837934494, "learning_rate": 0.0002, "epoch": 0.561115668580804, "step": 8550}, {"loss": 0.9469, "grad_norm": 0.3058992028236389, "learning_rate": 0.0002, "epoch": 0.561771944216571, "step": 8560}, {"loss": 0.8465, "grad_norm": 0.40603798627853394, "learning_rate": 0.0002, "epoch": 0.5624282198523379, "step": 8570}, {"loss": 0.8962, "grad_norm": 0.37745383381843567, "learning_rate": 0.0002, "epoch": 0.563084495488105, "step": 8580}, {"loss": 0.8787, "grad_norm": 0.40886175632476807, "learning_rate": 0.0002, "epoch": 0.563740771123872, "step": 8590}, {"loss": 0.9469, "grad_norm": 0.3653600215911865, "learning_rate": 0.0002, "epoch": 0.5643970467596391, "step": 8600}, {"loss": 0.8976, "grad_norm": 0.32329508662223816, "learning_rate": 0.0002, "epoch": 0.5650533223954061, "step": 8610}, {"loss": 0.9081, "grad_norm": 0.3535236716270447, "learning_rate": 0.0002, "epoch": 0.565709598031173, "step": 8620}, {"loss": 0.8703, "grad_norm": 0.37497273087501526, "learning_rate": 0.0002, "epoch": 0.5663658736669401, "step": 8630}, {"loss": 0.8853, "grad_norm": 0.3781719505786896, "learning_rate": 0.0002, "epoch": 0.5670221493027071, "step": 8640}, {"loss": 0.9074, "grad_norm": 0.4481894075870514, "learning_rate": 0.0002, "epoch": 0.5676784249384742, "step": 8650}, {"loss": 0.8923, "grad_norm": 0.35790109634399414, "learning_rate": 0.0002, "epoch": 0.5683347005742412, "step": 8660}, {"loss": 0.8946, "grad_norm": 0.4617280662059784, "learning_rate": 0.0002, "epoch": 0.5689909762100082, "step": 8670}, {"loss": 0.9059, "grad_norm": 0.3813382089138031, "learning_rate": 0.0002, "epoch": 0.5696472518457752, "step": 8680}, {"loss": 0.9126, "grad_norm": 0.33388257026672363, "learning_rate": 0.0002, "epoch": 0.5703035274815422, "step": 8690}, {"loss": 0.8866, "grad_norm": 0.330185204744339, "learning_rate": 0.0002, "epoch": 0.5709598031173093, "step": 8700}, {"loss": 0.916, "grad_norm": 0.3768845200538635, "learning_rate": 0.0002, "epoch": 0.5716160787530763, "step": 8710}, {"loss": 0.9213, "grad_norm": 0.3221369683742523, "learning_rate": 0.0002, "epoch": 0.5722723543888433, "step": 8720}, {"loss": 0.8919, "grad_norm": 0.3221580684185028, "learning_rate": 0.0002, "epoch": 0.5729286300246104, "step": 8730}, {"loss": 0.924, "grad_norm": 0.3693002760410309, "learning_rate": 0.0002, "epoch": 0.5735849056603773, "step": 8740}, {"loss": 0.8983, "grad_norm": 0.3859325349330902, "learning_rate": 0.0002, "epoch": 0.5742411812961444, "step": 8750}, {"loss": 0.9321, "grad_norm": 0.3891449570655823, "learning_rate": 0.0002, "epoch": 0.5748974569319114, "step": 8760}, {"loss": 0.9315, "grad_norm": 0.3313274681568146, "learning_rate": 0.0002, "epoch": 0.5755537325676784, "step": 8770}, {"loss": 0.89, "grad_norm": 0.33418914675712585, "learning_rate": 0.0002, "epoch": 0.5762100082034455, "step": 8780}, {"loss": 0.9029, "grad_norm": 0.4153108596801758, "learning_rate": 0.0002, "epoch": 0.5768662838392125, "step": 8790}, {"loss": 0.9126, "grad_norm": 0.3614528477191925, "learning_rate": 0.0002, "epoch": 0.5775225594749795, "step": 8800}, {"loss": 0.9086, "grad_norm": 0.3172459602355957, "learning_rate": 0.0002, "epoch": 0.5781788351107465, "step": 8810}, {"loss": 0.9225, "grad_norm": 0.3795868456363678, "learning_rate": 0.0002, "epoch": 0.5788351107465135, "step": 8820}, {"loss": 0.9199, "grad_norm": 0.3750050961971283, "learning_rate": 0.0002, "epoch": 0.5794913863822806, "step": 8830}, {"loss": 0.9527, "grad_norm": 0.3348597288131714, "learning_rate": 0.0002, "epoch": 0.5801476620180476, "step": 8840}, {"loss": 0.9379, "grad_norm": 0.39116451144218445, "learning_rate": 0.0002, "epoch": 0.5808039376538146, "step": 8850}, {"loss": 0.9087, "grad_norm": 0.33639633655548096, "learning_rate": 0.0002, "epoch": 0.5814602132895816, "step": 8860}, {"loss": 0.9181, "grad_norm": 0.33336129784584045, "learning_rate": 0.0002, "epoch": 0.5821164889253486, "step": 8870}, {"loss": 0.9194, "grad_norm": 0.3624817132949829, "learning_rate": 0.0002, "epoch": 0.5827727645611157, "step": 8880}, {"loss": 0.9059, "grad_norm": 0.2976662218570709, "learning_rate": 0.0002, "epoch": 0.5834290401968827, "step": 8890}, {"loss": 0.8903, "grad_norm": 0.4231838583946228, "learning_rate": 0.0002, "epoch": 0.5840853158326497, "step": 8900}, {"loss": 0.9157, "grad_norm": 0.343156099319458, "learning_rate": 0.0002, "epoch": 0.5847415914684168, "step": 8910}, {"loss": 0.9655, "grad_norm": 0.39717403054237366, "learning_rate": 0.0002, "epoch": 0.5853978671041837, "step": 8920}, {"loss": 0.8758, "grad_norm": 0.3388144075870514, "learning_rate": 0.0002, "epoch": 0.5860541427399508, "step": 8930}, {"loss": 0.922, "grad_norm": 0.3949063718318939, "learning_rate": 0.0002, "epoch": 0.5867104183757178, "step": 8940}, {"loss": 0.9081, "grad_norm": 0.358415424823761, "learning_rate": 0.0002, "epoch": 0.5873666940114848, "step": 8950}, {"loss": 0.906, "grad_norm": 0.367883563041687, "learning_rate": 0.0002, "epoch": 0.5880229696472519, "step": 8960}, {"loss": 0.9131, "grad_norm": 0.3859010636806488, "learning_rate": 0.0002, "epoch": 0.5886792452830188, "step": 8970}, {"loss": 0.916, "grad_norm": 0.3540095090866089, "learning_rate": 0.0002, "epoch": 0.5893355209187859, "step": 8980}, {"loss": 0.9704, "grad_norm": 0.33913302421569824, "learning_rate": 0.0002, "epoch": 0.5899917965545529, "step": 8990}, {"loss": 0.8772, "grad_norm": 0.35641804337501526, "learning_rate": 0.0002, "epoch": 0.5906480721903199, "step": 9000}, {"loss": 0.8644, "grad_norm": 0.3448907434940338, "learning_rate": 0.0002, "epoch": 0.591304347826087, "step": 9010}, {"loss": 0.9059, "grad_norm": 0.4147050082683563, "learning_rate": 0.0002, "epoch": 0.591960623461854, "step": 9020}, {"loss": 0.9134, "grad_norm": 0.45032307505607605, "learning_rate": 0.0002, "epoch": 0.592616899097621, "step": 9030}, {"loss": 0.8855, "grad_norm": 0.3628511428833008, "learning_rate": 0.0002, "epoch": 0.593273174733388, "step": 9040}, {"loss": 0.8924, "grad_norm": 0.42760607600212097, "learning_rate": 0.0002, "epoch": 0.593929450369155, "step": 9050}, {"loss": 0.889, "grad_norm": 0.3591140806674957, "learning_rate": 0.0002, "epoch": 0.5945857260049221, "step": 9060}, {"loss": 0.9409, "grad_norm": 0.3496510982513428, "learning_rate": 0.0002, "epoch": 0.5952420016406891, "step": 9070}, {"loss": 0.9394, "grad_norm": 0.4098506569862366, "learning_rate": 0.0002, "epoch": 0.5958982772764561, "step": 9080}, {"loss": 0.9455, "grad_norm": 0.3535410165786743, "learning_rate": 0.0002, "epoch": 0.5965545529122231, "step": 9090}, {"loss": 0.8814, "grad_norm": 0.3716369569301605, "learning_rate": 0.0002, "epoch": 0.5972108285479901, "step": 9100}, {"loss": 0.8656, "grad_norm": 0.3236614763736725, "learning_rate": 0.0002, "epoch": 0.5978671041837572, "step": 9110}, {"loss": 0.905, "grad_norm": 0.311577171087265, "learning_rate": 0.0002, "epoch": 0.5985233798195242, "step": 9120}, {"loss": 0.9118, "grad_norm": 0.3261461555957794, "learning_rate": 0.0002, "epoch": 0.5991796554552912, "step": 9130}, {"loss": 0.9274, "grad_norm": 0.3794492781162262, "learning_rate": 0.0002, "epoch": 0.5998359310910583, "step": 9140}, {"loss": 0.8967, "grad_norm": 0.31726357340812683, "learning_rate": 0.0002, "epoch": 0.6004922067268252, "step": 9150}, {"loss": 0.9156, "grad_norm": 0.38432174921035767, "learning_rate": 0.0002, "epoch": 0.6011484823625923, "step": 9160}, {"loss": 0.9703, "grad_norm": 0.4364495873451233, "learning_rate": 0.0002, "epoch": 0.6018047579983593, "step": 9170}, {"loss": 0.9162, "grad_norm": 0.41058987379074097, "learning_rate": 0.0002, "epoch": 0.6024610336341263, "step": 9180}, {"loss": 0.8914, "grad_norm": 0.3146302103996277, "learning_rate": 0.0002, "epoch": 0.6031173092698934, "step": 9190}, {"loss": 0.8859, "grad_norm": 0.3117610216140747, "learning_rate": 0.0002, "epoch": 0.6037735849056604, "step": 9200}, {"loss": 0.8741, "grad_norm": 0.31921523809432983, "learning_rate": 0.0002, "epoch": 0.6044298605414274, "step": 9210}, {"loss": 0.8756, "grad_norm": 0.3295772969722748, "learning_rate": 0.0002, "epoch": 0.6050861361771944, "step": 9220}, {"loss": 0.8578, "grad_norm": 0.3216910660266876, "learning_rate": 0.0002, "epoch": 0.6057424118129614, "step": 9230}, {"loss": 0.9341, "grad_norm": 0.40590721368789673, "learning_rate": 0.0002, "epoch": 0.6063986874487285, "step": 9240}, {"loss": 0.935, "grad_norm": 0.4337029457092285, "learning_rate": 0.0002, "epoch": 0.6070549630844955, "step": 9250}, {"loss": 0.8866, "grad_norm": 0.3434010148048401, "learning_rate": 0.0002, "epoch": 0.6077112387202626, "step": 9260}, {"loss": 0.8692, "grad_norm": 0.340589702129364, "learning_rate": 0.0002, "epoch": 0.6083675143560295, "step": 9270}, {"loss": 0.8741, "grad_norm": 0.33650949597358704, "learning_rate": 0.0002, "epoch": 0.6090237899917965, "step": 9280}, {"loss": 0.9396, "grad_norm": 0.4195605218410492, "learning_rate": 0.0002, "epoch": 0.6096800656275636, "step": 9290}, {"loss": 0.8877, "grad_norm": 0.444624125957489, "learning_rate": 0.0002, "epoch": 0.6103363412633306, "step": 9300}, {"loss": 0.8792, "grad_norm": 0.5425066947937012, "learning_rate": 0.0002, "epoch": 0.6109926168990977, "step": 9310}, {"loss": 0.865, "grad_norm": 0.36225831508636475, "learning_rate": 0.0002, "epoch": 0.6116488925348647, "step": 9320}, {"loss": 0.9439, "grad_norm": 0.3421785533428192, "learning_rate": 0.0002, "epoch": 0.6123051681706316, "step": 9330}, {"loss": 0.8842, "grad_norm": 0.3792729079723358, "learning_rate": 0.0002, "epoch": 0.6129614438063987, "step": 9340}, {"loss": 0.8791, "grad_norm": 0.41844120621681213, "learning_rate": 0.0002, "epoch": 0.6136177194421657, "step": 9350}, {"loss": 0.9091, "grad_norm": 0.3430991768836975, "learning_rate": 0.0002, "epoch": 0.6142739950779327, "step": 9360}, {"loss": 0.9165, "grad_norm": 0.3460402190685272, "learning_rate": 0.0002, "epoch": 0.6149302707136998, "step": 9370}, {"loss": 0.8571, "grad_norm": 0.3775254189968109, "learning_rate": 0.0002, "epoch": 0.6155865463494667, "step": 9380}, {"loss": 0.8915, "grad_norm": 0.3706645965576172, "learning_rate": 0.0002, "epoch": 0.6162428219852338, "step": 9390}, {"loss": 0.9085, "grad_norm": 0.3817055821418762, "learning_rate": 0.0002, "epoch": 0.6168990976210008, "step": 9400}, {"loss": 0.9086, "grad_norm": 0.3856641352176666, "learning_rate": 0.0002, "epoch": 0.6175553732567678, "step": 9410}, {"loss": 0.9031, "grad_norm": 0.3271346688270569, "learning_rate": 0.0002, "epoch": 0.6182116488925349, "step": 9420}, {"loss": 0.883, "grad_norm": 0.334314227104187, "learning_rate": 0.0002, "epoch": 0.6188679245283019, "step": 9430}, {"loss": 0.9398, "grad_norm": 0.3850700557231903, "learning_rate": 0.0002, "epoch": 0.619524200164069, "step": 9440}, {"loss": 0.9246, "grad_norm": 0.37492436170578003, "learning_rate": 0.0002, "epoch": 0.6201804757998359, "step": 9450}, {"loss": 0.9308, "grad_norm": 0.44262826442718506, "learning_rate": 0.0002, "epoch": 0.6208367514356029, "step": 9460}, {"loss": 0.9659, "grad_norm": 0.4070657193660736, "learning_rate": 0.0002, "epoch": 0.62149302707137, "step": 9470}, {"loss": 0.8812, "grad_norm": 0.37636154890060425, "learning_rate": 0.0002, "epoch": 0.622149302707137, "step": 9480}, {"loss": 0.941, "grad_norm": 0.38988572359085083, "learning_rate": 0.0002, "epoch": 0.6228055783429041, "step": 9490}, {"loss": 0.8959, "grad_norm": 0.36479735374450684, "learning_rate": 0.0002, "epoch": 0.623461853978671, "step": 9500}, {"loss": 0.9431, "grad_norm": 0.3759172260761261, "learning_rate": 0.0002, "epoch": 0.624118129614438, "step": 9510}, {"loss": 0.889, "grad_norm": 0.3380950689315796, "learning_rate": 0.0002, "epoch": 0.6247744052502051, "step": 9520}, {"loss": 0.8597, "grad_norm": 0.3483046889305115, "learning_rate": 0.0002, "epoch": 0.6254306808859721, "step": 9530}, {"loss": 0.9195, "grad_norm": 0.3562379777431488, "learning_rate": 0.0002, "epoch": 0.6260869565217392, "step": 9540}, {"loss": 0.9434, "grad_norm": 0.40167364478111267, "learning_rate": 0.0002, "epoch": 0.6267432321575062, "step": 9550}, {"loss": 0.8677, "grad_norm": 0.36348867416381836, "learning_rate": 0.0002, "epoch": 0.6273995077932731, "step": 9560}, {"loss": 0.9061, "grad_norm": 0.33701515197753906, "learning_rate": 0.0002, "epoch": 0.6280557834290402, "step": 9570}, {"loss": 0.8926, "grad_norm": 0.3795888125896454, "learning_rate": 0.0002, "epoch": 0.6287120590648072, "step": 9580}, {"loss": 0.8672, "grad_norm": 0.3701418340206146, "learning_rate": 0.0002, "epoch": 0.6293683347005743, "step": 9590}, {"loss": 0.9025, "grad_norm": 0.32559722661972046, "learning_rate": 0.0002, "epoch": 0.6300246103363413, "step": 9600}, {"loss": 0.9223, "grad_norm": 0.3093271553516388, "learning_rate": 0.0002, "epoch": 0.6306808859721083, "step": 9610}, {"loss": 0.9032, "grad_norm": 0.41225478053092957, "learning_rate": 0.0002, "epoch": 0.6313371616078753, "step": 9620}, {"loss": 0.9125, "grad_norm": 0.3798231780529022, "learning_rate": 0.0002, "epoch": 0.6319934372436423, "step": 9630}, {"loss": 0.9252, "grad_norm": 0.38690295815467834, "learning_rate": 0.0002, "epoch": 0.6326497128794093, "step": 9640}, {"loss": 0.8668, "grad_norm": 0.3629007637500763, "learning_rate": 0.0002, "epoch": 0.6333059885151764, "step": 9650}, {"loss": 0.9143, "grad_norm": 0.39467132091522217, "learning_rate": 0.0002, "epoch": 0.6339622641509434, "step": 9660}, {"loss": 0.9267, "grad_norm": 0.37746182084083557, "learning_rate": 0.0002, "epoch": 0.6346185397867105, "step": 9670}, {"loss": 0.8915, "grad_norm": 0.3718436062335968, "learning_rate": 0.0002, "epoch": 0.6352748154224774, "step": 9680}, {"loss": 0.9111, "grad_norm": 0.3951144218444824, "learning_rate": 0.0002, "epoch": 0.6359310910582444, "step": 9690}, {"loss": 0.897, "grad_norm": 0.4104543924331665, "learning_rate": 0.0002, "epoch": 0.6365873666940115, "step": 9700}, {"loss": 0.9349, "grad_norm": 0.37318137288093567, "learning_rate": 0.0002, "epoch": 0.6372436423297785, "step": 9710}, {"loss": 0.9009, "grad_norm": 0.35614442825317383, "learning_rate": 0.0002, "epoch": 0.6378999179655456, "step": 9720}, {"loss": 0.9858, "grad_norm": 0.37922942638397217, "learning_rate": 0.0002, "epoch": 0.6385561936013125, "step": 9730}, {"loss": 0.8533, "grad_norm": 0.35624784231185913, "learning_rate": 0.0002, "epoch": 0.6392124692370795, "step": 9740}, {"loss": 0.934, "grad_norm": 0.34946876764297485, "learning_rate": 0.0002, "epoch": 0.6398687448728466, "step": 9750}, {"loss": 0.9176, "grad_norm": 0.37666648626327515, "learning_rate": 0.0002, "epoch": 0.6405250205086136, "step": 9760}, {"loss": 0.9297, "grad_norm": 0.3619046211242676, "learning_rate": 0.0002, "epoch": 0.6411812961443807, "step": 9770}, {"loss": 0.9135, "grad_norm": 0.3814936578273773, "learning_rate": 0.0002, "epoch": 0.6418375717801477, "step": 9780}, {"loss": 0.9143, "grad_norm": 0.4181577265262604, "learning_rate": 0.0002, "epoch": 0.6424938474159146, "step": 9790}, {"loss": 0.8978, "grad_norm": 0.3707144558429718, "learning_rate": 0.0002, "epoch": 0.6431501230516817, "step": 9800}, {"loss": 0.9311, "grad_norm": 0.3364716172218323, "learning_rate": 0.0002, "epoch": 0.6438063986874487, "step": 9810}, {"loss": 0.8566, "grad_norm": 0.31425684690475464, "learning_rate": 0.0002, "epoch": 0.6444626743232158, "step": 9820}, {"loss": 0.8792, "grad_norm": 0.32928282022476196, "learning_rate": 0.0002, "epoch": 0.6451189499589828, "step": 9830}, {"loss": 0.9232, "grad_norm": 0.39178264141082764, "learning_rate": 0.0002, "epoch": 0.6457752255947498, "step": 9840}, {"loss": 0.9413, "grad_norm": 0.37753361463546753, "learning_rate": 0.0002, "epoch": 0.6464315012305168, "step": 9850}, {"loss": 0.8777, "grad_norm": 0.4574730396270752, "learning_rate": 0.0002, "epoch": 0.6470877768662838, "step": 9860}, {"loss": 0.9078, "grad_norm": 0.3926962614059448, "learning_rate": 0.0002, "epoch": 0.6477440525020509, "step": 9870}, {"loss": 0.9221, "grad_norm": 0.3919081687927246, "learning_rate": 0.0002, "epoch": 0.6484003281378179, "step": 9880}, {"loss": 0.8681, "grad_norm": 0.3904387652873993, "learning_rate": 0.0002, "epoch": 0.6490566037735849, "step": 9890}, {"loss": 0.883, "grad_norm": 0.40808236598968506, "learning_rate": 0.0002, "epoch": 0.649712879409352, "step": 9900}, {"loss": 0.9133, "grad_norm": 0.40531080961227417, "learning_rate": 0.0002, "epoch": 0.6503691550451189, "step": 9910}, {"loss": 0.9417, "grad_norm": 0.3374323844909668, "learning_rate": 0.0002, "epoch": 0.6510254306808859, "step": 9920}, {"loss": 0.9769, "grad_norm": 0.4000678062438965, "learning_rate": 0.0002, "epoch": 0.651681706316653, "step": 9930}, {"loss": 0.9069, "grad_norm": 0.35415270924568176, "learning_rate": 0.0002, "epoch": 0.65233798195242, "step": 9940}, {"loss": 0.9337, "grad_norm": 0.3612231910228729, "learning_rate": 0.0002, "epoch": 0.6529942575881871, "step": 9950}, {"loss": 0.9402, "grad_norm": 0.3081146776676178, "learning_rate": 0.0002, "epoch": 0.6536505332239541, "step": 9960}, {"loss": 0.8942, "grad_norm": 0.31079018115997314, "learning_rate": 0.0002, "epoch": 0.654306808859721, "step": 9970}, {"loss": 0.905, "grad_norm": 0.4256346523761749, "learning_rate": 0.0002, "epoch": 0.6549630844954881, "step": 9980}, {"loss": 0.9226, "grad_norm": 0.3592916429042816, "learning_rate": 0.0002, "epoch": 0.6556193601312551, "step": 9990}, {"loss": 0.9018, "grad_norm": 0.3496004641056061, "learning_rate": 0.0002, "epoch": 0.6562756357670222, "step": 10000}, {"loss": 0.931, "grad_norm": 0.4177037179470062, "learning_rate": 0.0002, "epoch": 0.6569319114027892, "step": 10010}, {"loss": 0.9034, "grad_norm": 0.36680638790130615, "learning_rate": 0.0002, "epoch": 0.6575881870385561, "step": 10020}, {"loss": 0.9706, "grad_norm": 0.3916943073272705, "learning_rate": 0.0002, "epoch": 0.6582444626743232, "step": 10030}, {"loss": 0.886, "grad_norm": 0.44537290930747986, "learning_rate": 0.0002, "epoch": 0.6589007383100902, "step": 10040}, {"loss": 0.8798, "grad_norm": 0.3404697775840759, "learning_rate": 0.0002, "epoch": 0.6595570139458573, "step": 10050}, {"loss": 0.9801, "grad_norm": 0.3304594159126282, "learning_rate": 0.0002, "epoch": 0.6602132895816243, "step": 10060}, {"loss": 0.9107, "grad_norm": 0.39067313075065613, "learning_rate": 0.0002, "epoch": 0.6608695652173913, "step": 10070}, {"loss": 0.9336, "grad_norm": 0.366178959608078, "learning_rate": 0.0002, "epoch": 0.6615258408531584, "step": 10080}, {"loss": 0.8817, "grad_norm": 0.3880734145641327, "learning_rate": 0.0002, "epoch": 0.6621821164889253, "step": 10090}, {"loss": 0.9087, "grad_norm": 0.42047396302223206, "learning_rate": 0.0002, "epoch": 0.6628383921246924, "step": 10100}, {"loss": 0.8859, "grad_norm": 0.3299349546432495, "learning_rate": 0.0002, "epoch": 0.6634946677604594, "step": 10110}, {"loss": 0.9461, "grad_norm": 0.33470937609672546, "learning_rate": 0.0002, "epoch": 0.6641509433962264, "step": 10120}, {"loss": 0.9453, "grad_norm": 0.36240577697753906, "learning_rate": 0.0002, "epoch": 0.6648072190319935, "step": 10130}, {"loss": 0.8706, "grad_norm": 0.41457104682922363, "learning_rate": 0.0002, "epoch": 0.6654634946677604, "step": 10140}, {"loss": 0.9385, "grad_norm": 0.39064788818359375, "learning_rate": 0.0002, "epoch": 0.6661197703035275, "step": 10150}, {"loss": 0.9252, "grad_norm": 0.4137183725833893, "learning_rate": 0.0002, "epoch": 0.6667760459392945, "step": 10160}, {"loss": 0.8874, "grad_norm": 0.363413542509079, "learning_rate": 0.0002, "epoch": 0.6674323215750615, "step": 10170}, {"loss": 0.8936, "grad_norm": 0.3631424307823181, "learning_rate": 0.0002, "epoch": 0.6680885972108286, "step": 10180}, {"loss": 0.9181, "grad_norm": 0.460721880197525, "learning_rate": 0.0002, "epoch": 0.6687448728465956, "step": 10190}, {"loss": 0.8879, "grad_norm": 0.3525084853172302, "learning_rate": 0.0002, "epoch": 0.6694011484823625, "step": 10200}, {"loss": 0.934, "grad_norm": 0.34321045875549316, "learning_rate": 0.0002, "epoch": 0.6700574241181296, "step": 10210}, {"loss": 0.9056, "grad_norm": 0.3397759795188904, "learning_rate": 0.0002, "epoch": 0.6707136997538966, "step": 10220}, {"loss": 0.8883, "grad_norm": 0.3680257201194763, "learning_rate": 0.0002, "epoch": 0.6713699753896637, "step": 10230}, {"loss": 0.9316, "grad_norm": 0.4023214876651764, "learning_rate": 0.0002, "epoch": 0.6720262510254307, "step": 10240}, {"loss": 0.8809, "grad_norm": 0.3645709753036499, "learning_rate": 0.0002, "epoch": 0.6726825266611977, "step": 10250}, {"loss": 0.9766, "grad_norm": 0.3558615744113922, "learning_rate": 0.0002, "epoch": 0.6733388022969647, "step": 10260}, {"loss": 0.872, "grad_norm": 0.3725513517856598, "learning_rate": 0.0002, "epoch": 0.6739950779327317, "step": 10270}, {"loss": 0.9354, "grad_norm": 0.42790961265563965, "learning_rate": 0.0002, "epoch": 0.6746513535684988, "step": 10280}, {"loss": 0.8955, "grad_norm": 0.3808377683162689, "learning_rate": 0.0002, "epoch": 0.6753076292042658, "step": 10290}, {"loss": 0.8761, "grad_norm": 0.40500468015670776, "learning_rate": 0.0002, "epoch": 0.6759639048400328, "step": 10300}, {"loss": 0.9212, "grad_norm": 0.3631184995174408, "learning_rate": 0.0002, "epoch": 0.6766201804757999, "step": 10310}, {"loss": 0.8867, "grad_norm": 0.3319573998451233, "learning_rate": 0.0002, "epoch": 0.6772764561115668, "step": 10320}, {"loss": 0.857, "grad_norm": 0.3851188123226166, "learning_rate": 0.0002, "epoch": 0.6779327317473339, "step": 10330}, {"loss": 0.9159, "grad_norm": 0.37413159012794495, "learning_rate": 0.0002, "epoch": 0.6785890073831009, "step": 10340}, {"loss": 0.8728, "grad_norm": 0.34124433994293213, "learning_rate": 0.0002, "epoch": 0.6792452830188679, "step": 10350}, {"loss": 0.9654, "grad_norm": 0.3786381781101227, "learning_rate": 0.0002, "epoch": 0.679901558654635, "step": 10360}, {"loss": 0.915, "grad_norm": 0.3848523199558258, "learning_rate": 0.0002, "epoch": 0.680557834290402, "step": 10370}, {"loss": 0.8873, "grad_norm": 0.29365262389183044, "learning_rate": 0.0002, "epoch": 0.681214109926169, "step": 10380}, {"loss": 0.8692, "grad_norm": 0.3481557369232178, "learning_rate": 0.0002, "epoch": 0.681870385561936, "step": 10390}, {"loss": 0.885, "grad_norm": 0.38596320152282715, "learning_rate": 0.0002, "epoch": 0.682526661197703, "step": 10400}, {"loss": 0.8545, "grad_norm": 0.30872032046318054, "learning_rate": 0.0002, "epoch": 0.6831829368334701, "step": 10410}, {"loss": 0.867, "grad_norm": 0.3729351758956909, "learning_rate": 0.0002, "epoch": 0.6838392124692371, "step": 10420}, {"loss": 0.9089, "grad_norm": 0.35829052329063416, "learning_rate": 0.0002, "epoch": 0.684495488105004, "step": 10430}, {"loss": 0.8797, "grad_norm": 0.34916967153549194, "learning_rate": 0.0002, "epoch": 0.6851517637407711, "step": 10440}, {"loss": 0.8731, "grad_norm": 0.40377020835876465, "learning_rate": 0.0002, "epoch": 0.6858080393765381, "step": 10450}, {"loss": 0.9027, "grad_norm": 0.36841881275177, "learning_rate": 0.0002, "epoch": 0.6864643150123052, "step": 10460}, {"loss": 0.8808, "grad_norm": 0.35462167859077454, "learning_rate": 0.0002, "epoch": 0.6871205906480722, "step": 10470}, {"loss": 0.967, "grad_norm": 0.4630918502807617, "learning_rate": 0.0002, "epoch": 0.6877768662838392, "step": 10480}, {"loss": 0.9114, "grad_norm": 0.4230295419692993, "learning_rate": 0.0002, "epoch": 0.6884331419196063, "step": 10490}, {"loss": 0.8908, "grad_norm": 0.35059425234794617, "learning_rate": 0.0002, "epoch": 0.6890894175553732, "step": 10500}, {"loss": 0.9325, "grad_norm": 0.4072548747062683, "learning_rate": 0.0002, "epoch": 0.6897456931911403, "step": 10510}, {"loss": 0.9026, "grad_norm": 0.33755314350128174, "learning_rate": 0.0002, "epoch": 0.6904019688269073, "step": 10520}, {"loss": 0.908, "grad_norm": 0.4411669075489044, "learning_rate": 0.0002, "epoch": 0.6910582444626743, "step": 10530}, {"loss": 0.8639, "grad_norm": 0.9155740737915039, "learning_rate": 0.0002, "epoch": 0.6917145200984414, "step": 10540}, {"loss": 0.8975, "grad_norm": 0.39329391717910767, "learning_rate": 0.0002, "epoch": 0.6923707957342083, "step": 10550}, {"loss": 0.92, "grad_norm": 0.3602689206600189, "learning_rate": 0.0002, "epoch": 0.6930270713699754, "step": 10560}, {"loss": 0.8247, "grad_norm": 0.35848474502563477, "learning_rate": 0.0002, "epoch": 0.6936833470057424, "step": 10570}, {"loss": 0.9171, "grad_norm": 0.37857621908187866, "learning_rate": 0.0002, "epoch": 0.6943396226415094, "step": 10580}, {"loss": 0.9533, "grad_norm": 0.37071332335472107, "learning_rate": 0.0002, "epoch": 0.6949958982772765, "step": 10590}, {"loss": 0.9237, "grad_norm": 0.33589425683021545, "learning_rate": 0.0002, "epoch": 0.6956521739130435, "step": 10600}, {"loss": 0.8772, "grad_norm": 0.4279285669326782, "learning_rate": 0.0002, "epoch": 0.6963084495488105, "step": 10610}, {"loss": 0.9452, "grad_norm": 0.47047463059425354, "learning_rate": 0.0002, "epoch": 0.6969647251845775, "step": 10620}, {"loss": 0.8996, "grad_norm": 0.41214805841445923, "learning_rate": 0.0002, "epoch": 0.6976210008203445, "step": 10630}, {"loss": 0.8776, "grad_norm": 0.3999953866004944, "learning_rate": 0.0002, "epoch": 0.6982772764561116, "step": 10640}, {"loss": 0.9426, "grad_norm": 0.42441290616989136, "learning_rate": 0.0002, "epoch": 0.6989335520918786, "step": 10650}, {"loss": 0.9319, "grad_norm": 0.41990748047828674, "learning_rate": 0.0002, "epoch": 0.6995898277276457, "step": 10660}, {"loss": 0.9024, "grad_norm": 0.37445810437202454, "learning_rate": 0.0002, "epoch": 0.7002461033634126, "step": 10670}, {"loss": 0.9029, "grad_norm": 0.37638649344444275, "learning_rate": 0.0002, "epoch": 0.7009023789991796, "step": 10680}, {"loss": 0.9211, "grad_norm": 0.41475534439086914, "learning_rate": 0.0002, "epoch": 0.7015586546349467, "step": 10690}, {"loss": 0.914, "grad_norm": 0.3430996537208557, "learning_rate": 0.0002, "epoch": 0.7022149302707137, "step": 10700}, {"loss": 0.9051, "grad_norm": 0.3569522798061371, "learning_rate": 0.0002, "epoch": 0.7028712059064807, "step": 10710}, {"loss": 0.8792, "grad_norm": 0.3145142197608948, "learning_rate": 0.0002, "epoch": 0.7035274815422478, "step": 10720}, {"loss": 0.9537, "grad_norm": 0.40700432658195496, "learning_rate": 0.0002, "epoch": 0.7041837571780147, "step": 10730}, {"loss": 0.8677, "grad_norm": 0.37982651591300964, "learning_rate": 0.0002, "epoch": 0.7048400328137818, "step": 10740}, {"loss": 0.8899, "grad_norm": 0.39240679144859314, "learning_rate": 0.0002, "epoch": 0.7054963084495488, "step": 10750}, {"loss": 0.9516, "grad_norm": 0.41788724064826965, "learning_rate": 0.0002, "epoch": 0.7061525840853158, "step": 10760}, {"loss": 0.8672, "grad_norm": 0.4015905559062958, "learning_rate": 0.0002, "epoch": 0.7068088597210829, "step": 10770}, {"loss": 0.9131, "grad_norm": 0.4019724130630493, "learning_rate": 0.0002, "epoch": 0.7074651353568499, "step": 10780}, {"loss": 0.8698, "grad_norm": 0.37387898564338684, "learning_rate": 0.0002, "epoch": 0.7081214109926169, "step": 10790}, {"loss": 0.8547, "grad_norm": 0.32087528705596924, "learning_rate": 0.0002, "epoch": 0.7087776866283839, "step": 10800}, {"loss": 0.9241, "grad_norm": 0.38160258531570435, "learning_rate": 0.0002, "epoch": 0.7094339622641509, "step": 10810}, {"loss": 0.8941, "grad_norm": 0.340973436832428, "learning_rate": 0.0002, "epoch": 0.710090237899918, "step": 10820}, {"loss": 0.9404, "grad_norm": 0.3848867416381836, "learning_rate": 0.0002, "epoch": 0.710746513535685, "step": 10830}, {"loss": 0.9299, "grad_norm": 0.36736220121383667, "learning_rate": 0.0002, "epoch": 0.711402789171452, "step": 10840}, {"loss": 0.8881, "grad_norm": 0.367404967546463, "learning_rate": 0.0002, "epoch": 0.712059064807219, "step": 10850}, {"loss": 0.9224, "grad_norm": 0.333751380443573, "learning_rate": 0.0002, "epoch": 0.712715340442986, "step": 10860}, {"loss": 0.8712, "grad_norm": 0.3865894079208374, "learning_rate": 0.0002, "epoch": 0.7133716160787531, "step": 10870}, {"loss": 0.8957, "grad_norm": 0.34016433358192444, "learning_rate": 0.0002, "epoch": 0.7140278917145201, "step": 10880}, {"loss": 0.8493, "grad_norm": 0.3233864903450012, "learning_rate": 0.0002, "epoch": 0.7146841673502872, "step": 10890}, {"loss": 0.9148, "grad_norm": 0.4275553822517395, "learning_rate": 0.0002, "epoch": 0.7153404429860541, "step": 10900}, {"loss": 0.9345, "grad_norm": 0.41715168952941895, "learning_rate": 0.0002, "epoch": 0.7159967186218211, "step": 10910}, {"loss": 0.9321, "grad_norm": 0.40540871024131775, "learning_rate": 0.0002, "epoch": 0.7166529942575882, "step": 10920}, {"loss": 0.9035, "grad_norm": 0.35520824790000916, "learning_rate": 0.0002, "epoch": 0.7173092698933552, "step": 10930}, {"loss": 0.9243, "grad_norm": 0.3805985748767853, "learning_rate": 0.0002, "epoch": 0.7179655455291223, "step": 10940}, {"loss": 0.9542, "grad_norm": 0.36969226598739624, "learning_rate": 0.0002, "epoch": 0.7186218211648893, "step": 10950}, {"loss": 0.892, "grad_norm": 0.38408684730529785, "learning_rate": 0.0002, "epoch": 0.7192780968006562, "step": 10960}, {"loss": 0.8699, "grad_norm": 0.3865699768066406, "learning_rate": 0.0002, "epoch": 0.7199343724364233, "step": 10970}, {"loss": 0.8767, "grad_norm": 0.37172383069992065, "learning_rate": 0.0002, "epoch": 0.7205906480721903, "step": 10980}, {"loss": 0.8609, "grad_norm": 0.39816758036613464, "learning_rate": 0.0002, "epoch": 0.7212469237079573, "step": 10990}, {"loss": 0.94, "grad_norm": 0.4290051758289337, "learning_rate": 0.0002, "epoch": 0.7219031993437244, "step": 11000}, {"loss": 0.9328, "grad_norm": 0.3733605146408081, "learning_rate": 0.0002, "epoch": 0.7225594749794914, "step": 11010}, {"loss": 0.9048, "grad_norm": 0.37305396795272827, "learning_rate": 0.0002, "epoch": 0.7232157506152584, "step": 11020}, {"loss": 0.8713, "grad_norm": 0.3557985723018646, "learning_rate": 0.0002, "epoch": 0.7238720262510254, "step": 11030}, {"loss": 0.8558, "grad_norm": 0.4635949730873108, "learning_rate": 0.0002, "epoch": 0.7245283018867924, "step": 11040}, {"loss": 0.8711, "grad_norm": 0.3419910967350006, "learning_rate": 0.0002, "epoch": 0.7251845775225595, "step": 11050}, {"loss": 0.9138, "grad_norm": 0.41529953479766846, "learning_rate": 0.0002, "epoch": 0.7258408531583265, "step": 11060}, {"loss": 0.8859, "grad_norm": 0.325923353433609, "learning_rate": 0.0002, "epoch": 0.7264971287940936, "step": 11070}, {"loss": 0.9924, "grad_norm": 0.41932010650634766, "learning_rate": 0.0002, "epoch": 0.7271534044298605, "step": 11080}, {"loss": 0.9662, "grad_norm": 0.39984336495399475, "learning_rate": 0.0002, "epoch": 0.7278096800656275, "step": 11090}, {"loss": 0.8849, "grad_norm": 0.3673977851867676, "learning_rate": 0.0002, "epoch": 0.7284659557013946, "step": 11100}, {"loss": 0.9201, "grad_norm": 0.3722262680530548, "learning_rate": 0.0002, "epoch": 0.7291222313371616, "step": 11110}, {"loss": 0.9156, "grad_norm": 0.3859870135784149, "learning_rate": 0.0002, "epoch": 0.7297785069729287, "step": 11120}, {"loss": 0.899, "grad_norm": 0.3323642313480377, "learning_rate": 0.0002, "epoch": 0.7304347826086957, "step": 11130}, {"loss": 0.9244, "grad_norm": 0.37793564796447754, "learning_rate": 0.0002, "epoch": 0.7310910582444626, "step": 11140}, {"loss": 0.8767, "grad_norm": 0.44874733686447144, "learning_rate": 0.0002, "epoch": 0.7317473338802297, "step": 11150}, {"loss": 0.9078, "grad_norm": 0.4104187786579132, "learning_rate": 0.0002, "epoch": 0.7324036095159967, "step": 11160}, {"loss": 0.8828, "grad_norm": 0.3699926435947418, "learning_rate": 0.0002, "epoch": 0.7330598851517638, "step": 11170}, {"loss": 0.8909, "grad_norm": 0.41013723611831665, "learning_rate": 0.0002, "epoch": 0.7337161607875308, "step": 11180}, {"loss": 0.8934, "grad_norm": 0.370669424533844, "learning_rate": 0.0002, "epoch": 0.7343724364232977, "step": 11190}, {"loss": 0.881, "grad_norm": 0.4059436619281769, "learning_rate": 0.0002, "epoch": 0.7350287120590648, "step": 11200}, {"loss": 0.903, "grad_norm": 0.5038959383964539, "learning_rate": 0.0002, "epoch": 0.7356849876948318, "step": 11210}, {"loss": 0.9022, "grad_norm": 0.337137371301651, "learning_rate": 0.0002, "epoch": 0.7363412633305989, "step": 11220}, {"loss": 0.9171, "grad_norm": 0.412392795085907, "learning_rate": 0.0002, "epoch": 0.7369975389663659, "step": 11230}, {"loss": 0.8736, "grad_norm": 0.4415507912635803, "learning_rate": 0.0002, "epoch": 0.7376538146021329, "step": 11240}, {"loss": 0.8936, "grad_norm": 0.35013696551322937, "learning_rate": 0.0002, "epoch": 0.7383100902379, "step": 11250}, {"loss": 0.8841, "grad_norm": 0.3677300810813904, "learning_rate": 0.0002, "epoch": 0.7389663658736669, "step": 11260}, {"loss": 0.8756, "grad_norm": 0.36722511053085327, "learning_rate": 0.0002, "epoch": 0.7396226415094339, "step": 11270}, {"loss": 0.8727, "grad_norm": 0.41611534357070923, "learning_rate": 0.0002, "epoch": 0.740278917145201, "step": 11280}, {"loss": 0.921, "grad_norm": 0.4244968295097351, "learning_rate": 0.0002, "epoch": 0.740935192780968, "step": 11290}, {"loss": 0.9696, "grad_norm": 0.38986068964004517, "learning_rate": 0.0002, "epoch": 0.7415914684167351, "step": 11300}, {"loss": 0.855, "grad_norm": 0.33212459087371826, "learning_rate": 0.0002, "epoch": 0.742247744052502, "step": 11310}, {"loss": 0.9296, "grad_norm": 0.4290331304073334, "learning_rate": 0.0002, "epoch": 0.742904019688269, "step": 11320}, {"loss": 0.867, "grad_norm": 0.3750900626182556, "learning_rate": 0.0002, "epoch": 0.7435602953240361, "step": 11330}, {"loss": 0.9462, "grad_norm": 0.37758177518844604, "learning_rate": 0.0002, "epoch": 0.7442165709598031, "step": 11340}, {"loss": 0.849, "grad_norm": 0.31147271394729614, "learning_rate": 0.0002, "epoch": 0.7448728465955702, "step": 11350}, {"loss": 0.9675, "grad_norm": 0.4493428170681, "learning_rate": 0.0002, "epoch": 0.7455291222313372, "step": 11360}, {"loss": 0.9506, "grad_norm": 0.4268129765987396, "learning_rate": 0.0002, "epoch": 0.7461853978671041, "step": 11370}, {"loss": 0.9371, "grad_norm": 0.3716314733028412, "learning_rate": 0.0002, "epoch": 0.7468416735028712, "step": 11380}, {"loss": 0.8762, "grad_norm": 0.33728593587875366, "learning_rate": 0.0002, "epoch": 0.7474979491386382, "step": 11390}, {"loss": 0.8532, "grad_norm": 0.36548155546188354, "learning_rate": 0.0002, "epoch": 0.7481542247744053, "step": 11400}, {"loss": 0.8709, "grad_norm": 0.32645145058631897, "learning_rate": 0.0002, "epoch": 0.7488105004101723, "step": 11410}, {"loss": 0.9169, "grad_norm": 0.36403900384902954, "learning_rate": 0.0002, "epoch": 0.7494667760459393, "step": 11420}, {"loss": 0.9734, "grad_norm": 0.4186992049217224, "learning_rate": 0.0002, "epoch": 0.7501230516817063, "step": 11430}, {"loss": 0.8982, "grad_norm": 0.342457115650177, "learning_rate": 0.0002, "epoch": 0.7507793273174733, "step": 11440}, {"loss": 0.8785, "grad_norm": 0.3882320821285248, "learning_rate": 0.0002, "epoch": 0.7514356029532404, "step": 11450}, {"loss": 0.9492, "grad_norm": 0.4653763771057129, "learning_rate": 0.0002, "epoch": 0.7520918785890074, "step": 11460}, {"loss": 0.9357, "grad_norm": 0.3960241377353668, "learning_rate": 0.0002, "epoch": 0.7527481542247744, "step": 11470}, {"loss": 0.8912, "grad_norm": 0.38367652893066406, "learning_rate": 0.0002, "epoch": 0.7534044298605415, "step": 11480}, {"loss": 0.9367, "grad_norm": 0.39881640672683716, "learning_rate": 0.0002, "epoch": 0.7540607054963084, "step": 11490}, {"loss": 0.9018, "grad_norm": 0.40201085805892944, "learning_rate": 0.0002, "epoch": 0.7547169811320755, "step": 11500}, {"loss": 0.8668, "grad_norm": 0.38070711493492126, "learning_rate": 0.0002, "epoch": 0.7553732567678425, "step": 11510}, {"loss": 0.8542, "grad_norm": 0.334245502948761, "learning_rate": 0.0002, "epoch": 0.7560295324036095, "step": 11520}, {"loss": 0.9036, "grad_norm": 0.42725566029548645, "learning_rate": 0.0002, "epoch": 0.7566858080393766, "step": 11530}, {"loss": 0.9443, "grad_norm": 1.4140206575393677, "learning_rate": 0.0002, "epoch": 0.7573420836751436, "step": 11540}, {"loss": 0.8962, "grad_norm": 0.5802565813064575, "learning_rate": 0.0002, "epoch": 0.7579983593109105, "step": 11550}, {"loss": 0.9095, "grad_norm": 0.45892661809921265, "learning_rate": 0.0002, "epoch": 0.7586546349466776, "step": 11560}, {"loss": 0.8373, "grad_norm": 0.3837396800518036, "learning_rate": 0.0002, "epoch": 0.7593109105824446, "step": 11570}, {"loss": 0.9452, "grad_norm": 0.4158342480659485, "learning_rate": 0.0002, "epoch": 0.7599671862182117, "step": 11580}, {"loss": 0.9551, "grad_norm": 0.4374973773956299, "learning_rate": 0.0002, "epoch": 0.7606234618539787, "step": 11590}, {"loss": 0.9181, "grad_norm": 0.36342450976371765, "learning_rate": 0.0002, "epoch": 0.7612797374897456, "step": 11600}, {"loss": 0.8596, "grad_norm": 0.37019461393356323, "learning_rate": 0.0002, "epoch": 0.7619360131255127, "step": 11610}, {"loss": 0.8804, "grad_norm": 0.3745611608028412, "learning_rate": 0.0002, "epoch": 0.7625922887612797, "step": 11620}, {"loss": 0.8745, "grad_norm": 0.33488842844963074, "learning_rate": 0.0002, "epoch": 0.7632485643970468, "step": 11630}, {"loss": 0.8798, "grad_norm": 0.3700532615184784, "learning_rate": 0.0002, "epoch": 0.7639048400328138, "step": 11640}, {"loss": 0.8943, "grad_norm": 0.3722131848335266, "learning_rate": 0.0002, "epoch": 0.7645611156685808, "step": 11650}, {"loss": 0.8702, "grad_norm": 0.3463144302368164, "learning_rate": 0.0002, "epoch": 0.7652173913043478, "step": 11660}, {"loss": 0.865, "grad_norm": 0.339691162109375, "learning_rate": 0.0002, "epoch": 0.7658736669401148, "step": 11670}, {"loss": 0.9536, "grad_norm": 0.33323508501052856, "learning_rate": 0.0002, "epoch": 0.7665299425758819, "step": 11680}, {"loss": 0.8657, "grad_norm": 0.3937166631221771, "learning_rate": 0.0002, "epoch": 0.7671862182116489, "step": 11690}, {"loss": 0.8857, "grad_norm": 0.4112081527709961, "learning_rate": 0.0002, "epoch": 0.7678424938474159, "step": 11700}, {"loss": 0.9108, "grad_norm": 0.4242405593395233, "learning_rate": 0.0002, "epoch": 0.768498769483183, "step": 11710}, {"loss": 0.9148, "grad_norm": 0.33512821793556213, "learning_rate": 0.0002, "epoch": 0.7691550451189499, "step": 11720}, {"loss": 0.8599, "grad_norm": 0.5148407816886902, "learning_rate": 0.0002, "epoch": 0.769811320754717, "step": 11730}, {"loss": 0.9678, "grad_norm": 0.4562109112739563, "learning_rate": 0.0002, "epoch": 0.770467596390484, "step": 11740}, {"loss": 0.8937, "grad_norm": 0.4246354401111603, "learning_rate": 0.0002, "epoch": 0.771123872026251, "step": 11750}, {"loss": 0.8821, "grad_norm": 0.39338991045951843, "learning_rate": 0.0002, "epoch": 0.7717801476620181, "step": 11760}, {"loss": 0.8769, "grad_norm": 0.403199166059494, "learning_rate": 0.0002, "epoch": 0.7724364232977851, "step": 11770}, {"loss": 0.8914, "grad_norm": 0.4494798183441162, "learning_rate": 0.0002, "epoch": 0.7730926989335521, "step": 11780}, {"loss": 0.8633, "grad_norm": 0.3649079501628876, "learning_rate": 0.0002, "epoch": 0.7737489745693191, "step": 11790}, {"loss": 0.8684, "grad_norm": 0.3314788341522217, "learning_rate": 0.0002, "epoch": 0.7744052502050861, "step": 11800}, {"loss": 0.8756, "grad_norm": 0.36505308747291565, "learning_rate": 0.0002, "epoch": 0.7750615258408532, "step": 11810}, {"loss": 0.8778, "grad_norm": 0.45973560214042664, "learning_rate": 0.0002, "epoch": 0.7757178014766202, "step": 11820}, {"loss": 0.8845, "grad_norm": 0.3941294550895691, "learning_rate": 0.0002, "epoch": 0.7763740771123872, "step": 11830}, {"loss": 0.9033, "grad_norm": 0.34385251998901367, "learning_rate": 0.0002, "epoch": 0.7770303527481542, "step": 11840}, {"loss": 0.9595, "grad_norm": 0.35967403650283813, "learning_rate": 0.0002, "epoch": 0.7776866283839212, "step": 11850}, {"loss": 0.9193, "grad_norm": 0.4731179177761078, "learning_rate": 0.0002, "epoch": 0.7783429040196883, "step": 11860}, {"loss": 0.9344, "grad_norm": 0.38854387402534485, "learning_rate": 0.0002, "epoch": 0.7789991796554553, "step": 11870}, {"loss": 0.9056, "grad_norm": 0.3925110995769501, "learning_rate": 0.0002, "epoch": 0.7796554552912223, "step": 11880}, {"loss": 0.8818, "grad_norm": 0.35882773995399475, "learning_rate": 0.0002, "epoch": 0.7803117309269894, "step": 11890}, {"loss": 0.9097, "grad_norm": 0.4351222813129425, "learning_rate": 0.0002, "epoch": 0.7809680065627563, "step": 11900}, {"loss": 0.8866, "grad_norm": 0.39528653025627136, "learning_rate": 0.0002, "epoch": 0.7816242821985234, "step": 11910}, {"loss": 0.91, "grad_norm": 0.34876471757888794, "learning_rate": 0.0002, "epoch": 0.7822805578342904, "step": 11920}, {"loss": 0.9329, "grad_norm": 0.44766634702682495, "learning_rate": 0.0002, "epoch": 0.7829368334700574, "step": 11930}, {"loss": 0.9687, "grad_norm": 0.42268314957618713, "learning_rate": 0.0002, "epoch": 0.7835931091058245, "step": 11940}, {"loss": 0.911, "grad_norm": 0.377101868391037, "learning_rate": 0.0002, "epoch": 0.7842493847415914, "step": 11950}, {"loss": 0.8711, "grad_norm": 0.4489518404006958, "learning_rate": 0.0002, "epoch": 0.7849056603773585, "step": 11960}, {"loss": 0.9177, "grad_norm": 0.4585464596748352, "learning_rate": 0.0002, "epoch": 0.7855619360131255, "step": 11970}, {"loss": 0.901, "grad_norm": 0.41329675912857056, "learning_rate": 0.0002, "epoch": 0.7862182116488925, "step": 11980}, {"loss": 0.8993, "grad_norm": 0.42738014459609985, "learning_rate": 0.0002, "epoch": 0.7868744872846596, "step": 11990}, {"loss": 0.9206, "grad_norm": 0.40386950969696045, "learning_rate": 0.0002, "epoch": 0.7875307629204266, "step": 12000}, {"loss": 0.8702, "grad_norm": 0.3859177827835083, "learning_rate": 0.0002, "epoch": 0.7881870385561937, "step": 12010}, {"loss": 0.87, "grad_norm": 0.3537571430206299, "learning_rate": 0.0002, "epoch": 0.7888433141919606, "step": 12020}, {"loss": 0.9106, "grad_norm": 0.39319470524787903, "learning_rate": 0.0002, "epoch": 0.7894995898277276, "step": 12030}, {"loss": 0.9197, "grad_norm": 0.32762688398361206, "learning_rate": 0.0002, "epoch": 0.7901558654634947, "step": 12040}, {"loss": 0.9218, "grad_norm": 0.34617939591407776, "learning_rate": 0.0002, "epoch": 0.7908121410992617, "step": 12050}, {"loss": 0.9099, "grad_norm": 0.4012453258037567, "learning_rate": 0.0002, "epoch": 0.7914684167350287, "step": 12060}, {"loss": 0.9857, "grad_norm": 0.4265747666358948, "learning_rate": 0.0002, "epoch": 0.7921246923707957, "step": 12070}, {"loss": 0.9169, "grad_norm": 0.42164385318756104, "learning_rate": 0.0002, "epoch": 0.7927809680065627, "step": 12080}, {"loss": 0.8952, "grad_norm": 0.4054335951805115, "learning_rate": 0.0002, "epoch": 0.7934372436423298, "step": 12090}, {"loss": 0.9706, "grad_norm": 0.38486307859420776, "learning_rate": 0.0002, "epoch": 0.7940935192780968, "step": 12100}, {"loss": 0.8703, "grad_norm": 0.3842015266418457, "learning_rate": 0.0002, "epoch": 0.7947497949138638, "step": 12110}, {"loss": 0.8847, "grad_norm": 0.3772023320198059, "learning_rate": 0.0002, "epoch": 0.7954060705496309, "step": 12120}, {"loss": 0.9248, "grad_norm": 0.39477309584617615, "learning_rate": 0.0002, "epoch": 0.7960623461853978, "step": 12130}, {"loss": 0.8865, "grad_norm": 0.3458614945411682, "learning_rate": 0.0002, "epoch": 0.7967186218211649, "step": 12140}, {"loss": 0.884, "grad_norm": 0.42238670587539673, "learning_rate": 0.0002, "epoch": 0.7973748974569319, "step": 12150}, {"loss": 0.9561, "grad_norm": 0.3623220920562744, "learning_rate": 0.0002, "epoch": 0.7980311730926989, "step": 12160}, {"loss": 1.0033, "grad_norm": 0.426715224981308, "learning_rate": 0.0002, "epoch": 0.798687448728466, "step": 12170}, {"loss": 0.9158, "grad_norm": 0.3558938205242157, "learning_rate": 0.0002, "epoch": 0.799343724364233, "step": 12180}, {"loss": 0.9323, "grad_norm": 0.426761269569397, "learning_rate": 0.0002, "epoch": 0.8, "step": 12190}, {"loss": 0.9256, "grad_norm": 0.42333319783210754, "learning_rate": 0.0002, "epoch": 0.800656275635767, "step": 12200}, {"loss": 0.9307, "grad_norm": 0.42534688115119934, "learning_rate": 0.0002, "epoch": 0.801312551271534, "step": 12210}, {"loss": 0.8999, "grad_norm": 0.37565773725509644, "learning_rate": 0.0002, "epoch": 0.8019688269073011, "step": 12220}, {"loss": 0.8531, "grad_norm": 0.3591224253177643, "learning_rate": 0.0002, "epoch": 0.8026251025430681, "step": 12230}, {"loss": 0.8817, "grad_norm": 0.35887411236763, "learning_rate": 0.0002, "epoch": 0.8032813781788352, "step": 12240}, {"loss": 0.8562, "grad_norm": 0.3895672559738159, "learning_rate": 0.0002, "epoch": 0.8039376538146021, "step": 12250}, {"loss": 0.8885, "grad_norm": 0.3483835756778717, "learning_rate": 0.0002, "epoch": 0.8045939294503691, "step": 12260}, {"loss": 0.9343, "grad_norm": 0.37694090604782104, "learning_rate": 0.0002, "epoch": 0.8052502050861362, "step": 12270}, {"loss": 0.9315, "grad_norm": 0.4011424779891968, "learning_rate": 0.0002, "epoch": 0.8059064807219032, "step": 12280}, {"loss": 0.9341, "grad_norm": 0.3254278004169464, "learning_rate": 0.0002, "epoch": 0.8065627563576703, "step": 12290}, {"loss": 0.9025, "grad_norm": 0.3868531286716461, "learning_rate": 0.0002, "epoch": 0.8072190319934373, "step": 12300}, {"loss": 0.8959, "grad_norm": 0.44830775260925293, "learning_rate": 0.0002, "epoch": 0.8078753076292042, "step": 12310}, {"loss": 0.9639, "grad_norm": 0.38808006048202515, "learning_rate": 0.0002, "epoch": 0.8085315832649713, "step": 12320}, {"loss": 0.9119, "grad_norm": 0.42684856057167053, "learning_rate": 0.0002, "epoch": 0.8091878589007383, "step": 12330}, {"loss": 0.9231, "grad_norm": 0.310099333524704, "learning_rate": 0.0002, "epoch": 0.8098441345365053, "step": 12340}, {"loss": 0.8607, "grad_norm": 0.5252287983894348, "learning_rate": 0.0002, "epoch": 0.8105004101722724, "step": 12350}, {"loss": 0.8559, "grad_norm": 0.41571253538131714, "learning_rate": 0.0002, "epoch": 0.8111566858080393, "step": 12360}, {"loss": 0.8626, "grad_norm": 0.4471582770347595, "learning_rate": 0.0002, "epoch": 0.8118129614438064, "step": 12370}, {"loss": 0.923, "grad_norm": 0.3977353274822235, "learning_rate": 0.0002, "epoch": 0.8124692370795734, "step": 12380}, {"loss": 0.8792, "grad_norm": 0.3274862766265869, "learning_rate": 0.0002, "epoch": 0.8131255127153404, "step": 12390}, {"loss": 0.907, "grad_norm": 0.4114132821559906, "learning_rate": 0.0002, "epoch": 0.8137817883511075, "step": 12400}, {"loss": 0.9324, "grad_norm": 0.40929168462753296, "learning_rate": 0.0002, "epoch": 0.8144380639868745, "step": 12410}, {"loss": 0.8745, "grad_norm": 0.32346615195274353, "learning_rate": 0.0002, "epoch": 0.8150943396226416, "step": 12420}, {"loss": 0.8766, "grad_norm": 0.36344656348228455, "learning_rate": 0.0002, "epoch": 0.8157506152584085, "step": 12430}, {"loss": 0.872, "grad_norm": 0.3749464452266693, "learning_rate": 0.0002, "epoch": 0.8164068908941755, "step": 12440}, {"loss": 0.9128, "grad_norm": 0.4742373526096344, "learning_rate": 0.0002, "epoch": 0.8170631665299426, "step": 12450}, {"loss": 0.8972, "grad_norm": 0.3863218128681183, "learning_rate": 0.0002, "epoch": 0.8177194421657096, "step": 12460}, {"loss": 0.9148, "grad_norm": 0.47756487131118774, "learning_rate": 0.0002, "epoch": 0.8183757178014767, "step": 12470}, {"loss": 0.8817, "grad_norm": 0.4045886993408203, "learning_rate": 0.0002, "epoch": 0.8190319934372436, "step": 12480}, {"loss": 0.866, "grad_norm": 0.4119892418384552, "learning_rate": 0.0002, "epoch": 0.8196882690730106, "step": 12490}, {"loss": 0.866, "grad_norm": 0.3714514672756195, "learning_rate": 0.0002, "epoch": 0.8203445447087777, "step": 12500}, {"loss": 0.8943, "grad_norm": 0.38410791754722595, "learning_rate": 0.0002, "epoch": 0.8210008203445447, "step": 12510}, {"loss": 0.8857, "grad_norm": 0.3178478479385376, "learning_rate": 0.0002, "epoch": 0.8216570959803118, "step": 12520}, {"loss": 0.9356, "grad_norm": 0.4347972869873047, "learning_rate": 0.0002, "epoch": 0.8223133716160788, "step": 12530}, {"loss": 0.8926, "grad_norm": 0.4276008903980255, "learning_rate": 0.0002, "epoch": 0.8229696472518457, "step": 12540}, {"loss": 0.9213, "grad_norm": 0.36309465765953064, "learning_rate": 0.0002, "epoch": 0.8236259228876128, "step": 12550}, {"loss": 0.8847, "grad_norm": 0.45721492171287537, "learning_rate": 0.0002, "epoch": 0.8242821985233798, "step": 12560}, {"loss": 0.8655, "grad_norm": 0.37675052881240845, "learning_rate": 0.0002, "epoch": 0.8249384741591469, "step": 12570}, {"loss": 0.8717, "grad_norm": 0.41907957196235657, "learning_rate": 0.0002, "epoch": 0.8255947497949139, "step": 12580}, {"loss": 0.9465, "grad_norm": 0.36430326104164124, "learning_rate": 0.0002, "epoch": 0.8262510254306809, "step": 12590}, {"loss": 0.9244, "grad_norm": 0.45015767216682434, "learning_rate": 0.0002, "epoch": 0.8269073010664479, "step": 12600}, {"loss": 0.9076, "grad_norm": 0.369115948677063, "learning_rate": 0.0002, "epoch": 0.8275635767022149, "step": 12610}, {"loss": 0.92, "grad_norm": 0.4821915030479431, "learning_rate": 0.0002, "epoch": 0.8282198523379819, "step": 12620}, {"loss": 0.8813, "grad_norm": 0.4291541874408722, "learning_rate": 0.0002, "epoch": 0.828876127973749, "step": 12630}, {"loss": 0.9196, "grad_norm": 0.3816904127597809, "learning_rate": 0.0002, "epoch": 0.829532403609516, "step": 12640}, {"loss": 0.8821, "grad_norm": 0.3915407657623291, "learning_rate": 0.0002, "epoch": 0.8301886792452831, "step": 12650}, {"loss": 0.907, "grad_norm": 0.35964086651802063, "learning_rate": 0.0002, "epoch": 0.83084495488105, "step": 12660}, {"loss": 0.8512, "grad_norm": 0.3201609253883362, "learning_rate": 0.0002, "epoch": 0.831501230516817, "step": 12670}, {"loss": 0.9085, "grad_norm": 0.44153767824172974, "learning_rate": 0.0002, "epoch": 0.8321575061525841, "step": 12680}, {"loss": 0.859, "grad_norm": 0.37000054121017456, "learning_rate": 0.0002, "epoch": 0.8328137817883511, "step": 12690}, {"loss": 0.9413, "grad_norm": 0.39843010902404785, "learning_rate": 0.0002, "epoch": 0.8334700574241182, "step": 12700}, {"loss": 0.8787, "grad_norm": 0.3594053387641907, "learning_rate": 0.0002, "epoch": 0.8341263330598851, "step": 12710}, {"loss": 0.8542, "grad_norm": 0.38574180006980896, "learning_rate": 0.0002, "epoch": 0.8347826086956521, "step": 12720}, {"loss": 0.8746, "grad_norm": 0.4101716876029968, "learning_rate": 0.0002, "epoch": 0.8354388843314192, "step": 12730}, {"loss": 0.8442, "grad_norm": 0.3665215075016022, "learning_rate": 0.0002, "epoch": 0.8360951599671862, "step": 12740}, {"loss": 0.8898, "grad_norm": 0.39471596479415894, "learning_rate": 0.0002, "epoch": 0.8367514356029533, "step": 12750}, {"loss": 0.9026, "grad_norm": 0.35624340176582336, "learning_rate": 0.0002, "epoch": 0.8374077112387203, "step": 12760}, {"loss": 0.917, "grad_norm": 0.3838249742984772, "learning_rate": 0.0002, "epoch": 0.8380639868744872, "step": 12770}, {"loss": 0.874, "grad_norm": 0.408368319272995, "learning_rate": 0.0002, "epoch": 0.8387202625102543, "step": 12780}, {"loss": 0.934, "grad_norm": 0.46758291125297546, "learning_rate": 0.0002, "epoch": 0.8393765381460213, "step": 12790}, {"loss": 0.8603, "grad_norm": 0.35787731409072876, "learning_rate": 0.0002, "epoch": 0.8400328137817884, "step": 12800}, {"loss": 0.9026, "grad_norm": 0.39618661999702454, "learning_rate": 0.0002, "epoch": 0.8406890894175554, "step": 12810}, {"loss": 0.9307, "grad_norm": 0.44962066411972046, "learning_rate": 0.0002, "epoch": 0.8413453650533224, "step": 12820}, {"loss": 0.8756, "grad_norm": 0.36435529589653015, "learning_rate": 0.0002, "epoch": 0.8420016406890894, "step": 12830}, {"loss": 0.93, "grad_norm": 0.37484753131866455, "learning_rate": 0.0002, "epoch": 0.8426579163248564, "step": 12840}, {"loss": 0.8655, "grad_norm": 0.36679843068122864, "learning_rate": 0.0002, "epoch": 0.8433141919606235, "step": 12850}, {"loss": 0.9339, "grad_norm": 0.3948156237602234, "learning_rate": 0.0002, "epoch": 0.8439704675963905, "step": 12860}, {"loss": 0.8915, "grad_norm": 0.3789501488208771, "learning_rate": 0.0002, "epoch": 0.8446267432321575, "step": 12870}, {"loss": 0.916, "grad_norm": 0.45156800746917725, "learning_rate": 0.0002, "epoch": 0.8452830188679246, "step": 12880}, {"loss": 0.8762, "grad_norm": 0.37762370705604553, "learning_rate": 0.0002, "epoch": 0.8459392945036915, "step": 12890}, {"loss": 0.8922, "grad_norm": 0.41654065251350403, "learning_rate": 0.0002, "epoch": 0.8465955701394585, "step": 12900}, {"loss": 0.9136, "grad_norm": 0.35026174783706665, "learning_rate": 0.0002, "epoch": 0.8472518457752256, "step": 12910}, {"loss": 0.8443, "grad_norm": 0.5227314829826355, "learning_rate": 0.0002, "epoch": 0.8479081214109926, "step": 12920}, {"loss": 0.8533, "grad_norm": 0.365063339471817, "learning_rate": 0.0002, "epoch": 0.8485643970467597, "step": 12930}, {"loss": 0.9012, "grad_norm": 0.40983277559280396, "learning_rate": 0.0002, "epoch": 0.8492206726825267, "step": 12940}, {"loss": 0.8788, "grad_norm": 0.36414071917533875, "learning_rate": 0.0002, "epoch": 0.8498769483182936, "step": 12950}, {"loss": 0.8981, "grad_norm": 0.3703882694244385, "learning_rate": 0.0002, "epoch": 0.8505332239540607, "step": 12960}, {"loss": 0.9256, "grad_norm": 0.3218643367290497, "learning_rate": 0.0002, "epoch": 0.8511894995898277, "step": 12970}, {"loss": 0.8525, "grad_norm": 0.38008660078048706, "learning_rate": 0.0002, "epoch": 0.8518457752255948, "step": 12980}, {"loss": 0.8651, "grad_norm": 0.4357127547264099, "learning_rate": 0.0002, "epoch": 0.8525020508613618, "step": 12990}, {"loss": 0.8592, "grad_norm": 0.36831775307655334, "learning_rate": 0.0002, "epoch": 0.8531583264971287, "step": 13000}, {"loss": 0.8772, "grad_norm": 0.4546806812286377, "learning_rate": 0.0002, "epoch": 0.8538146021328958, "step": 13010}, {"loss": 0.865, "grad_norm": 0.3474937379360199, "learning_rate": 0.0002, "epoch": 0.8544708777686628, "step": 13020}, {"loss": 0.9098, "grad_norm": 0.438834547996521, "learning_rate": 0.0002, "epoch": 0.8551271534044299, "step": 13030}, {"loss": 0.9573, "grad_norm": 0.3401171565055847, "learning_rate": 0.0002, "epoch": 0.8557834290401969, "step": 13040}, {"loss": 0.8286, "grad_norm": 0.3887326419353485, "learning_rate": 0.0002, "epoch": 0.8564397046759639, "step": 13050}, {"loss": 0.8695, "grad_norm": 0.3489287197589874, "learning_rate": 0.0002, "epoch": 0.857095980311731, "step": 13060}, {"loss": 0.9118, "grad_norm": 0.481189489364624, "learning_rate": 0.0002, "epoch": 0.8577522559474979, "step": 13070}, {"loss": 0.9116, "grad_norm": 0.4638312757015228, "learning_rate": 0.0002, "epoch": 0.858408531583265, "step": 13080}, {"loss": 0.8775, "grad_norm": 0.43477529287338257, "learning_rate": 0.0002, "epoch": 0.859064807219032, "step": 13090}, {"loss": 0.9232, "grad_norm": 0.43358466029167175, "learning_rate": 0.0002, "epoch": 0.859721082854799, "step": 13100}, {"loss": 0.8771, "grad_norm": 0.385527104139328, "learning_rate": 0.0002, "epoch": 0.8603773584905661, "step": 13110}, {"loss": 0.8708, "grad_norm": 0.37878429889678955, "learning_rate": 0.0002, "epoch": 0.861033634126333, "step": 13120}, {"loss": 0.83, "grad_norm": 0.409476637840271, "learning_rate": 0.0002, "epoch": 0.8616899097621001, "step": 13130}, {"loss": 0.9149, "grad_norm": 0.3888716697692871, "learning_rate": 0.0002, "epoch": 0.8623461853978671, "step": 13140}, {"loss": 0.8261, "grad_norm": 0.38093528151512146, "learning_rate": 0.0002, "epoch": 0.8630024610336341, "step": 13150}, {"loss": 0.9163, "grad_norm": 0.45613282918930054, "learning_rate": 0.0002, "epoch": 0.8636587366694012, "step": 13160}, {"loss": 0.8967, "grad_norm": 0.41665518283843994, "learning_rate": 0.0002, "epoch": 0.8643150123051682, "step": 13170}, {"loss": 0.8692, "grad_norm": 0.4322538673877716, "learning_rate": 0.0002, "epoch": 0.8649712879409351, "step": 13180}, {"loss": 0.8429, "grad_norm": 0.3452875018119812, "learning_rate": 0.0002, "epoch": 0.8656275635767022, "step": 13190}, {"loss": 0.8872, "grad_norm": 0.36628788709640503, "learning_rate": 0.0002, "epoch": 0.8662838392124692, "step": 13200}, {"loss": 0.9141, "grad_norm": 0.4329487383365631, "learning_rate": 0.0002, "epoch": 0.8669401148482363, "step": 13210}, {"loss": 0.8586, "grad_norm": 0.36800137162208557, "learning_rate": 0.0002, "epoch": 0.8675963904840033, "step": 13220}, {"loss": 0.9133, "grad_norm": 0.5235224366188049, "learning_rate": 0.0002, "epoch": 0.8682526661197703, "step": 13230}, {"loss": 0.8826, "grad_norm": 0.40818873047828674, "learning_rate": 0.0002, "epoch": 0.8689089417555373, "step": 13240}, {"loss": 0.8869, "grad_norm": 0.46719685196876526, "learning_rate": 0.0002, "epoch": 0.8695652173913043, "step": 13250}, {"loss": 0.9426, "grad_norm": 0.34422767162323, "learning_rate": 0.0002, "epoch": 0.8702214930270714, "step": 13260}, {"loss": 0.8921, "grad_norm": 0.37454837560653687, "learning_rate": 0.0002, "epoch": 0.8708777686628384, "step": 13270}, {"loss": 0.89, "grad_norm": 0.39750349521636963, "learning_rate": 0.0002, "epoch": 0.8715340442986054, "step": 13280}, {"loss": 0.9256, "grad_norm": 0.3864808678627014, "learning_rate": 0.0002, "epoch": 0.8721903199343725, "step": 13290}, {"loss": 0.8663, "grad_norm": 0.36088764667510986, "learning_rate": 0.0002, "epoch": 0.8728465955701394, "step": 13300}, {"loss": 0.8334, "grad_norm": 0.384287029504776, "learning_rate": 0.0002, "epoch": 0.8735028712059065, "step": 13310}, {"loss": 0.8697, "grad_norm": 0.3988962173461914, "learning_rate": 0.0002, "epoch": 0.8741591468416735, "step": 13320}, {"loss": 0.8924, "grad_norm": 0.40126702189445496, "learning_rate": 0.0002, "epoch": 0.8748154224774405, "step": 13330}, {"loss": 0.8766, "grad_norm": 0.3931732475757599, "learning_rate": 0.0002, "epoch": 0.8754716981132076, "step": 13340}, {"loss": 0.9249, "grad_norm": 0.35348305106163025, "learning_rate": 0.0002, "epoch": 0.8761279737489746, "step": 13350}, {"loss": 0.9412, "grad_norm": 0.3603714108467102, "learning_rate": 0.0002, "epoch": 0.8767842493847416, "step": 13360}, {"loss": 0.8432, "grad_norm": 0.3853464126586914, "learning_rate": 0.0002, "epoch": 0.8774405250205086, "step": 13370}, {"loss": 0.9035, "grad_norm": 0.44406014680862427, "learning_rate": 0.0002, "epoch": 0.8780968006562756, "step": 13380}, {"loss": 0.8903, "grad_norm": 0.3563307225704193, "learning_rate": 0.0002, "epoch": 0.8787530762920427, "step": 13390}, {"loss": 0.9004, "grad_norm": 0.4507097005844116, "learning_rate": 0.0002, "epoch": 0.8794093519278097, "step": 13400}, {"loss": 0.9792, "grad_norm": 0.4069702923297882, "learning_rate": 0.0002, "epoch": 0.8800656275635766, "step": 13410}, {"loss": 0.8857, "grad_norm": 0.5091678500175476, "learning_rate": 0.0002, "epoch": 0.8807219031993437, "step": 13420}, {"loss": 0.9499, "grad_norm": 0.3784930408000946, "learning_rate": 0.0002, "epoch": 0.8813781788351107, "step": 13430}, {"loss": 0.9475, "grad_norm": 0.3937450051307678, "learning_rate": 0.0002, "epoch": 0.8820344544708778, "step": 13440}, {"loss": 0.9411, "grad_norm": 0.39602798223495483, "learning_rate": 0.0002, "epoch": 0.8826907301066448, "step": 13450}, {"loss": 0.947, "grad_norm": 0.42562225461006165, "learning_rate": 0.0002, "epoch": 0.8833470057424118, "step": 13460}, {"loss": 0.9748, "grad_norm": 0.3933939039707184, "learning_rate": 0.0002, "epoch": 0.8840032813781789, "step": 13470}, {"loss": 0.9206, "grad_norm": 0.4387489855289459, "learning_rate": 0.0002, "epoch": 0.8846595570139458, "step": 13480}, {"loss": 0.8417, "grad_norm": 0.3655209243297577, "learning_rate": 0.0002, "epoch": 0.8853158326497129, "step": 13490}, {"loss": 0.8601, "grad_norm": 0.40750762820243835, "learning_rate": 0.0002, "epoch": 0.8859721082854799, "step": 13500}, {"loss": 0.9343, "grad_norm": 0.5716604590415955, "learning_rate": 0.0002, "epoch": 0.8866283839212469, "step": 13510}, {"loss": 0.8634, "grad_norm": 0.3286498785018921, "learning_rate": 0.0002, "epoch": 0.887284659557014, "step": 13520}, {"loss": 0.8875, "grad_norm": 0.4093165993690491, "learning_rate": 0.0002, "epoch": 0.887940935192781, "step": 13530}, {"loss": 0.9056, "grad_norm": 0.37128645181655884, "learning_rate": 0.0002, "epoch": 0.888597210828548, "step": 13540}, {"loss": 0.9356, "grad_norm": 0.9630060195922852, "learning_rate": 0.0002, "epoch": 0.889253486464315, "step": 13550}, {"loss": 0.9598, "grad_norm": 0.4119563698768616, "learning_rate": 0.0002, "epoch": 0.889909762100082, "step": 13560}, {"loss": 0.8688, "grad_norm": 0.42105695605278015, "learning_rate": 0.0002, "epoch": 0.8905660377358491, "step": 13570}, {"loss": 0.9091, "grad_norm": 0.35517653822898865, "learning_rate": 0.0002, "epoch": 0.8912223133716161, "step": 13580}, {"loss": 0.9975, "grad_norm": 0.4380730986595154, "learning_rate": 0.0002, "epoch": 0.8918785890073831, "step": 13590}, {"loss": 0.8874, "grad_norm": 0.3875235915184021, "learning_rate": 0.0002, "epoch": 0.8925348646431501, "step": 13600}, {"loss": 0.8813, "grad_norm": 0.3194465935230255, "learning_rate": 0.0002, "epoch": 0.8931911402789171, "step": 13610}, {"loss": 0.8671, "grad_norm": 0.4448561370372772, "learning_rate": 0.0002, "epoch": 0.8938474159146842, "step": 13620}, {"loss": 0.88, "grad_norm": 0.4257558286190033, "learning_rate": 0.0002, "epoch": 0.8945036915504512, "step": 13630}, {"loss": 0.8844, "grad_norm": 0.3831070065498352, "learning_rate": 0.0002, "epoch": 0.8951599671862183, "step": 13640}, {"loss": 0.8836, "grad_norm": 0.3765697479248047, "learning_rate": 0.0002, "epoch": 0.8958162428219852, "step": 13650}, {"loss": 0.9015, "grad_norm": 0.3621887266635895, "learning_rate": 0.0002, "epoch": 0.8964725184577522, "step": 13660}, {"loss": 0.9147, "grad_norm": 0.4219911992549896, "learning_rate": 0.0002, "epoch": 0.8971287940935193, "step": 13670}, {"loss": 0.878, "grad_norm": 0.550961971282959, "learning_rate": 0.0002, "epoch": 0.8977850697292863, "step": 13680}, {"loss": 0.8807, "grad_norm": 0.41570132970809937, "learning_rate": 0.0002, "epoch": 0.8984413453650533, "step": 13690}, {"loss": 0.953, "grad_norm": 0.34862181544303894, "learning_rate": 0.0002, "epoch": 0.8990976210008204, "step": 13700}, {"loss": 0.8738, "grad_norm": 0.3568742871284485, "learning_rate": 0.0002, "epoch": 0.8997538966365873, "step": 13710}, {"loss": 0.9346, "grad_norm": 0.3789973258972168, "learning_rate": 0.0002, "epoch": 0.9004101722723544, "step": 13720}, {"loss": 0.8225, "grad_norm": 0.3775809109210968, "learning_rate": 0.0002, "epoch": 0.9010664479081214, "step": 13730}, {"loss": 0.9238, "grad_norm": 0.33509477972984314, "learning_rate": 0.0002, "epoch": 0.9017227235438884, "step": 13740}, {"loss": 0.915, "grad_norm": 0.4410351514816284, "learning_rate": 0.0002, "epoch": 0.9023789991796555, "step": 13750}, {"loss": 0.9066, "grad_norm": 0.44638893008232117, "learning_rate": 0.0002, "epoch": 0.9030352748154225, "step": 13760}, {"loss": 0.883, "grad_norm": 0.38487187027931213, "learning_rate": 0.0002, "epoch": 0.9036915504511895, "step": 13770}, {"loss": 0.8886, "grad_norm": 0.385796457529068, "learning_rate": 0.0002, "epoch": 0.9043478260869565, "step": 13780}, {"loss": 0.8781, "grad_norm": 0.4261656403541565, "learning_rate": 0.0002, "epoch": 0.9050041017227235, "step": 13790}, {"loss": 0.9237, "grad_norm": 0.38487741351127625, "learning_rate": 0.0002, "epoch": 0.9056603773584906, "step": 13800}, {"loss": 0.9357, "grad_norm": 0.40660005807876587, "learning_rate": 0.0002, "epoch": 0.9063166529942576, "step": 13810}, {"loss": 0.8943, "grad_norm": 0.4127330780029297, "learning_rate": 0.0002, "epoch": 0.9069729286300247, "step": 13820}, {"loss": 0.8981, "grad_norm": 0.4300757944583893, "learning_rate": 0.0002, "epoch": 0.9076292042657916, "step": 13830}, {"loss": 0.8956, "grad_norm": 0.3994467854499817, "learning_rate": 0.0002, "epoch": 0.9082854799015586, "step": 13840}, {"loss": 0.8949, "grad_norm": 0.4332261383533478, "learning_rate": 0.0002, "epoch": 0.9089417555373257, "step": 13850}, {"loss": 0.8897, "grad_norm": 0.3849696218967438, "learning_rate": 0.0002, "epoch": 0.9095980311730927, "step": 13860}, {"loss": 0.8897, "grad_norm": 0.39346274733543396, "learning_rate": 0.0002, "epoch": 0.9102543068088598, "step": 13870}, {"loss": 0.8847, "grad_norm": 0.39518049359321594, "learning_rate": 0.0002, "epoch": 0.9109105824446267, "step": 13880}, {"loss": 0.8982, "grad_norm": 0.4449180066585541, "learning_rate": 0.0002, "epoch": 0.9115668580803937, "step": 13890}, {"loss": 0.9057, "grad_norm": 0.41052138805389404, "learning_rate": 0.0002, "epoch": 0.9122231337161608, "step": 13900}, {"loss": 0.905, "grad_norm": 0.36827564239501953, "learning_rate": 0.0002, "epoch": 0.9128794093519278, "step": 13910}, {"loss": 0.9484, "grad_norm": 0.3875851631164551, "learning_rate": 0.0002, "epoch": 0.9135356849876949, "step": 13920}, {"loss": 0.9125, "grad_norm": 0.402854859828949, "learning_rate": 0.0002, "epoch": 0.9141919606234619, "step": 13930}, {"loss": 0.9014, "grad_norm": 0.3584592938423157, "learning_rate": 0.0002, "epoch": 0.9148482362592288, "step": 13940}, {"loss": 0.8432, "grad_norm": 0.3516979515552521, "learning_rate": 0.0002, "epoch": 0.9155045118949959, "step": 13950}, {"loss": 0.8747, "grad_norm": 0.4411509335041046, "learning_rate": 0.0002, "epoch": 0.9161607875307629, "step": 13960}, {"loss": 0.8758, "grad_norm": 0.47830596566200256, "learning_rate": 0.0002, "epoch": 0.9168170631665299, "step": 13970}, {"loss": 0.915, "grad_norm": 0.3669400215148926, "learning_rate": 0.0002, "epoch": 0.917473338802297, "step": 13980}, {"loss": 0.9265, "grad_norm": 0.4361213147640228, "learning_rate": 0.0002, "epoch": 0.918129614438064, "step": 13990}, {"loss": 0.9106, "grad_norm": 0.4224131405353546, "learning_rate": 0.0002, "epoch": 0.918785890073831, "step": 14000}, {"loss": 0.9138, "grad_norm": 0.5785587430000305, "learning_rate": 0.0002, "epoch": 0.919442165709598, "step": 14010}, {"loss": 0.9078, "grad_norm": 0.40788379311561584, "learning_rate": 0.0002, "epoch": 0.920098441345365, "step": 14020}, {"loss": 0.9241, "grad_norm": 0.40879732370376587, "learning_rate": 0.0002, "epoch": 0.9207547169811321, "step": 14030}, {"loss": 0.8565, "grad_norm": 0.4031982421875, "learning_rate": 0.0002, "epoch": 0.9214109926168991, "step": 14040}, {"loss": 0.8224, "grad_norm": 0.3457014560699463, "learning_rate": 0.0002, "epoch": 0.9220672682526662, "step": 14050}, {"loss": 0.89, "grad_norm": 0.38608697056770325, "learning_rate": 0.0002, "epoch": 0.9227235438884331, "step": 14060}, {"loss": 0.8637, "grad_norm": 0.39772507548332214, "learning_rate": 0.0002, "epoch": 0.9233798195242001, "step": 14070}, {"loss": 0.9102, "grad_norm": 0.3684687316417694, "learning_rate": 0.0002, "epoch": 0.9240360951599672, "step": 14080}, {"loss": 0.886, "grad_norm": 0.4479428827762604, "learning_rate": 0.0002, "epoch": 0.9246923707957342, "step": 14090}, {"loss": 0.857, "grad_norm": 0.39918506145477295, "learning_rate": 0.0002, "epoch": 0.9253486464315013, "step": 14100}, {"loss": 0.8765, "grad_norm": 0.4163782000541687, "learning_rate": 0.0002, "epoch": 0.9260049220672683, "step": 14110}, {"loss": 0.9022, "grad_norm": 0.40232136845588684, "learning_rate": 0.0002, "epoch": 0.9266611977030352, "step": 14120}, {"loss": 0.8815, "grad_norm": 0.36188459396362305, "learning_rate": 0.0002, "epoch": 0.9273174733388023, "step": 14130}, {"loss": 0.9072, "grad_norm": 0.42056623101234436, "learning_rate": 0.0002, "epoch": 0.9279737489745693, "step": 14140}, {"loss": 0.8678, "grad_norm": 0.3129708468914032, "learning_rate": 0.0002, "epoch": 0.9286300246103364, "step": 14150}, {"loss": 0.8895, "grad_norm": 0.4068623185157776, "learning_rate": 0.0002, "epoch": 0.9292863002461034, "step": 14160}, {"loss": 0.8939, "grad_norm": 0.43788865208625793, "learning_rate": 0.0002, "epoch": 0.9299425758818703, "step": 14170}, {"loss": 0.8954, "grad_norm": 0.35850921273231506, "learning_rate": 0.0002, "epoch": 0.9305988515176374, "step": 14180}, {"loss": 0.9001, "grad_norm": 0.41121411323547363, "learning_rate": 0.0002, "epoch": 0.9312551271534044, "step": 14190}, {"loss": 0.866, "grad_norm": 0.4249218702316284, "learning_rate": 0.0002, "epoch": 0.9319114027891715, "step": 14200}, {"loss": 0.9195, "grad_norm": 0.3779831826686859, "learning_rate": 0.0002, "epoch": 0.9325676784249385, "step": 14210}, {"loss": 0.8436, "grad_norm": 0.36781951785087585, "learning_rate": 0.0002, "epoch": 0.9332239540607055, "step": 14220}, {"loss": 0.8976, "grad_norm": 0.36741000413894653, "learning_rate": 0.0002, "epoch": 0.9338802296964726, "step": 14230}, {"loss": 0.9137, "grad_norm": 0.43414175510406494, "learning_rate": 0.0002, "epoch": 0.9345365053322395, "step": 14240}, {"loss": 0.8648, "grad_norm": 0.4406278431415558, "learning_rate": 0.0002, "epoch": 0.9351927809680065, "step": 14250}, {"loss": 0.8705, "grad_norm": 0.386152982711792, "learning_rate": 0.0002, "epoch": 0.9358490566037736, "step": 14260}, {"loss": 0.8697, "grad_norm": 0.3971618711948395, "learning_rate": 0.0002, "epoch": 0.9365053322395406, "step": 14270}, {"loss": 0.8925, "grad_norm": 0.3366684317588806, "learning_rate": 0.0002, "epoch": 0.9371616078753077, "step": 14280}, {"loss": 0.8702, "grad_norm": 0.42566195130348206, "learning_rate": 0.0002, "epoch": 0.9378178835110746, "step": 14290}, {"loss": 0.8767, "grad_norm": 0.3734602928161621, "learning_rate": 0.0002, "epoch": 0.9384741591468416, "step": 14300}, {"loss": 0.8889, "grad_norm": 0.4279540479183197, "learning_rate": 0.0002, "epoch": 0.9391304347826087, "step": 14310}, {"loss": 0.9319, "grad_norm": 0.43050041794776917, "learning_rate": 0.0002, "epoch": 0.9397867104183757, "step": 14320}, {"loss": 0.91, "grad_norm": 0.41909968852996826, "learning_rate": 0.0002, "epoch": 0.9404429860541428, "step": 14330}, {"loss": 0.8782, "grad_norm": 0.39651772379875183, "learning_rate": 0.0002, "epoch": 0.9410992616899098, "step": 14340}, {"loss": 0.8868, "grad_norm": 0.4171423614025116, "learning_rate": 0.0002, "epoch": 0.9417555373256767, "step": 14350}, {"loss": 0.8751, "grad_norm": 0.44906023144721985, "learning_rate": 0.0002, "epoch": 0.9424118129614438, "step": 14360}, {"loss": 0.9048, "grad_norm": 0.4213627576828003, "learning_rate": 0.0002, "epoch": 0.9430680885972108, "step": 14370}, {"loss": 0.9257, "grad_norm": 0.38457417488098145, "learning_rate": 0.0002, "epoch": 0.9437243642329779, "step": 14380}, {"loss": 0.8421, "grad_norm": 0.43104225397109985, "learning_rate": 0.0002, "epoch": 0.9443806398687449, "step": 14390}, {"loss": 0.865, "grad_norm": 0.40090736746788025, "learning_rate": 0.0002, "epoch": 0.9450369155045119, "step": 14400}, {"loss": 0.8746, "grad_norm": 0.36180031299591064, "learning_rate": 0.0002, "epoch": 0.9456931911402789, "step": 14410}, {"loss": 0.9297, "grad_norm": 0.4608926475048065, "learning_rate": 0.0002, "epoch": 0.9463494667760459, "step": 14420}, {"loss": 0.9131, "grad_norm": 0.44056418538093567, "learning_rate": 0.0002, "epoch": 0.947005742411813, "step": 14430}, {"loss": 0.8368, "grad_norm": 0.334051251411438, "learning_rate": 0.0002, "epoch": 0.94766201804758, "step": 14440}, {"loss": 0.8557, "grad_norm": 0.45580169558525085, "learning_rate": 0.0002, "epoch": 0.948318293683347, "step": 14450}, {"loss": 0.8828, "grad_norm": 0.3898446261882782, "learning_rate": 0.0002, "epoch": 0.9489745693191141, "step": 14460}, {"loss": 0.9436, "grad_norm": 0.465259850025177, "learning_rate": 0.0002, "epoch": 0.949630844954881, "step": 14470}, {"loss": 0.8907, "grad_norm": 0.3595256805419922, "learning_rate": 0.0002, "epoch": 0.9502871205906481, "step": 14480}, {"loss": 0.8593, "grad_norm": 0.3710390031337738, "learning_rate": 0.0002, "epoch": 0.9509433962264151, "step": 14490}, {"loss": 0.8373, "grad_norm": 0.4002886712551117, "learning_rate": 0.0002, "epoch": 0.9515996718621821, "step": 14500}, {"loss": 0.9091, "grad_norm": 0.4221348464488983, "learning_rate": 0.0002, "epoch": 0.9522559474979492, "step": 14510}, {"loss": 0.9205, "grad_norm": 0.4163874089717865, "learning_rate": 0.0002, "epoch": 0.9529122231337162, "step": 14520}, {"loss": 0.9125, "grad_norm": 0.40220746397972107, "learning_rate": 0.0002, "epoch": 0.9535684987694831, "step": 14530}, {"loss": 0.854, "grad_norm": 0.45836037397384644, "learning_rate": 0.0002, "epoch": 0.9542247744052502, "step": 14540}, {"loss": 0.911, "grad_norm": 0.420478492975235, "learning_rate": 0.0002, "epoch": 0.9548810500410172, "step": 14550}, {"loss": 0.8932, "grad_norm": 0.45627933740615845, "learning_rate": 0.0002, "epoch": 0.9555373256767843, "step": 14560}, {"loss": 0.8926, "grad_norm": 0.41290518641471863, "learning_rate": 0.0002, "epoch": 0.9561936013125513, "step": 14570}, {"loss": 0.8936, "grad_norm": 0.46660760045051575, "learning_rate": 0.0002, "epoch": 0.9568498769483182, "step": 14580}, {"loss": 0.9132, "grad_norm": 0.45327240228652954, "learning_rate": 0.0002, "epoch": 0.9575061525840853, "step": 14590}, {"loss": 0.8601, "grad_norm": 0.41973528265953064, "learning_rate": 0.0002, "epoch": 0.9581624282198523, "step": 14600}, {"loss": 0.8278, "grad_norm": 0.4022239148616791, "learning_rate": 0.0002, "epoch": 0.9588187038556194, "step": 14610}, {"loss": 0.8514, "grad_norm": 0.3226695954799652, "learning_rate": 0.0002, "epoch": 0.9594749794913864, "step": 14620}, {"loss": 0.8526, "grad_norm": 0.4233718812465668, "learning_rate": 0.0002, "epoch": 0.9601312551271534, "step": 14630}, {"loss": 0.9009, "grad_norm": 0.37561315298080444, "learning_rate": 0.0002, "epoch": 0.9607875307629204, "step": 14640}, {"loss": 0.8854, "grad_norm": 0.44843146204948425, "learning_rate": 0.0002, "epoch": 0.9614438063986874, "step": 14650}, {"loss": 0.8863, "grad_norm": 0.40062573552131653, "learning_rate": 0.0002, "epoch": 0.9621000820344545, "step": 14660}, {"loss": 0.8912, "grad_norm": 0.4720284938812256, "learning_rate": 0.0002, "epoch": 0.9627563576702215, "step": 14670}, {"loss": 0.89, "grad_norm": 0.43565067648887634, "learning_rate": 0.0002, "epoch": 0.9634126333059885, "step": 14680}, {"loss": 0.8612, "grad_norm": 0.41181448101997375, "learning_rate": 0.0002, "epoch": 0.9640689089417556, "step": 14690}, {"loss": 0.9464, "grad_norm": 0.4344978630542755, "learning_rate": 0.0002, "epoch": 0.9647251845775225, "step": 14700}, {"loss": 0.855, "grad_norm": 0.42558008432388306, "learning_rate": 0.0002, "epoch": 0.9653814602132896, "step": 14710}, {"loss": 0.8945, "grad_norm": 0.39161136746406555, "learning_rate": 0.0002, "epoch": 0.9660377358490566, "step": 14720}, {"loss": 0.8587, "grad_norm": 0.3784191608428955, "learning_rate": 0.0002, "epoch": 0.9666940114848236, "step": 14730}, {"loss": 0.8566, "grad_norm": 0.39039477705955505, "learning_rate": 0.0002, "epoch": 0.9673502871205907, "step": 14740}, {"loss": 0.9006, "grad_norm": 0.5566018223762512, "learning_rate": 0.0002, "epoch": 0.9680065627563577, "step": 14750}, {"loss": 0.9222, "grad_norm": 0.38877877593040466, "learning_rate": 0.0002, "epoch": 0.9686628383921247, "step": 14760}, {"loss": 0.8811, "grad_norm": 0.33369940519332886, "learning_rate": 0.0002, "epoch": 0.9693191140278917, "step": 14770}, {"loss": 0.8743, "grad_norm": 0.5109888315200806, "learning_rate": 0.0002, "epoch": 0.9699753896636587, "step": 14780}, {"loss": 0.8883, "grad_norm": 0.5364375114440918, "learning_rate": 0.0002, "epoch": 0.9706316652994258, "step": 14790}, {"loss": 0.8696, "grad_norm": 0.3702435791492462, "learning_rate": 0.0002, "epoch": 0.9712879409351928, "step": 14800}, {"loss": 0.9144, "grad_norm": 0.5094677805900574, "learning_rate": 0.0002, "epoch": 0.9719442165709598, "step": 14810}, {"loss": 0.8823, "grad_norm": 0.3565915524959564, "learning_rate": 0.0002, "epoch": 0.9726004922067268, "step": 14820}, {"loss": 0.8964, "grad_norm": 0.44756242632865906, "learning_rate": 0.0002, "epoch": 0.9732567678424938, "step": 14830}, {"loss": 0.8524, "grad_norm": 0.4272070527076721, "learning_rate": 0.0002, "epoch": 0.9739130434782609, "step": 14840}, {"loss": 0.8551, "grad_norm": 0.4303967356681824, "learning_rate": 0.0002, "epoch": 0.9745693191140279, "step": 14850}, {"loss": 0.9303, "grad_norm": 0.4528018534183502, "learning_rate": 0.0002, "epoch": 0.9752255947497949, "step": 14860}, {"loss": 0.8465, "grad_norm": 0.5745970606803894, "learning_rate": 0.0002, "epoch": 0.975881870385562, "step": 14870}, {"loss": 0.9496, "grad_norm": 0.43847736716270447, "learning_rate": 0.0002, "epoch": 0.9765381460213289, "step": 14880}, {"loss": 0.8647, "grad_norm": 0.4512104094028473, "learning_rate": 0.0002, "epoch": 0.977194421657096, "step": 14890}, {"loss": 0.9712, "grad_norm": 0.4573594331741333, "learning_rate": 0.0002, "epoch": 0.977850697292863, "step": 14900}, {"loss": 0.898, "grad_norm": 0.4297037720680237, "learning_rate": 0.0002, "epoch": 0.97850697292863, "step": 14910}, {"loss": 0.9115, "grad_norm": 0.3769957721233368, "learning_rate": 0.0002, "epoch": 0.9791632485643971, "step": 14920}, {"loss": 0.8956, "grad_norm": 0.3524457514286041, "learning_rate": 0.0002, "epoch": 0.979819524200164, "step": 14930}, {"loss": 0.9254, "grad_norm": 0.3965851664543152, "learning_rate": 0.0002, "epoch": 0.9804757998359311, "step": 14940}, {"loss": 0.8869, "grad_norm": 0.3593656122684479, "learning_rate": 0.0002, "epoch": 0.9811320754716981, "step": 14950}, {"loss": 0.8859, "grad_norm": 0.35391807556152344, "learning_rate": 0.0002, "epoch": 0.9817883511074651, "step": 14960}, {"loss": 0.9111, "grad_norm": 0.40651339292526245, "learning_rate": 0.0002, "epoch": 0.9824446267432322, "step": 14970}, {"loss": 0.8714, "grad_norm": 0.4370724558830261, "learning_rate": 0.0002, "epoch": 0.9831009023789992, "step": 14980}, {"loss": 0.8776, "grad_norm": 0.37859413027763367, "learning_rate": 0.0002, "epoch": 0.9837571780147663, "step": 14990}, {"loss": 0.8744, "grad_norm": 0.4329196512699127, "learning_rate": 0.0002, "epoch": 0.9844134536505332, "step": 15000}, {"loss": 0.8384, "grad_norm": 0.4119299054145813, "learning_rate": 0.0002, "epoch": 0.9850697292863002, "step": 15010}, {"loss": 0.8885, "grad_norm": 0.4084014892578125, "learning_rate": 0.0002, "epoch": 0.9857260049220673, "step": 15020}, {"loss": 0.9255, "grad_norm": 0.34770357608795166, "learning_rate": 0.0002, "epoch": 0.9863822805578343, "step": 15030}, {"loss": 0.9074, "grad_norm": 0.429995596408844, "learning_rate": 0.0002, "epoch": 0.9870385561936013, "step": 15040}, {"loss": 0.8251, "grad_norm": 0.4588816165924072, "learning_rate": 0.0002, "epoch": 0.9876948318293683, "step": 15050}, {"loss": 0.895, "grad_norm": 0.47414910793304443, "learning_rate": 0.0002, "epoch": 0.9883511074651353, "step": 15060}, {"loss": 0.8688, "grad_norm": 0.3370365798473358, "learning_rate": 0.0002, "epoch": 0.9890073831009024, "step": 15070}, {"loss": 0.8775, "grad_norm": 0.3697716295719147, "learning_rate": 0.0002, "epoch": 0.9896636587366694, "step": 15080}, {"loss": 0.8949, "grad_norm": 0.31965479254722595, "learning_rate": 0.0002, "epoch": 0.9903199343724364, "step": 15090}, {"loss": 0.9519, "grad_norm": 0.5081075429916382, "learning_rate": 0.0002, "epoch": 0.9909762100082035, "step": 15100}, {"loss": 0.9012, "grad_norm": 0.44397613406181335, "learning_rate": 0.0002, "epoch": 0.9916324856439704, "step": 15110}, {"loss": 0.8327, "grad_norm": 0.30696988105773926, "learning_rate": 0.0002, "epoch": 0.9922887612797375, "step": 15120}, {"loss": 0.8785, "grad_norm": 0.4071432650089264, "learning_rate": 0.0002, "epoch": 0.9929450369155045, "step": 15130}, {"loss": 0.8879, "grad_norm": 0.5356084108352661, "learning_rate": 0.0002, "epoch": 0.9936013125512715, "step": 15140}, {"loss": 0.8593, "grad_norm": 0.36654597520828247, "learning_rate": 0.0002, "epoch": 0.9942575881870386, "step": 15150}, {"loss": 0.92, "grad_norm": 0.38214483857154846, "learning_rate": 0.0002, "epoch": 0.9949138638228056, "step": 15160}, {"loss": 0.9101, "grad_norm": 0.4340892434120178, "learning_rate": 0.0002, "epoch": 0.9955701394585726, "step": 15170}, {"loss": 0.9049, "grad_norm": 0.41310828924179077, "learning_rate": 0.0002, "epoch": 0.9962264150943396, "step": 15180}, {"loss": 0.8557, "grad_norm": 0.4932044744491577, "learning_rate": 0.0002, "epoch": 0.9968826907301066, "step": 15190}, {"loss": 0.8989, "grad_norm": 0.45371273159980774, "learning_rate": 0.0002, "epoch": 0.9975389663658737, "step": 15200}, {"loss": 0.9003, "grad_norm": 0.42956778407096863, "learning_rate": 0.0002, "epoch": 0.9981952420016407, "step": 15210}, {"loss": 0.8763, "grad_norm": 0.4343477487564087, "learning_rate": 0.0002, "epoch": 0.9988515176374078, "step": 15220}, {"loss": 0.8832, "grad_norm": 0.4425382912158966, "learning_rate": 0.0002, "epoch": 0.9995077932731747, "step": 15230}]} +{"epoch": 2.0, "step": 30475, "epoch_duration": 26246.496717453003, "total_accumulated_duration": 67246.89696502686, "gpu_info": {"GPU_0": "NVIDIA A100-PCIE-40GB"}, "memory_usage": {"avg_memory_usage": {"GPU_0": 7751.47119140625}, "peak_memory_usage": {"GPU_0": 19996.7255859375}, "avg_memory_reserved": {"GPU_0": 24388.0}, "peak_memory_reserved": {"GPU_0": 24780.0}, "total_memory": {"GPU_0": 40444.375}}, "best_checkpoint_path": "outputs-001/gemma-2-9b-it_int4_medmcqa_full_con_lr-0.0002_e-8_seq-512_lora-a-32-d-0.05-r-64_bs-1_gas-2_tf32-True_tunedata-portion-p-1.0-num-127975-sd-1/checkpoint-15237", "params": {"epochs": 8, "batch_size": 1, "learning_rate": 0.0002, "gradient_accumulation_steps": 2, "warmup_ratio": 0.03, "max_grad_norm": 0.3, "lora_alpha": 32, "lora_dropout": 0.05, "lora_r": 64, "tf32": true, "seed": 42}, "log_history": [{"loss": 2.6059, "grad_norm": 1.0711857080459595, "learning_rate": 0.0002, "epoch": 0.0006562756357670221, "step": 10}, {"loss": 1.7643, "grad_norm": 0.935492992401123, "learning_rate": 0.0002, "epoch": 0.0013125512715340443, "step": 20}, {"loss": 1.2573, "grad_norm": 0.908809244632721, "learning_rate": 0.0002, "epoch": 0.001968826907301 \ No newline at end of file